flwr 1.20.0__py3-none-any.whl → 1.22.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flwr/__init__.py +4 -1
- flwr/app/__init__.py +28 -0
- flwr/app/exception.py +31 -0
- flwr/cli/app.py +2 -0
- flwr/cli/auth_plugin/oidc_cli_plugin.py +4 -4
- flwr/cli/cli_user_auth_interceptor.py +1 -1
- flwr/cli/config_utils.py +3 -3
- flwr/cli/constant.py +25 -8
- flwr/cli/log.py +9 -9
- flwr/cli/login/login.py +3 -3
- flwr/cli/ls.py +5 -5
- flwr/cli/new/new.py +15 -2
- flwr/cli/new/templates/app/README.flowertune.md.tpl +1 -1
- flwr/cli/new/templates/app/code/__init__.pytorch_legacy_api.py.tpl +1 -0
- flwr/cli/new/templates/app/code/client.baseline.py.tpl +64 -47
- flwr/cli/new/templates/app/code/client.huggingface.py.tpl +68 -30
- flwr/cli/new/templates/app/code/client.jax.py.tpl +63 -42
- flwr/cli/new/templates/app/code/client.mlx.py.tpl +80 -51
- flwr/cli/new/templates/app/code/client.numpy.py.tpl +36 -13
- flwr/cli/new/templates/app/code/client.pytorch.py.tpl +71 -46
- flwr/cli/new/templates/app/code/client.pytorch_legacy_api.py.tpl +55 -0
- flwr/cli/new/templates/app/code/client.sklearn.py.tpl +75 -30
- flwr/cli/new/templates/app/code/client.tensorflow.py.tpl +69 -44
- flwr/cli/new/templates/app/code/client.xgboost.py.tpl +110 -0
- flwr/cli/new/templates/app/code/flwr_tune/client_app.py.tpl +56 -90
- flwr/cli/new/templates/app/code/flwr_tune/models.py.tpl +1 -23
- flwr/cli/new/templates/app/code/flwr_tune/server_app.py.tpl +37 -58
- flwr/cli/new/templates/app/code/flwr_tune/strategy.py.tpl +39 -44
- flwr/cli/new/templates/app/code/model.baseline.py.tpl +0 -14
- flwr/cli/new/templates/app/code/server.baseline.py.tpl +27 -29
- flwr/cli/new/templates/app/code/server.huggingface.py.tpl +23 -19
- flwr/cli/new/templates/app/code/server.jax.py.tpl +27 -14
- flwr/cli/new/templates/app/code/server.mlx.py.tpl +29 -19
- flwr/cli/new/templates/app/code/server.numpy.py.tpl +30 -17
- flwr/cli/new/templates/app/code/server.pytorch.py.tpl +36 -26
- flwr/cli/new/templates/app/code/server.pytorch_legacy_api.py.tpl +31 -0
- flwr/cli/new/templates/app/code/server.sklearn.py.tpl +29 -21
- flwr/cli/new/templates/app/code/server.tensorflow.py.tpl +28 -19
- flwr/cli/new/templates/app/code/server.xgboost.py.tpl +56 -0
- flwr/cli/new/templates/app/code/task.huggingface.py.tpl +16 -20
- flwr/cli/new/templates/app/code/task.jax.py.tpl +1 -1
- flwr/cli/new/templates/app/code/task.numpy.py.tpl +1 -1
- flwr/cli/new/templates/app/code/task.pytorch.py.tpl +14 -27
- flwr/cli/new/templates/app/code/task.pytorch_legacy_api.py.tpl +111 -0
- flwr/cli/new/templates/app/code/task.tensorflow.py.tpl +1 -2
- flwr/cli/new/templates/app/code/task.xgboost.py.tpl +67 -0
- flwr/cli/new/templates/app/pyproject.baseline.toml.tpl +4 -4
- flwr/cli/new/templates/app/pyproject.flowertune.toml.tpl +2 -2
- flwr/cli/new/templates/app/pyproject.huggingface.toml.tpl +4 -4
- flwr/cli/new/templates/app/pyproject.jax.toml.tpl +1 -1
- flwr/cli/new/templates/app/pyproject.mlx.toml.tpl +2 -2
- flwr/cli/new/templates/app/pyproject.numpy.toml.tpl +1 -1
- flwr/cli/new/templates/app/pyproject.pytorch.toml.tpl +3 -3
- flwr/cli/new/templates/app/pyproject.pytorch_legacy_api.toml.tpl +53 -0
- flwr/cli/new/templates/app/pyproject.sklearn.toml.tpl +1 -1
- flwr/cli/new/templates/app/pyproject.tensorflow.toml.tpl +1 -1
- flwr/cli/new/templates/app/pyproject.xgboost.toml.tpl +61 -0
- flwr/cli/pull.py +100 -0
- flwr/cli/run/run.py +9 -13
- flwr/cli/stop.py +7 -4
- flwr/cli/utils.py +36 -8
- flwr/client/grpc_rere_client/connection.py +1 -12
- flwr/client/rest_client/connection.py +3 -0
- flwr/clientapp/__init__.py +10 -0
- flwr/clientapp/mod/__init__.py +29 -0
- flwr/clientapp/mod/centraldp_mods.py +248 -0
- flwr/clientapp/mod/localdp_mod.py +169 -0
- flwr/clientapp/typing.py +22 -0
- flwr/common/args.py +20 -6
- flwr/common/auth_plugin/__init__.py +4 -4
- flwr/common/auth_plugin/auth_plugin.py +7 -7
- flwr/common/constant.py +26 -4
- flwr/common/event_log_plugin/event_log_plugin.py +1 -1
- flwr/common/exit/__init__.py +4 -0
- flwr/common/exit/exit.py +8 -1
- flwr/common/exit/exit_code.py +30 -7
- flwr/common/exit/exit_handler.py +62 -0
- flwr/common/{exit_handlers.py → exit/signal_handler.py} +20 -37
- flwr/common/grpc.py +0 -11
- flwr/common/inflatable_utils.py +1 -1
- flwr/common/logger.py +1 -1
- flwr/common/record/typeddict.py +12 -0
- flwr/common/retry_invoker.py +30 -11
- flwr/common/telemetry.py +4 -0
- flwr/compat/server/app.py +2 -2
- flwr/proto/appio_pb2.py +25 -17
- flwr/proto/appio_pb2.pyi +46 -2
- flwr/proto/clientappio_pb2.py +3 -11
- flwr/proto/clientappio_pb2.pyi +0 -47
- flwr/proto/clientappio_pb2_grpc.py +19 -20
- flwr/proto/clientappio_pb2_grpc.pyi +10 -11
- flwr/proto/control_pb2.py +66 -0
- flwr/proto/{exec_pb2.pyi → control_pb2.pyi} +24 -0
- flwr/proto/{exec_pb2_grpc.py → control_pb2_grpc.py} +88 -54
- flwr/proto/control_pb2_grpc.pyi +106 -0
- flwr/proto/serverappio_pb2.py +2 -2
- flwr/proto/serverappio_pb2_grpc.py +68 -0
- flwr/proto/serverappio_pb2_grpc.pyi +26 -0
- flwr/proto/simulationio_pb2.py +4 -11
- flwr/proto/simulationio_pb2.pyi +0 -58
- flwr/proto/simulationio_pb2_grpc.py +129 -27
- flwr/proto/simulationio_pb2_grpc.pyi +52 -13
- flwr/server/app.py +142 -152
- flwr/server/grid/grpc_grid.py +3 -0
- flwr/server/grid/inmemory_grid.py +1 -0
- flwr/server/serverapp/app.py +157 -146
- flwr/server/superlink/fleet/vce/backend/raybackend.py +3 -1
- flwr/server/superlink/fleet/vce/vce_api.py +6 -6
- flwr/server/superlink/linkstate/in_memory_linkstate.py +34 -0
- flwr/server/superlink/linkstate/linkstate.py +2 -1
- flwr/server/superlink/linkstate/sqlite_linkstate.py +45 -0
- flwr/server/superlink/serverappio/serverappio_grpc.py +1 -1
- flwr/server/superlink/serverappio/serverappio_servicer.py +61 -6
- flwr/server/superlink/simulation/simulationio_servicer.py +97 -21
- flwr/serverapp/__init__.py +12 -0
- flwr/serverapp/exception.py +38 -0
- flwr/serverapp/strategy/__init__.py +64 -0
- flwr/serverapp/strategy/bulyan.py +238 -0
- flwr/serverapp/strategy/dp_adaptive_clipping.py +335 -0
- flwr/serverapp/strategy/dp_fixed_clipping.py +374 -0
- flwr/serverapp/strategy/fedadagrad.py +159 -0
- flwr/serverapp/strategy/fedadam.py +178 -0
- flwr/serverapp/strategy/fedavg.py +320 -0
- flwr/serverapp/strategy/fedavgm.py +198 -0
- flwr/serverapp/strategy/fedmedian.py +105 -0
- flwr/serverapp/strategy/fedopt.py +218 -0
- flwr/serverapp/strategy/fedprox.py +174 -0
- flwr/serverapp/strategy/fedtrimmedavg.py +176 -0
- flwr/serverapp/strategy/fedxgb_bagging.py +117 -0
- flwr/serverapp/strategy/fedxgb_cyclic.py +220 -0
- flwr/serverapp/strategy/fedyogi.py +170 -0
- flwr/serverapp/strategy/krum.py +112 -0
- flwr/serverapp/strategy/multikrum.py +247 -0
- flwr/serverapp/strategy/qfedavg.py +252 -0
- flwr/serverapp/strategy/result.py +105 -0
- flwr/serverapp/strategy/strategy.py +285 -0
- flwr/serverapp/strategy/strategy_utils.py +299 -0
- flwr/simulation/app.py +161 -164
- flwr/simulation/run_simulation.py +25 -30
- flwr/supercore/app_utils.py +58 -0
- flwr/{supernode/scheduler → supercore/cli}/__init__.py +3 -3
- flwr/supercore/cli/flower_superexec.py +166 -0
- flwr/supercore/constant.py +19 -0
- flwr/supercore/{scheduler → corestate}/__init__.py +3 -3
- flwr/supercore/corestate/corestate.py +81 -0
- flwr/supercore/grpc_health/__init__.py +3 -0
- flwr/supercore/grpc_health/health_server.py +53 -0
- flwr/supercore/grpc_health/simple_health_servicer.py +2 -2
- flwr/{superexec → supercore/superexec}/__init__.py +1 -1
- flwr/supercore/superexec/plugin/__init__.py +28 -0
- flwr/{supernode/scheduler/simple_clientapp_scheduler_plugin.py → supercore/superexec/plugin/base_exec_plugin.py} +10 -6
- flwr/supercore/superexec/plugin/clientapp_exec_plugin.py +28 -0
- flwr/supercore/{scheduler/plugin.py → superexec/plugin/exec_plugin.py} +15 -5
- flwr/supercore/superexec/plugin/serverapp_exec_plugin.py +28 -0
- flwr/supercore/superexec/plugin/simulation_exec_plugin.py +28 -0
- flwr/supercore/superexec/run_superexec.py +199 -0
- flwr/superlink/artifact_provider/__init__.py +22 -0
- flwr/superlink/artifact_provider/artifact_provider.py +37 -0
- flwr/superlink/servicer/__init__.py +15 -0
- flwr/superlink/servicer/control/__init__.py +22 -0
- flwr/{superexec/exec_event_log_interceptor.py → superlink/servicer/control/control_event_log_interceptor.py} +7 -7
- flwr/{superexec/exec_grpc.py → superlink/servicer/control/control_grpc.py} +27 -29
- flwr/{superexec/exec_license_interceptor.py → superlink/servicer/control/control_license_interceptor.py} +6 -6
- flwr/{superexec/exec_servicer.py → superlink/servicer/control/control_servicer.py} +127 -31
- flwr/{superexec/exec_user_auth_interceptor.py → superlink/servicer/control/control_user_auth_interceptor.py} +10 -10
- flwr/supernode/cli/flower_supernode.py +3 -0
- flwr/supernode/cli/flwr_clientapp.py +18 -21
- flwr/supernode/nodestate/in_memory_nodestate.py +2 -2
- flwr/supernode/nodestate/nodestate.py +3 -59
- flwr/supernode/runtime/run_clientapp.py +39 -102
- flwr/supernode/servicer/clientappio/clientappio_servicer.py +10 -17
- flwr/supernode/start_client_internal.py +35 -76
- {flwr-1.20.0.dist-info → flwr-1.22.0.dist-info}/METADATA +9 -18
- {flwr-1.20.0.dist-info → flwr-1.22.0.dist-info}/RECORD +176 -128
- {flwr-1.20.0.dist-info → flwr-1.22.0.dist-info}/entry_points.txt +1 -0
- flwr/proto/exec_pb2.py +0 -62
- flwr/proto/exec_pb2_grpc.pyi +0 -93
- flwr/superexec/app.py +0 -45
- flwr/superexec/deployment.py +0 -191
- flwr/superexec/executor.py +0 -100
- flwr/superexec/simulation.py +0 -129
- {flwr-1.20.0.dist-info → flwr-1.22.0.dist-info}/WHEEL +0 -0
|
@@ -16,9 +16,6 @@
|
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
import gc
|
|
19
|
-
import os
|
|
20
|
-
import threading
|
|
21
|
-
import time
|
|
22
19
|
from logging import DEBUG, ERROR, INFO
|
|
23
20
|
from typing import Optional
|
|
24
21
|
|
|
@@ -63,23 +60,15 @@ from flwr.proto.appio_pb2 import ( # pylint: disable=E0611
|
|
|
63
60
|
PushAppOutputsRequest,
|
|
64
61
|
PushAppOutputsResponse,
|
|
65
62
|
)
|
|
66
|
-
|
|
67
|
-
# pylint: disable=E0611
|
|
68
|
-
from flwr.proto.clientappio_pb2 import (
|
|
69
|
-
GetRunIdsWithPendingMessagesRequest,
|
|
70
|
-
GetRunIdsWithPendingMessagesResponse,
|
|
71
|
-
RequestTokenRequest,
|
|
72
|
-
RequestTokenResponse,
|
|
73
|
-
)
|
|
74
63
|
from flwr.proto.clientappio_pb2_grpc import ClientAppIoStub
|
|
75
64
|
from flwr.proto.node_pb2 import Node # pylint: disable=E0611
|
|
65
|
+
from flwr.supercore.app_utils import start_parent_process_monitor
|
|
76
66
|
from flwr.supercore.utils import mask_string
|
|
77
67
|
|
|
78
68
|
|
|
79
69
|
def run_clientapp( # pylint: disable=R0913, R0914, R0917
|
|
80
70
|
clientappio_api_address: str,
|
|
81
|
-
|
|
82
|
-
token: Optional[str] = None,
|
|
71
|
+
token: str,
|
|
83
72
|
flwr_dir: Optional[str] = None,
|
|
84
73
|
certificates: Optional[bytes] = None,
|
|
85
74
|
parent_pid: Optional[int] = None,
|
|
@@ -102,113 +91,61 @@ def run_clientapp( # pylint: disable=R0913, R0914, R0917
|
|
|
102
91
|
stub = ClientAppIoStub(channel)
|
|
103
92
|
_wrap_stub(stub, _make_simple_grpc_retry_invoker())
|
|
104
93
|
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
if token is None:
|
|
108
|
-
token = get_token(stub)
|
|
94
|
+
# Pull Message, Context, Run and (optional) FAB from SuperNode
|
|
95
|
+
message, context, run, fab = pull_clientappinputs(stub=stub, token=token)
|
|
109
96
|
|
|
110
|
-
|
|
111
|
-
|
|
97
|
+
# Install FAB, if provided
|
|
98
|
+
if fab:
|
|
99
|
+
log(DEBUG, "[flwr-clientapp] Start FAB installation.")
|
|
100
|
+
install_from_fab(fab.content, flwr_dir=flwr_dir_, skip_prompt=True)
|
|
112
101
|
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
102
|
+
load_client_app_fn = get_load_client_app_fn(
|
|
103
|
+
default_app_ref="",
|
|
104
|
+
app_path=None,
|
|
105
|
+
multi_app=True,
|
|
106
|
+
flwr_dir=str(flwr_dir_),
|
|
107
|
+
)
|
|
117
108
|
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
109
|
+
try:
|
|
110
|
+
# Load ClientApp
|
|
111
|
+
log(DEBUG, "[flwr-clientapp] Start `ClientApp` Loading.")
|
|
112
|
+
client_app: ClientApp = load_client_app_fn(
|
|
113
|
+
run.fab_id, run.fab_version, fab.hash_str if fab else ""
|
|
123
114
|
)
|
|
124
115
|
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
log(DEBUG, "[flwr-clientapp] Start `ClientApp` Loading.")
|
|
128
|
-
client_app: ClientApp = load_client_app_fn(
|
|
129
|
-
run.fab_id, run.fab_version, fab.hash_str if fab else ""
|
|
130
|
-
)
|
|
131
|
-
|
|
132
|
-
# Execute ClientApp
|
|
133
|
-
reply_message = client_app(message=message, context=context)
|
|
116
|
+
# Execute ClientApp
|
|
117
|
+
reply_message = client_app(message=message, context=context)
|
|
134
118
|
|
|
135
|
-
|
|
136
|
-
|
|
119
|
+
except Exception as ex: # pylint: disable=broad-exception-caught
|
|
120
|
+
# Don't update/change NodeState
|
|
137
121
|
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
)
|
|
146
|
-
e_code = ErrorCode.LOAD_CLIENT_APP_EXCEPTION
|
|
122
|
+
e_code = ErrorCode.CLIENT_APP_RAISED_EXCEPTION
|
|
123
|
+
# Ex fmt: "<class 'ZeroDivisionError'>:<'division by zero'>"
|
|
124
|
+
reason = str(type(ex)) + ":<'" + str(ex) + "'>"
|
|
125
|
+
exc_entity = "ClientApp"
|
|
126
|
+
if isinstance(ex, LoadClientAppError):
|
|
127
|
+
reason = "An exception was raised when attempting to load `ClientApp`"
|
|
128
|
+
e_code = ErrorCode.LOAD_CLIENT_APP_EXCEPTION
|
|
147
129
|
|
|
148
|
-
|
|
130
|
+
log(ERROR, "%s raised an exception", exc_entity, exc_info=ex)
|
|
149
131
|
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
Error(code=e_code, reason=reason), reply_to=message
|
|
153
|
-
)
|
|
132
|
+
# Create error message
|
|
133
|
+
reply_message = Message(Error(code=e_code, reason=reason), reply_to=message)
|
|
154
134
|
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
del client_app, message, context, run, fab, reply_message
|
|
161
|
-
gc.collect()
|
|
162
|
-
|
|
163
|
-
# Reset token to `None` to prevent flwr-clientapp from trying to pull the
|
|
164
|
-
# same inputs again
|
|
165
|
-
token = None
|
|
135
|
+
# Push Message and Context to SuperNode
|
|
136
|
+
_ = push_clientappoutputs(
|
|
137
|
+
stub=stub, token=token, message=reply_message, context=context
|
|
138
|
+
)
|
|
166
139
|
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
if run_once:
|
|
170
|
-
break
|
|
140
|
+
del client_app, message, context, run, fab, reply_message
|
|
141
|
+
gc.collect()
|
|
171
142
|
|
|
172
|
-
except KeyboardInterrupt:
|
|
173
|
-
log(INFO, "Closing connection")
|
|
174
143
|
except grpc.RpcError as e:
|
|
175
144
|
log(ERROR, "GRPC error occurred: %s", str(e))
|
|
176
145
|
finally:
|
|
177
146
|
channel.close()
|
|
178
147
|
|
|
179
148
|
|
|
180
|
-
def start_parent_process_monitor(
|
|
181
|
-
parent_pid: int,
|
|
182
|
-
) -> None:
|
|
183
|
-
"""Monitor the parent process and exit if it terminates."""
|
|
184
|
-
|
|
185
|
-
def monitor() -> None:
|
|
186
|
-
while True:
|
|
187
|
-
time.sleep(0.2)
|
|
188
|
-
if os.getppid() != parent_pid:
|
|
189
|
-
os.kill(os.getpid(), 9)
|
|
190
|
-
|
|
191
|
-
threading.Thread(target=monitor, daemon=True).start()
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
def get_token(stub: ClientAppIoStub) -> str:
|
|
195
|
-
"""Get a token from SuperNode."""
|
|
196
|
-
log(DEBUG, "[flwr-clientapp] Request token")
|
|
197
|
-
while True:
|
|
198
|
-
res: GetRunIdsWithPendingMessagesResponse = stub.GetRunIdsWithPendingMessages(
|
|
199
|
-
GetRunIdsWithPendingMessagesRequest()
|
|
200
|
-
)
|
|
201
|
-
|
|
202
|
-
for run_id in res.run_ids:
|
|
203
|
-
tk_res: RequestTokenResponse = stub.RequestToken(
|
|
204
|
-
RequestTokenRequest(run_id=run_id)
|
|
205
|
-
)
|
|
206
|
-
if tk_res.token:
|
|
207
|
-
return tk_res.token
|
|
208
|
-
|
|
209
|
-
time.sleep(1) # Wait before retrying to get run IDs
|
|
210
|
-
|
|
211
|
-
|
|
212
149
|
def pull_clientappinputs(
|
|
213
150
|
stub: ClientAppIoStub, token: str
|
|
214
151
|
) -> tuple[Message, Context, Run, Optional[Fab]]:
|
|
@@ -36,6 +36,8 @@ from flwr.common.typing import Fab, Run
|
|
|
36
36
|
# pylint: disable=E0611
|
|
37
37
|
from flwr.proto import clientappio_pb2_grpc
|
|
38
38
|
from flwr.proto.appio_pb2 import ( # pylint: disable=E0401
|
|
39
|
+
ListAppsToLaunchRequest,
|
|
40
|
+
ListAppsToLaunchResponse,
|
|
39
41
|
PullAppInputsRequest,
|
|
40
42
|
PullAppInputsResponse,
|
|
41
43
|
PullAppMessagesRequest,
|
|
@@ -44,10 +46,6 @@ from flwr.proto.appio_pb2 import ( # pylint: disable=E0401
|
|
|
44
46
|
PushAppMessagesResponse,
|
|
45
47
|
PushAppOutputsRequest,
|
|
46
48
|
PushAppOutputsResponse,
|
|
47
|
-
)
|
|
48
|
-
from flwr.proto.clientappio_pb2 import ( # pylint: disable=E0401
|
|
49
|
-
GetRunIdsWithPendingMessagesRequest,
|
|
50
|
-
GetRunIdsWithPendingMessagesResponse,
|
|
51
49
|
RequestTokenRequest,
|
|
52
50
|
RequestTokenResponse,
|
|
53
51
|
)
|
|
@@ -82,13 +80,13 @@ class ClientAppIoServicer(clientappio_pb2_grpc.ClientAppIoServicer):
|
|
|
82
80
|
self.ffs_factory = ffs_factory
|
|
83
81
|
self.objectstore_factory = objectstore_factory
|
|
84
82
|
|
|
85
|
-
def GetRunIdsWithPendingMessages(
|
|
83
|
+
def ListAppsToLaunch(
|
|
86
84
|
self,
|
|
87
|
-
request: GetRunIdsWithPendingMessagesRequest,
|
|
85
|
+
request: ListAppsToLaunchRequest,
|
|
88
86
|
context: grpc.ServicerContext,
|
|
89
|
-
) -> GetRunIdsWithPendingMessagesResponse:
|
|
90
|
-
"""Get run IDs with
|
|
91
|
-
log(DEBUG, "ClientAppIo.GetRunIdsWithPendingMessages")
|
|
87
|
+
) -> ListAppsToLaunchResponse:
|
|
88
|
+
"""Get run IDs with apps to launch."""
|
|
89
|
+
log(DEBUG, "ClientAppIo.ListAppsToLaunch")
|
|
92
90
|
|
|
93
91
|
# Initialize state connection
|
|
94
92
|
state = self.state_factory.state()
|
|
@@ -97,7 +95,7 @@ class ClientAppIoServicer(clientappio_pb2_grpc.ClientAppIoServicer):
|
|
|
97
95
|
run_ids = state.get_run_ids_with_pending_messages()
|
|
98
96
|
|
|
99
97
|
# Return run IDs
|
|
100
|
-
return GetRunIdsWithPendingMessagesResponse(run_ids=run_ids)
|
|
98
|
+
return ListAppsToLaunchResponse(run_ids=run_ids)
|
|
101
99
|
|
|
102
100
|
def RequestToken(
|
|
103
101
|
self, request: RequestTokenRequest, context: grpc.ServicerContext
|
|
@@ -109,15 +107,10 @@ class ClientAppIoServicer(clientappio_pb2_grpc.ClientAppIoServicer):
|
|
|
109
107
|
state = self.state_factory.state()
|
|
110
108
|
|
|
111
109
|
# Attempt to create a token for the provided run ID
|
|
112
|
-
|
|
113
|
-
token = state.create_token(request.run_id)
|
|
114
|
-
except ValueError:
|
|
115
|
-
# Return an empty token if A token already exists for this run ID,
|
|
116
|
-
# indicating the run is in progress
|
|
117
|
-
return RequestTokenResponse(token="")
|
|
110
|
+
token = state.create_token(request.run_id)
|
|
118
111
|
|
|
119
112
|
# Return the token
|
|
120
|
-
return RequestTokenResponse(token=token)
|
|
113
|
+
return RequestTokenResponse(token=token or "")
|
|
121
114
|
|
|
122
115
|
def GetRun(
|
|
123
116
|
self, request: GetRunRequest, context: grpc.ServicerContext
|
|
@@ -21,7 +21,7 @@ import time
|
|
|
21
21
|
from collections.abc import Iterator
|
|
22
22
|
from contextlib import contextmanager
|
|
23
23
|
from functools import partial
|
|
24
|
-
from logging import INFO, WARN
|
|
24
|
+
from logging import INFO
|
|
25
25
|
from pathlib import Path
|
|
26
26
|
from typing import Callable, Optional, Union, cast
|
|
27
27
|
|
|
@@ -35,18 +35,15 @@ from flwr.common import GRPC_MAX_MESSAGE_LENGTH, Context, Message, RecordDict
|
|
|
35
35
|
from flwr.common.address import parse_address
|
|
36
36
|
from flwr.common.config import get_flwr_dir, get_fused_config_from_fab
|
|
37
37
|
from flwr.common.constant import (
|
|
38
|
-
CLIENT_OCTET,
|
|
39
38
|
CLIENTAPPIO_API_DEFAULT_SERVER_ADDRESS,
|
|
40
39
|
ISOLATION_MODE_SUBPROCESS,
|
|
41
|
-
MAX_RETRY_DELAY,
|
|
42
|
-
SERVER_OCTET,
|
|
43
40
|
TRANSPORT_TYPE_GRPC_ADAPTER,
|
|
44
41
|
TRANSPORT_TYPE_GRPC_RERE,
|
|
45
42
|
TRANSPORT_TYPE_REST,
|
|
46
43
|
TRANSPORT_TYPES,
|
|
44
|
+
ExecPluginType,
|
|
47
45
|
)
|
|
48
|
-
from flwr.common.exit import ExitCode, flwr_exit
|
|
49
|
-
from flwr.common.exit_handlers import register_exit_handlers
|
|
46
|
+
from flwr.common.exit import ExitCode, flwr_exit, register_signal_handlers
|
|
50
47
|
from flwr.common.grpc import generic_create_grpc_server
|
|
51
48
|
from flwr.common.inflatable import iterate_object_tree
|
|
52
49
|
from flwr.common.inflatable_utils import (
|
|
@@ -54,12 +51,13 @@ from flwr.common.inflatable_utils import (
|
|
|
54
51
|
push_object_contents_from_iterable,
|
|
55
52
|
)
|
|
56
53
|
from flwr.common.logger import log
|
|
57
|
-
from flwr.common.retry_invoker import RetryInvoker, RetryState, exponential
|
|
54
|
+
from flwr.common.retry_invoker import RetryInvoker, _make_simple_grpc_retry_invoker
|
|
58
55
|
from flwr.common.telemetry import EventType
|
|
59
56
|
from flwr.common.typing import Fab, Run, RunNotRunningException, UserConfig
|
|
60
57
|
from flwr.proto.clientappio_pb2_grpc import add_ClientAppIoServicer_to_server
|
|
61
58
|
from flwr.proto.message_pb2 import ObjectTree # pylint: disable=E0611
|
|
62
59
|
from flwr.supercore.ffs import Ffs, FfsFactory
|
|
60
|
+
from flwr.supercore.grpc_health import run_health_server_grpc_no_tls
|
|
63
61
|
from flwr.supercore.object_store import ObjectStore, ObjectStoreFactory
|
|
64
62
|
from flwr.supernode.nodestate import NodeState, NodeStateFactory
|
|
65
63
|
from flwr.supernode.servicer.clientappio import ClientAppIoServicer
|
|
@@ -87,6 +85,7 @@ def start_client_internal(
|
|
|
87
85
|
flwr_path: Optional[Path] = None,
|
|
88
86
|
isolation: str = ISOLATION_MODE_SUBPROCESS,
|
|
89
87
|
clientappio_api_address: str = CLIENTAPPIO_API_DEFAULT_SERVER_ADDRESS,
|
|
88
|
+
health_server_address: Optional[str] = None,
|
|
90
89
|
) -> None:
|
|
91
90
|
"""Start a Flower client node which connects to a Flower server.
|
|
92
91
|
|
|
@@ -135,6 +134,9 @@ def start_client_internal(
|
|
|
135
134
|
clientappio_api_address : str
|
|
136
135
|
(default: `CLIENTAPPIO_API_DEFAULT_SERVER_ADDRESS`)
|
|
137
136
|
The SuperNode gRPC server address.
|
|
137
|
+
health_server_address : Optional[str] (default: None)
|
|
138
|
+
The address of the health server. If `None` is provided, the health server will
|
|
139
|
+
NOT be started.
|
|
138
140
|
"""
|
|
139
141
|
if insecure is None:
|
|
140
142
|
insecure = root_certificates is None
|
|
@@ -145,6 +147,7 @@ def start_client_internal(
|
|
|
145
147
|
object_store_factory = ObjectStoreFactory()
|
|
146
148
|
|
|
147
149
|
# Launch ClientAppIo API server
|
|
150
|
+
grpc_servers = []
|
|
148
151
|
clientappio_server = run_clientappio_api_grpc(
|
|
149
152
|
address=clientappio_api_address,
|
|
150
153
|
state_factory=state_factory,
|
|
@@ -152,12 +155,18 @@ def start_client_internal(
|
|
|
152
155
|
objectstore_factory=object_store_factory,
|
|
153
156
|
certificates=None,
|
|
154
157
|
)
|
|
158
|
+
grpc_servers.append(clientappio_server)
|
|
159
|
+
|
|
160
|
+
# Launch gRPC health server
|
|
161
|
+
if health_server_address is not None:
|
|
162
|
+
health_server = run_health_server_grpc_no_tls(health_server_address)
|
|
163
|
+
grpc_servers.append(health_server)
|
|
155
164
|
|
|
156
165
|
# Register handlers for graceful shutdown
|
|
157
|
-
|
|
166
|
+
register_signal_handlers(
|
|
158
167
|
event_type=EventType.RUN_SUPERNODE_LEAVE,
|
|
159
168
|
exit_message="SuperNode terminated gracefully.",
|
|
160
|
-
grpc_servers=[clientappio_server],
|
|
169
|
+
grpc_servers=grpc_servers,
|
|
161
170
|
)
|
|
162
171
|
|
|
163
172
|
# Initialize NodeState, Ffs, and ObjectStore
|
|
@@ -165,6 +174,15 @@ def start_client_internal(
|
|
|
165
174
|
ffs = ffs_factory.ffs()
|
|
166
175
|
store = object_store_factory.store()
|
|
167
176
|
|
|
177
|
+
# Launch the SuperExec if the isolation mode is `subprocess`
|
|
178
|
+
if isolation == ISOLATION_MODE_SUBPROCESS:
|
|
179
|
+
command = ["flower-superexec", "--insecure"]
|
|
180
|
+
command += ["--appio-api-address", clientappio_api_address]
|
|
181
|
+
command += ["--plugin-type", ExecPluginType.CLIENT_APP]
|
|
182
|
+
command += ["--parent-pid", str(os.getpid())]
|
|
183
|
+
# pylint: disable-next=consider-using-with
|
|
184
|
+
subprocess.Popen(command)
|
|
185
|
+
|
|
168
186
|
with _init_connection(
|
|
169
187
|
transport=transport,
|
|
170
188
|
server_address=server_address,
|
|
@@ -208,35 +226,6 @@ def start_client_internal(
|
|
|
208
226
|
confirm_message_received=confirm_message_received,
|
|
209
227
|
)
|
|
210
228
|
|
|
211
|
-
# Two isolation modes:
|
|
212
|
-
# 1. `subprocess`: SuperNode is starting the ClientApp
|
|
213
|
-
# process as a subprocess.
|
|
214
|
-
# 2. `process`: ClientApp process gets started separately
|
|
215
|
-
# (via `flwr-clientapp`), for example, in a separate
|
|
216
|
-
# Docker container.
|
|
217
|
-
|
|
218
|
-
# Mode 1: SuperNode starts ClientApp as subprocess
|
|
219
|
-
start_subprocess = isolation == ISOLATION_MODE_SUBPROCESS
|
|
220
|
-
|
|
221
|
-
if start_subprocess and run_id is not None:
|
|
222
|
-
_octet, _colon, _port = clientappio_api_address.rpartition(":")
|
|
223
|
-
io_address = (
|
|
224
|
-
f"{CLIENT_OCTET}:{_port}"
|
|
225
|
-
if _octet == SERVER_OCTET
|
|
226
|
-
else clientappio_api_address
|
|
227
|
-
)
|
|
228
|
-
# Start ClientApp subprocess
|
|
229
|
-
command = [
|
|
230
|
-
"flwr-clientapp",
|
|
231
|
-
"--clientappio-api-address",
|
|
232
|
-
io_address,
|
|
233
|
-
"--parent-pid",
|
|
234
|
-
str(os.getpid()),
|
|
235
|
-
"--insecure",
|
|
236
|
-
"--run-once",
|
|
237
|
-
]
|
|
238
|
-
subprocess.run(command, check=False)
|
|
239
|
-
|
|
240
229
|
# No message has been pulled therefore we can skip the push stage.
|
|
241
230
|
if run_id is None:
|
|
242
231
|
# If no message was received, wait for a while
|
|
@@ -521,44 +510,14 @@ def _make_fleet_connection_retry_invoker(
|
|
|
521
510
|
connection_error_type: type[Exception] = RpcError,
|
|
522
511
|
) -> RetryInvoker:
|
|
523
512
|
"""Create a retry invoker for fleet connection."""
|
|
513
|
+
retry_invoker = _make_simple_grpc_retry_invoker()
|
|
514
|
+
retry_invoker.recoverable_exceptions = connection_error_type
|
|
515
|
+
if max_retries is not None:
|
|
516
|
+
retry_invoker.max_tries = max_retries + 1
|
|
517
|
+
if max_wait_time is not None:
|
|
518
|
+
retry_invoker.max_time = max_wait_time
|
|
524
519
|
|
|
525
|
-
|
|
526
|
-
if retry_state.tries > 1:
|
|
527
|
-
log(
|
|
528
|
-
INFO,
|
|
529
|
-
"Connection successful after %.2f seconds and %s tries.",
|
|
530
|
-
retry_state.elapsed_time,
|
|
531
|
-
retry_state.tries,
|
|
532
|
-
)
|
|
533
|
-
|
|
534
|
-
def _on_backoff(retry_state: RetryState) -> None:
|
|
535
|
-
if retry_state.tries == 1:
|
|
536
|
-
log(WARN, "Connection attempt failed, retrying...")
|
|
537
|
-
else:
|
|
538
|
-
log(
|
|
539
|
-
WARN,
|
|
540
|
-
"Connection attempt failed, retrying in %.2f seconds",
|
|
541
|
-
retry_state.actual_wait,
|
|
542
|
-
)
|
|
543
|
-
|
|
544
|
-
return RetryInvoker(
|
|
545
|
-
wait_gen_factory=lambda: exponential(max_delay=MAX_RETRY_DELAY),
|
|
546
|
-
recoverable_exceptions=connection_error_type,
|
|
547
|
-
max_tries=max_retries + 1 if max_retries is not None else None,
|
|
548
|
-
max_time=max_wait_time,
|
|
549
|
-
on_giveup=lambda retry_state: (
|
|
550
|
-
log(
|
|
551
|
-
WARN,
|
|
552
|
-
"Giving up reconnection after %.2f seconds and %s tries.",
|
|
553
|
-
retry_state.elapsed_time,
|
|
554
|
-
retry_state.tries,
|
|
555
|
-
)
|
|
556
|
-
if retry_state.tries > 1
|
|
557
|
-
else None
|
|
558
|
-
),
|
|
559
|
-
on_success=_on_success,
|
|
560
|
-
on_backoff=_on_backoff,
|
|
561
|
-
)
|
|
520
|
+
return retry_invoker
|
|
562
521
|
|
|
563
522
|
|
|
564
523
|
def run_clientappio_api_grpc(
|
|
@@ -584,6 +543,6 @@ def run_clientappio_api_grpc(
|
|
|
584
543
|
max_message_length=GRPC_MAX_MESSAGE_LENGTH,
|
|
585
544
|
certificates=certificates,
|
|
586
545
|
)
|
|
587
|
-
log(INFO, "Starting
|
|
546
|
+
log(INFO, "Flower Deployment Runtime: Starting ClientAppIo API on %s", address)
|
|
588
547
|
clientappio_grpc_server.start()
|
|
589
548
|
return clientappio_grpc_server
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: flwr
|
|
3
|
-
Version: 1.20.0
|
|
3
|
+
Version: 1.22.0
|
|
4
4
|
Summary: Flower: A Friendly Federated AI Framework
|
|
5
5
|
License: Apache-2.0
|
|
6
6
|
Keywords: Artificial Intelligence,Federated AI,Federated Analytics,Federated Evaluation,Federated Learning,Flower,Machine Learning
|
|
@@ -102,25 +102,15 @@ Meet the Flower community on [flower.ai](https://flower.ai)!
|
|
|
102
102
|
|
|
103
103
|
Flower's goal is to make federated learning accessible to everyone. This series of tutorials introduces the fundamentals of federated learning and how to implement them in Flower.
|
|
104
104
|
|
|
105
|
-
0. **What is Federated Learning?**
|
|
105
|
+
0. **[What is Federated Learning?](https://flower.ai/docs/framework/main/en/tutorial-series-what-is-federated-learning.html)**
|
|
106
106
|
|
|
107
|
-
|
|
107
|
+
1. **[An Introduction to Federated Learning](https://flower.ai/docs/framework/main/en/tutorial-series-get-started-with-flower-pytorch.html)**
|
|
108
108
|
|
|
109
|
-
|
|
109
|
+
2. **[Using Strategies in Federated Learning](https://flower.ai/docs/framework/main/en/tutorial-series-use-a-federated-learning-strategy-pytorch.html)**
|
|
110
110
|
|
|
111
|
-
|
|
111
|
+
3. **[Customize a Flower Strategy](https://flower.ai/docs/framework/main/en/tutorial-series-build-a-strategy-from-scratch-pytorch.html)**
|
|
112
112
|
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
[](https://colab.research.google.com/github/adap/flower/blob/main/framework/docs/source/tutorial-series-use-a-federated-learning-strategy-pytorch.ipynb) (or open the [Jupyter Notebook](https://github.com/adap/flower/blob/main/framework/docs/source/tutorial-series-use-a-federated-learning-strategy-pytorch.ipynb))
|
|
116
|
-
|
|
117
|
-
3. **Building Strategies for Federated Learning**
|
|
118
|
-
|
|
119
|
-
[](https://colab.research.google.com/github/adap/flower/blob/main/framework/docs/source/tutorial-series-build-a-strategy-from-scratch-pytorch.ipynb) (or open the [Jupyter Notebook](https://github.com/adap/flower/blob/main/framework/docs/source/tutorial-series-build-a-strategy-from-scratch-pytorch.ipynb))
|
|
120
|
-
|
|
121
|
-
4. **Custom Clients for Federated Learning**
|
|
122
|
-
|
|
123
|
-
[](https://colab.research.google.com/github/adap/flower/blob/main/framework/docs/source/tutorial-series-customize-the-client-pytorch.ipynb) (or open the [Jupyter Notebook](https://github.com/adap/flower/blob/main/framework/docs/source/tutorial-series-customize-the-client-pytorch.ipynb))
|
|
113
|
+
4. **[Communicate Custom Messages](https://flower.ai/docs/framework/main/en/tutorial-series-customize-the-client-pytorch.html)**
|
|
124
114
|
|
|
125
115
|
Stay tuned, more tutorials are coming soon. Topics include **Privacy and Security in Federated Learning**, and **Scaling Federated Learning**.
|
|
126
116
|
|
|
@@ -172,8 +162,9 @@ Flower Baselines is a collection of community-contributed projects that reproduc
|
|
|
172
162
|
- [FedOpt](https://github.com/adap/flower/tree/main/baselines/flwr_baselines/flwr_baselines/publications/adaptive_federated_optimization)
|
|
173
163
|
|
|
174
164
|
Please refer to the [Flower Baselines Documentation](https://flower.ai/docs/baselines/) for a detailed categorization of baselines and for additional info including:
|
|
175
|
-
|
|
176
|
-
|
|
165
|
+
|
|
166
|
+
- [How to use Flower Baselines](https://flower.ai/docs/baselines/how-to-use-baselines.html)
|
|
167
|
+
- [How to contribute a new Flower Baseline](https://flower.ai/docs/baselines/how-to-contribute-baselines.html)
|
|
177
168
|
|
|
178
169
|
## Flower Usage Examples
|
|
179
170
|
|