flwr-nightly 1.11.0.dev20240822__py3-none-any.whl → 1.11.1.dev20240912__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of flwr-nightly might be problematic. Click here for more details.
- flwr/cli/app.py +0 -2
- flwr/cli/build.py +1 -1
- flwr/cli/new/new.py +41 -40
- flwr/cli/new/templates/app/LICENSE.tpl +202 -0
- flwr/cli/new/templates/app/README.baseline.md.tpl +127 -0
- flwr/cli/new/templates/app/README.flowertune.md.tpl +16 -6
- flwr/cli/new/templates/app/README.md.tpl +7 -30
- flwr/cli/new/templates/app/code/__init__.baseline.py.tpl +1 -0
- flwr/cli/new/templates/app/code/client.baseline.py.tpl +58 -0
- flwr/cli/new/templates/app/code/client.huggingface.py.tpl +19 -29
- flwr/cli/new/templates/app/code/client.tensorflow.py.tpl +0 -3
- flwr/cli/new/templates/app/code/dataset.baseline.py.tpl +36 -0
- flwr/cli/new/templates/app/code/flwr_tune/{client.py.tpl → client_app.py.tpl} +50 -40
- flwr/cli/new/templates/app/code/flwr_tune/dataset.py.tpl +32 -2
- flwr/cli/new/templates/app/code/flwr_tune/models.py.tpl +0 -3
- flwr/cli/new/templates/app/code/flwr_tune/server_app.py.tpl +95 -0
- flwr/cli/new/templates/app/code/flwr_tune/strategy.py.tpl +83 -0
- flwr/cli/new/templates/app/code/model.baseline.py.tpl +80 -0
- flwr/cli/new/templates/app/code/server.baseline.py.tpl +46 -0
- flwr/cli/new/templates/app/code/server.huggingface.py.tpl +18 -3
- flwr/cli/new/templates/app/code/strategy.baseline.py.tpl +1 -0
- flwr/cli/new/templates/app/code/task.huggingface.py.tpl +16 -13
- flwr/cli/new/templates/app/code/utils.baseline.py.tpl +1 -0
- flwr/cli/new/templates/app/pyproject.baseline.toml.tpl +138 -0
- flwr/cli/new/templates/app/pyproject.flowertune.toml.tpl +34 -7
- flwr/cli/new/templates/app/pyproject.huggingface.toml.tpl +9 -1
- flwr/cli/run/run.py +12 -2
- flwr/client/__init__.py +0 -4
- flwr/client/app.py +3 -4
- flwr/client/client.py +22 -1
- flwr/client/client_app.py +2 -2
- flwr/client/grpc_rere_client/client_interceptor.py +15 -7
- flwr/client/numpy_client.py +22 -1
- flwr/client/rest_client/connection.py +1 -1
- flwr/client/supernode/app.py +8 -7
- flwr/common/address.py +43 -0
- flwr/common/config.py +14 -11
- flwr/common/constant.py +12 -1
- flwr/common/record/recordset.py +1 -1
- flwr/common/record/typeddict.py +24 -1
- flwr/common/telemetry.py +36 -30
- flwr/server/__init__.py +0 -4
- flwr/server/app.py +27 -22
- flwr/server/compat/app.py +0 -5
- flwr/server/driver/grpc_driver.py +3 -6
- flwr/server/run_serverapp.py +20 -7
- flwr/server/superlink/fleet/grpc_adapter/grpc_adapter_servicer.py +15 -2
- flwr/server/superlink/fleet/grpc_bidi/grpc_server.py +5 -0
- flwr/server/superlink/fleet/grpc_rere/fleet_servicer.py +19 -8
- flwr/server/superlink/fleet/grpc_rere/server_interceptor.py +13 -12
- flwr/server/superlink/fleet/rest_rere/rest_api.py +71 -122
- flwr/server/superlink/fleet/vce/backend/backend.py +1 -2
- flwr/server/superlink/fleet/vce/backend/raybackend.py +33 -15
- flwr/server/superlink/fleet/vce/vce_api.py +2 -6
- flwr/server/superlink/state/in_memory_state.py +15 -15
- flwr/server/superlink/state/sqlite_state.py +10 -10
- flwr/server/superlink/state/state.py +8 -8
- flwr/server/workflow/secure_aggregation/secagg_workflow.py +1 -0
- flwr/server/workflow/secure_aggregation/secaggplus_workflow.py +1 -0
- flwr/simulation/ray_transport/ray_actor.py +2 -2
- flwr/simulation/run_simulation.py +85 -25
- flwr/superexec/__init__.py +0 -6
- flwr/superexec/app.py +5 -3
- flwr/superexec/deployment.py +2 -2
- flwr/superexec/simulation.py +20 -1
- {flwr_nightly-1.11.0.dev20240822.dist-info → flwr_nightly-1.11.1.dev20240912.dist-info}/METADATA +3 -3
- {flwr_nightly-1.11.0.dev20240822.dist-info → flwr_nightly-1.11.1.dev20240912.dist-info}/RECORD +70 -62
- flwr_nightly-1.11.1.dev20240912.dist-info/entry_points.txt +10 -0
- flwr/cli/new/templates/app/code/flwr_tune/app.py.tpl +0 -89
- flwr/cli/new/templates/app/code/flwr_tune/config.yaml.tpl +0 -34
- flwr/cli/new/templates/app/code/flwr_tune/server.py.tpl +0 -48
- flwr/cli/new/templates/app/code/flwr_tune/static_config.yaml.tpl +0 -11
- flwr_nightly-1.11.0.dev20240822.dist-info/entry_points.txt +0 -10
- {flwr_nightly-1.11.0.dev20240822.dist-info → flwr_nightly-1.11.1.dev20240912.dist-info}/LICENSE +0 -0
- {flwr_nightly-1.11.0.dev20240822.dist-info → flwr_nightly-1.11.1.dev20240912.dist-info}/WHEEL +0 -0
|
@@ -45,7 +45,7 @@ class InMemoryState(State): # pylint: disable=R0902,R0904
|
|
|
45
45
|
self.task_ins_store: Dict[UUID, TaskIns] = {}
|
|
46
46
|
self.task_res_store: Dict[UUID, TaskRes] = {}
|
|
47
47
|
|
|
48
|
-
self.
|
|
48
|
+
self.node_public_keys: Set[bytes] = set()
|
|
49
49
|
self.server_public_key: Optional[bytes] = None
|
|
50
50
|
self.server_private_key: Optional[bytes] = None
|
|
51
51
|
|
|
@@ -237,7 +237,7 @@ class InMemoryState(State): # pylint: disable=R0902,R0904
|
|
|
237
237
|
return node_id
|
|
238
238
|
|
|
239
239
|
def delete_node(self, node_id: int, public_key: Optional[bytes] = None) -> None:
|
|
240
|
-
"""Delete a
|
|
240
|
+
"""Delete a node."""
|
|
241
241
|
with self.lock:
|
|
242
242
|
if node_id not in self.node_ids:
|
|
243
243
|
raise ValueError(f"Node {node_id} not found")
|
|
@@ -254,7 +254,7 @@ class InMemoryState(State): # pylint: disable=R0902,R0904
|
|
|
254
254
|
del self.node_ids[node_id]
|
|
255
255
|
|
|
256
256
|
def get_nodes(self, run_id: int) -> Set[int]:
|
|
257
|
-
"""Return all available
|
|
257
|
+
"""Return all available nodes.
|
|
258
258
|
|
|
259
259
|
Constraints
|
|
260
260
|
-----------
|
|
@@ -271,9 +271,9 @@ class InMemoryState(State): # pylint: disable=R0902,R0904
|
|
|
271
271
|
if online_until > current_time
|
|
272
272
|
}
|
|
273
273
|
|
|
274
|
-
def get_node_id(self,
|
|
275
|
-
"""Retrieve stored `node_id` filtered by `
|
|
276
|
-
return self.public_key_to_node_id.get(
|
|
274
|
+
def get_node_id(self, node_public_key: bytes) -> Optional[int]:
|
|
275
|
+
"""Retrieve stored `node_id` filtered by `node_public_keys`."""
|
|
276
|
+
return self.public_key_to_node_id.get(node_public_key)
|
|
277
277
|
|
|
278
278
|
def create_run(
|
|
279
279
|
self,
|
|
@@ -318,19 +318,19 @@ class InMemoryState(State): # pylint: disable=R0902,R0904
|
|
|
318
318
|
"""Retrieve `server_public_key` in urlsafe bytes."""
|
|
319
319
|
return self.server_public_key
|
|
320
320
|
|
|
321
|
-
def
|
|
322
|
-
"""Store a set of `
|
|
321
|
+
def store_node_public_keys(self, public_keys: Set[bytes]) -> None:
|
|
322
|
+
"""Store a set of `node_public_keys` in state."""
|
|
323
323
|
with self.lock:
|
|
324
|
-
self.
|
|
324
|
+
self.node_public_keys = public_keys
|
|
325
325
|
|
|
326
|
-
def
|
|
327
|
-
"""Store a `
|
|
326
|
+
def store_node_public_key(self, public_key: bytes) -> None:
|
|
327
|
+
"""Store a `node_public_key` in state."""
|
|
328
328
|
with self.lock:
|
|
329
|
-
self.
|
|
329
|
+
self.node_public_keys.add(public_key)
|
|
330
330
|
|
|
331
|
-
def
|
|
332
|
-
"""Retrieve all currently stored `
|
|
333
|
-
return self.
|
|
331
|
+
def get_node_public_keys(self) -> Set[bytes]:
|
|
332
|
+
"""Retrieve all currently stored `node_public_keys` as a set."""
|
|
333
|
+
return self.node_public_keys
|
|
334
334
|
|
|
335
335
|
def get_run(self, run_id: int) -> Optional[Run]:
|
|
336
336
|
"""Retrieve information about the run with the specified `run_id`."""
|
|
@@ -569,7 +569,7 @@ class SqliteState(State): # pylint: disable=R0904
|
|
|
569
569
|
return node_id
|
|
570
570
|
|
|
571
571
|
def delete_node(self, node_id: int, public_key: Optional[bytes] = None) -> None:
|
|
572
|
-
"""Delete a
|
|
572
|
+
"""Delete a node."""
|
|
573
573
|
query = "DELETE FROM node WHERE node_id = ?"
|
|
574
574
|
params = (node_id,)
|
|
575
575
|
|
|
@@ -607,10 +607,10 @@ class SqliteState(State): # pylint: disable=R0904
|
|
|
607
607
|
result: Set[int] = {row["node_id"] for row in rows}
|
|
608
608
|
return result
|
|
609
609
|
|
|
610
|
-
def get_node_id(self,
|
|
611
|
-
"""Retrieve stored `node_id` filtered by `
|
|
610
|
+
def get_node_id(self, node_public_key: bytes) -> Optional[int]:
|
|
611
|
+
"""Retrieve stored `node_id` filtered by `node_public_keys`."""
|
|
612
612
|
query = "SELECT node_id FROM node WHERE public_key = :public_key;"
|
|
613
|
-
row = self.query(query, {"public_key":
|
|
613
|
+
row = self.query(query, {"public_key": node_public_key})
|
|
614
614
|
if len(row) > 0:
|
|
615
615
|
node_id: int = row[0]["node_id"]
|
|
616
616
|
return node_id
|
|
@@ -684,19 +684,19 @@ class SqliteState(State): # pylint: disable=R0904
|
|
|
684
684
|
public_key = None
|
|
685
685
|
return public_key
|
|
686
686
|
|
|
687
|
-
def
|
|
688
|
-
"""Store a set of `
|
|
687
|
+
def store_node_public_keys(self, public_keys: Set[bytes]) -> None:
|
|
688
|
+
"""Store a set of `node_public_keys` in state."""
|
|
689
689
|
query = "INSERT INTO public_key (public_key) VALUES (?)"
|
|
690
690
|
data = [(key,) for key in public_keys]
|
|
691
691
|
self.query(query, data)
|
|
692
692
|
|
|
693
|
-
def
|
|
694
|
-
"""Store a `
|
|
693
|
+
def store_node_public_key(self, public_key: bytes) -> None:
|
|
694
|
+
"""Store a `node_public_key` in state."""
|
|
695
695
|
query = "INSERT INTO public_key (public_key) VALUES (:public_key)"
|
|
696
696
|
self.query(query, {"public_key": public_key})
|
|
697
697
|
|
|
698
|
-
def
|
|
699
|
-
"""Retrieve all currently stored `
|
|
698
|
+
def get_node_public_keys(self) -> Set[bytes]:
|
|
699
|
+
"""Retrieve all currently stored `node_public_keys` as a set."""
|
|
700
700
|
query = "SELECT public_key FROM public_key"
|
|
701
701
|
rows = self.query(query)
|
|
702
702
|
result: Set[bytes] = {row["public_key"] for row in rows}
|
|
@@ -153,8 +153,8 @@ class State(abc.ABC): # pylint: disable=R0904
|
|
|
153
153
|
"""
|
|
154
154
|
|
|
155
155
|
@abc.abstractmethod
|
|
156
|
-
def get_node_id(self,
|
|
157
|
-
"""Retrieve stored `node_id` filtered by `
|
|
156
|
+
def get_node_id(self, node_public_key: bytes) -> Optional[int]:
|
|
157
|
+
"""Retrieve stored `node_id` filtered by `node_public_keys`."""
|
|
158
158
|
|
|
159
159
|
@abc.abstractmethod
|
|
160
160
|
def create_run(
|
|
@@ -199,16 +199,16 @@ class State(abc.ABC): # pylint: disable=R0904
|
|
|
199
199
|
"""Retrieve `server_public_key` in urlsafe bytes."""
|
|
200
200
|
|
|
201
201
|
@abc.abstractmethod
|
|
202
|
-
def
|
|
203
|
-
"""Store a set of `
|
|
202
|
+
def store_node_public_keys(self, public_keys: Set[bytes]) -> None:
|
|
203
|
+
"""Store a set of `node_public_keys` in state."""
|
|
204
204
|
|
|
205
205
|
@abc.abstractmethod
|
|
206
|
-
def
|
|
207
|
-
"""Store a `
|
|
206
|
+
def store_node_public_key(self, public_key: bytes) -> None:
|
|
207
|
+
"""Store a `node_public_key` in state."""
|
|
208
208
|
|
|
209
209
|
@abc.abstractmethod
|
|
210
|
-
def
|
|
211
|
-
"""Retrieve all currently stored `
|
|
210
|
+
def get_node_public_keys(self) -> Set[bytes]:
|
|
211
|
+
"""Retrieve all currently stored `node_public_keys` as a set."""
|
|
212
212
|
|
|
213
213
|
@abc.abstractmethod
|
|
214
214
|
def acknowledge_ping(self, node_id: int, ping_interval: float) -> bool:
|
|
@@ -35,6 +35,7 @@ class SecAggWorkflow(SecAggPlusWorkflow):
|
|
|
35
35
|
contributions to compute the weighted average of model parameters.
|
|
36
36
|
|
|
37
37
|
The protocol involves four main stages:
|
|
38
|
+
|
|
38
39
|
- 'setup': Send SecAgg configuration to clients and collect their public keys.
|
|
39
40
|
- 'share keys': Broadcast public keys among clients and collect encrypted secret
|
|
40
41
|
key shares.
|
|
@@ -99,6 +99,7 @@ class SecAggPlusWorkflow:
|
|
|
99
99
|
contributions to compute the weighted average of model parameters.
|
|
100
100
|
|
|
101
101
|
The protocol involves four main stages:
|
|
102
|
+
|
|
102
103
|
- 'setup': Send SecAgg+ configuration to clients and collect their public keys.
|
|
103
104
|
- 'share keys': Broadcast public keys among clients and collect encrypted secret
|
|
104
105
|
key shares.
|
|
@@ -124,14 +124,14 @@ def pool_size_from_resources(client_resources: Dict[str, Union[int, float]]) ->
|
|
|
124
124
|
WARNING,
|
|
125
125
|
"The ActorPool is empty. The system (CPUs=%s, GPUs=%s) "
|
|
126
126
|
"does not meet the criteria to host at least one client with resources:"
|
|
127
|
-
" %s. Lowering
|
|
127
|
+
" %s. Lowering these resources could help.",
|
|
128
128
|
num_cpus,
|
|
129
129
|
num_gpus,
|
|
130
130
|
client_resources,
|
|
131
131
|
)
|
|
132
132
|
raise ValueError(
|
|
133
133
|
"ActorPool is empty. Stopping Simulation. "
|
|
134
|
-
"Check
|
|
134
|
+
"Check `num_cpus` and/or `num_gpus` passed to the simulation engine"
|
|
135
135
|
)
|
|
136
136
|
|
|
137
137
|
return total_num_actors
|
|
@@ -25,7 +25,7 @@ from argparse import Namespace
|
|
|
25
25
|
from logging import DEBUG, ERROR, INFO, WARNING
|
|
26
26
|
from pathlib import Path
|
|
27
27
|
from time import sleep
|
|
28
|
-
from typing import List, Optional
|
|
28
|
+
from typing import Any, List, Optional
|
|
29
29
|
|
|
30
30
|
from flwr.cli.config_utils import load_and_validate
|
|
31
31
|
from flwr.client import ClientApp
|
|
@@ -35,6 +35,7 @@ from flwr.common.constant import RUN_ID_NUM_BYTES
|
|
|
35
35
|
from flwr.common.logger import (
|
|
36
36
|
set_logger_propagation,
|
|
37
37
|
update_console_handler,
|
|
38
|
+
warn_deprecated_feature,
|
|
38
39
|
warn_deprecated_feature_with_example,
|
|
39
40
|
)
|
|
40
41
|
from flwr.common.typing import Run, UserConfig
|
|
@@ -91,12 +92,41 @@ def _check_args_do_not_interfere(args: Namespace) -> bool:
|
|
|
91
92
|
return True
|
|
92
93
|
|
|
93
94
|
|
|
95
|
+
def _replace_keys(d: Any, match: str, target: str) -> Any:
|
|
96
|
+
if isinstance(d, dict):
|
|
97
|
+
return {
|
|
98
|
+
k.replace(match, target): _replace_keys(v, match, target)
|
|
99
|
+
for k, v in d.items()
|
|
100
|
+
}
|
|
101
|
+
if isinstance(d, list):
|
|
102
|
+
return [_replace_keys(i, match, target) for i in d]
|
|
103
|
+
return d
|
|
104
|
+
|
|
105
|
+
|
|
94
106
|
# Entry point from CLI
|
|
95
107
|
# pylint: disable=too-many-locals
|
|
96
108
|
def run_simulation_from_cli() -> None:
|
|
97
109
|
"""Run Simulation Engine from the CLI."""
|
|
98
110
|
args = _parse_args_run_simulation().parse_args()
|
|
99
111
|
|
|
112
|
+
event(
|
|
113
|
+
EventType.CLI_FLOWER_SIMULATION_ENTER,
|
|
114
|
+
event_details={"backend": args.backend, "num-supernodes": args.num_supernodes},
|
|
115
|
+
)
|
|
116
|
+
|
|
117
|
+
# Add warnings for deprecated server_app and client_app arguments
|
|
118
|
+
if args.server_app:
|
|
119
|
+
warn_deprecated_feature(
|
|
120
|
+
"The `--server-app` argument is deprecated. "
|
|
121
|
+
"Please use the `--app` argument instead."
|
|
122
|
+
)
|
|
123
|
+
|
|
124
|
+
if args.client_app:
|
|
125
|
+
warn_deprecated_feature(
|
|
126
|
+
"The `--client-app` argument is deprecated. "
|
|
127
|
+
"Use the `--app` argument instead."
|
|
128
|
+
)
|
|
129
|
+
|
|
100
130
|
if args.enable_tf_gpu_growth:
|
|
101
131
|
warn_deprecated_feature_with_example(
|
|
102
132
|
"Passing `--enable-tf-gpu-growth` is deprecated.",
|
|
@@ -105,6 +135,14 @@ def run_simulation_from_cli() -> None:
|
|
|
105
135
|
code_example='TF_FORCE_GPU_ALLOW_GROWTH="true" flower-simulation <...>',
|
|
106
136
|
)
|
|
107
137
|
|
|
138
|
+
# Load JSON config
|
|
139
|
+
backend_config_dict = json.loads(args.backend_config)
|
|
140
|
+
|
|
141
|
+
if backend_config_dict:
|
|
142
|
+
# Backend config internally operates with `_` not with `-`
|
|
143
|
+
backend_config_dict = _replace_keys(backend_config_dict, match="-", target="_")
|
|
144
|
+
log(DEBUG, "backend_config_dict: %s", backend_config_dict)
|
|
145
|
+
|
|
108
146
|
# We are supporting two modes for the CLI entrypoint:
|
|
109
147
|
# 1) Running an app dir containing a `pyproject.toml`
|
|
110
148
|
# 2) Running any ClientApp and SeverApp w/o pyproject.toml being present
|
|
@@ -144,7 +182,9 @@ def run_simulation_from_cli() -> None:
|
|
|
144
182
|
client_app_attr = app_components["clientapp"]
|
|
145
183
|
server_app_attr = app_components["serverapp"]
|
|
146
184
|
|
|
147
|
-
override_config = parse_config_args(
|
|
185
|
+
override_config = parse_config_args(
|
|
186
|
+
[args.run_config] if args.run_config else args.run_config
|
|
187
|
+
)
|
|
148
188
|
fused_config = get_fused_config_from_dir(app_path, override_config)
|
|
149
189
|
app_dir = args.app
|
|
150
190
|
is_app = True
|
|
@@ -167,9 +207,6 @@ def run_simulation_from_cli() -> None:
|
|
|
167
207
|
override_config=override_config,
|
|
168
208
|
)
|
|
169
209
|
|
|
170
|
-
# Load JSON config
|
|
171
|
-
backend_config_dict = json.loads(args.backend_config)
|
|
172
|
-
|
|
173
210
|
_run_simulation(
|
|
174
211
|
server_app_attr=server_app_attr,
|
|
175
212
|
client_app_attr=client_app_attr,
|
|
@@ -179,9 +216,11 @@ def run_simulation_from_cli() -> None:
|
|
|
179
216
|
app_dir=app_dir,
|
|
180
217
|
run=run,
|
|
181
218
|
enable_tf_gpu_growth=args.enable_tf_gpu_growth,
|
|
219
|
+
delay_start=args.delay_start,
|
|
182
220
|
verbose_logging=args.verbose,
|
|
183
221
|
server_app_run_config=fused_config,
|
|
184
222
|
is_app=is_app,
|
|
223
|
+
exit_event=EventType.CLI_FLOWER_SIMULATION_LEAVE,
|
|
185
224
|
)
|
|
186
225
|
|
|
187
226
|
|
|
@@ -209,9 +248,8 @@ def run_simulation(
|
|
|
209
248
|
messages sent by the `ServerApp`.
|
|
210
249
|
|
|
211
250
|
num_supernodes : int
|
|
212
|
-
Number of nodes that run a ClientApp. They can be sampled by a
|
|
213
|
-
|
|
214
|
-
should perform.
|
|
251
|
+
Number of nodes that run a ClientApp. They can be sampled by a Driver in the
|
|
252
|
+
ServerApp and receive a Message describing what the ClientApp should perform.
|
|
215
253
|
|
|
216
254
|
backend_name : str (default: ray)
|
|
217
255
|
A simulation backend that runs `ClientApp`s.
|
|
@@ -236,10 +274,15 @@ def run_simulation(
|
|
|
236
274
|
When disabled, only INFO, WARNING and ERROR log messages will be shown. If
|
|
237
275
|
enabled, DEBUG-level logs will be displayed.
|
|
238
276
|
"""
|
|
277
|
+
event(
|
|
278
|
+
EventType.PYTHON_API_RUN_SIMULATION_ENTER,
|
|
279
|
+
event_details={"backend": backend_name, "num-supernodes": num_supernodes},
|
|
280
|
+
)
|
|
281
|
+
|
|
239
282
|
if enable_tf_gpu_growth:
|
|
240
283
|
warn_deprecated_feature_with_example(
|
|
241
284
|
"Passing `enable_tf_gpu_growth=True` is deprecated.",
|
|
242
|
-
example_message="Instead, set the `TF_FORCE_GPU_ALLOW_GROWTH`
|
|
285
|
+
example_message="Instead, set the `TF_FORCE_GPU_ALLOW_GROWTH` environment "
|
|
243
286
|
"variable to true.",
|
|
244
287
|
code_example='import os;os.environ["TF_FORCE_GPU_ALLOW_GROWTH"]="true"'
|
|
245
288
|
"\n\tflwr.simulation.run_simulationt(...)",
|
|
@@ -253,6 +296,7 @@ def run_simulation(
|
|
|
253
296
|
backend_config=backend_config,
|
|
254
297
|
enable_tf_gpu_growth=enable_tf_gpu_growth,
|
|
255
298
|
verbose_logging=verbose_logging,
|
|
299
|
+
exit_event=EventType.PYTHON_API_RUN_SIMULATION_LEAVE,
|
|
256
300
|
)
|
|
257
301
|
|
|
258
302
|
|
|
@@ -266,7 +310,6 @@ def run_serverapp_th(
|
|
|
266
310
|
f_stop: threading.Event,
|
|
267
311
|
has_exception: threading.Event,
|
|
268
312
|
enable_tf_gpu_growth: bool,
|
|
269
|
-
delay_launch: int = 3,
|
|
270
313
|
) -> threading.Thread:
|
|
271
314
|
"""Run SeverApp in a thread."""
|
|
272
315
|
|
|
@@ -322,7 +365,6 @@ def run_serverapp_th(
|
|
|
322
365
|
server_app,
|
|
323
366
|
),
|
|
324
367
|
)
|
|
325
|
-
sleep(delay_launch)
|
|
326
368
|
serverapp_th.start()
|
|
327
369
|
return serverapp_th
|
|
328
370
|
|
|
@@ -336,6 +378,8 @@ def _main_loop(
|
|
|
336
378
|
is_app: bool,
|
|
337
379
|
enable_tf_gpu_growth: bool,
|
|
338
380
|
run: Run,
|
|
381
|
+
exit_event: EventType,
|
|
382
|
+
delay_start: int,
|
|
339
383
|
flwr_dir: Optional[str] = None,
|
|
340
384
|
client_app: Optional[ClientApp] = None,
|
|
341
385
|
client_app_attr: Optional[str] = None,
|
|
@@ -343,7 +387,7 @@ def _main_loop(
|
|
|
343
387
|
server_app_attr: Optional[str] = None,
|
|
344
388
|
server_app_run_config: Optional[UserConfig] = None,
|
|
345
389
|
) -> None:
|
|
346
|
-
"""
|
|
390
|
+
"""Start ServerApp on a separate thread, then launch Simulation Engine."""
|
|
347
391
|
# Initialize StateFactory
|
|
348
392
|
state_factory = StateFactory(":flwr-in-memory-state:")
|
|
349
393
|
|
|
@@ -351,6 +395,7 @@ def _main_loop(
|
|
|
351
395
|
# A Threading event to indicate if an exception was raised in the ServerApp thread
|
|
352
396
|
server_app_thread_has_exception = threading.Event()
|
|
353
397
|
serverapp_th = None
|
|
398
|
+
success = True
|
|
354
399
|
try:
|
|
355
400
|
# Register run
|
|
356
401
|
log(DEBUG, "Pre-registering run with id %s", run.run_id)
|
|
@@ -374,8 +419,10 @@ def _main_loop(
|
|
|
374
419
|
enable_tf_gpu_growth=enable_tf_gpu_growth,
|
|
375
420
|
)
|
|
376
421
|
|
|
377
|
-
#
|
|
378
|
-
|
|
422
|
+
# Buffer time so the `ServerApp` in separate thread is ready
|
|
423
|
+
log(DEBUG, "Buffer time delay: %ds", delay_start)
|
|
424
|
+
sleep(delay_start)
|
|
425
|
+
# Start Simulation Engine
|
|
379
426
|
vce.start_vce(
|
|
380
427
|
num_supernodes=num_supernodes,
|
|
381
428
|
client_app_attr=client_app_attr,
|
|
@@ -393,13 +440,13 @@ def _main_loop(
|
|
|
393
440
|
except Exception as ex:
|
|
394
441
|
log(ERROR, "An exception occurred !! %s", ex)
|
|
395
442
|
log(ERROR, traceback.format_exc())
|
|
443
|
+
success = False
|
|
396
444
|
raise RuntimeError("An error was encountered. Ending simulation.") from ex
|
|
397
445
|
|
|
398
446
|
finally:
|
|
399
447
|
# Trigger stop event
|
|
400
448
|
f_stop.set()
|
|
401
|
-
|
|
402
|
-
event(EventType.RUN_SUPERLINK_LEAVE)
|
|
449
|
+
event(exit_event, event_details={"success": success})
|
|
403
450
|
if serverapp_th:
|
|
404
451
|
serverapp_th.join()
|
|
405
452
|
if server_app_thread_has_exception.is_set():
|
|
@@ -411,6 +458,7 @@ def _main_loop(
|
|
|
411
458
|
# pylint: disable=too-many-arguments,too-many-locals
|
|
412
459
|
def _run_simulation(
|
|
413
460
|
num_supernodes: int,
|
|
461
|
+
exit_event: EventType,
|
|
414
462
|
client_app: Optional[ClientApp] = None,
|
|
415
463
|
server_app: Optional[ServerApp] = None,
|
|
416
464
|
backend_name: str = "ray",
|
|
@@ -422,6 +470,7 @@ def _run_simulation(
|
|
|
422
470
|
flwr_dir: Optional[str] = None,
|
|
423
471
|
run: Optional[Run] = None,
|
|
424
472
|
enable_tf_gpu_growth: bool = False,
|
|
473
|
+
delay_start: int = 5,
|
|
425
474
|
verbose_logging: bool = False,
|
|
426
475
|
is_app: bool = False,
|
|
427
476
|
) -> None:
|
|
@@ -477,6 +526,8 @@ def _run_simulation(
|
|
|
477
526
|
is_app,
|
|
478
527
|
enable_tf_gpu_growth,
|
|
479
528
|
run,
|
|
529
|
+
exit_event,
|
|
530
|
+
delay_start,
|
|
480
531
|
flwr_dir,
|
|
481
532
|
client_app,
|
|
482
533
|
client_app_attr,
|
|
@@ -512,13 +563,22 @@ def _parse_args_run_simulation() -> argparse.ArgumentParser:
|
|
|
512
563
|
parser = argparse.ArgumentParser(
|
|
513
564
|
description="Start a Flower simulation",
|
|
514
565
|
)
|
|
566
|
+
parser.add_argument(
|
|
567
|
+
"--app",
|
|
568
|
+
type=str,
|
|
569
|
+
default=None,
|
|
570
|
+
help="Path to a directory containing a FAB-like structure with a "
|
|
571
|
+
"pyproject.toml.",
|
|
572
|
+
)
|
|
515
573
|
parser.add_argument(
|
|
516
574
|
"--server-app",
|
|
517
|
-
help="For example: `server:app` or
|
|
575
|
+
help="(DEPRECATED: use --app instead) For example: `server:app` or "
|
|
576
|
+
"`project.package.module:wrapper.app`",
|
|
518
577
|
)
|
|
519
578
|
parser.add_argument(
|
|
520
579
|
"--client-app",
|
|
521
|
-
help="For example: `client:app` or
|
|
580
|
+
help="(DEPRECATED: use --app instead) For example: `client:app` or "
|
|
581
|
+
"`project.package.module:wrapper.app`",
|
|
522
582
|
)
|
|
523
583
|
parser.add_argument(
|
|
524
584
|
"--num-supernodes",
|
|
@@ -526,13 +586,6 @@ def _parse_args_run_simulation() -> argparse.ArgumentParser:
|
|
|
526
586
|
required=True,
|
|
527
587
|
help="Number of simulated SuperNodes.",
|
|
528
588
|
)
|
|
529
|
-
parser.add_argument(
|
|
530
|
-
"--app",
|
|
531
|
-
type=str,
|
|
532
|
-
default=None,
|
|
533
|
-
help="Path to a directory containing a FAB-like structure with a "
|
|
534
|
-
"pyproject.toml.",
|
|
535
|
-
)
|
|
536
589
|
parser.add_argument(
|
|
537
590
|
"--run-config",
|
|
538
591
|
default=None,
|
|
@@ -562,6 +615,13 @@ def _parse_args_run_simulation() -> argparse.ArgumentParser:
|
|
|
562
615
|
"Read more about how `tf.config.experimental.set_memory_growth()` works in "
|
|
563
616
|
"the TensorFlow documentation: https://www.tensorflow.org/api/stable.",
|
|
564
617
|
)
|
|
618
|
+
parser.add_argument(
|
|
619
|
+
"--delay-start",
|
|
620
|
+
type=int,
|
|
621
|
+
default=3,
|
|
622
|
+
help="Buffer time (in seconds) to delay the start the simulation engine after "
|
|
623
|
+
"the `ServerApp`, which runs in a separate thread, has been launched.",
|
|
624
|
+
)
|
|
565
625
|
parser.add_argument(
|
|
566
626
|
"--verbose",
|
|
567
627
|
action="store_true",
|
flwr/superexec/__init__.py
CHANGED
flwr/superexec/app.py
CHANGED
|
@@ -25,7 +25,7 @@ import grpc
|
|
|
25
25
|
from flwr.common import EventType, event, log
|
|
26
26
|
from flwr.common.address import parse_address
|
|
27
27
|
from flwr.common.config import parse_config_args
|
|
28
|
-
from flwr.common.constant import
|
|
28
|
+
from flwr.common.constant import EXEC_API_DEFAULT_ADDRESS
|
|
29
29
|
from flwr.common.exit_handlers import register_exit_handlers
|
|
30
30
|
from flwr.common.object_ref import load_app, validate
|
|
31
31
|
|
|
@@ -56,7 +56,9 @@ def run_superexec() -> None:
|
|
|
56
56
|
address=address,
|
|
57
57
|
executor=_load_executor(args),
|
|
58
58
|
certificates=certificates,
|
|
59
|
-
config=parse_config_args(
|
|
59
|
+
config=parse_config_args(
|
|
60
|
+
[args.executor_config] if args.executor_config else args.executor_config
|
|
61
|
+
),
|
|
60
62
|
)
|
|
61
63
|
|
|
62
64
|
grpc_servers = [superexec_server]
|
|
@@ -79,7 +81,7 @@ def _parse_args_run_superexec() -> argparse.ArgumentParser:
|
|
|
79
81
|
parser.add_argument(
|
|
80
82
|
"--address",
|
|
81
83
|
help="SuperExec (gRPC) server address (IPv4, IPv6, or a domain name)",
|
|
82
|
-
default=
|
|
84
|
+
default=EXEC_API_DEFAULT_ADDRESS,
|
|
83
85
|
)
|
|
84
86
|
parser.add_argument(
|
|
85
87
|
"--executor",
|
flwr/superexec/deployment.py
CHANGED
|
@@ -23,13 +23,13 @@ from typing import Optional
|
|
|
23
23
|
from typing_extensions import override
|
|
24
24
|
|
|
25
25
|
from flwr.cli.install import install_from_fab
|
|
26
|
+
from flwr.common.constant import DRIVER_API_DEFAULT_ADDRESS
|
|
26
27
|
from flwr.common.grpc import create_channel
|
|
27
28
|
from flwr.common.logger import log
|
|
28
29
|
from flwr.common.serde import fab_to_proto, user_config_to_proto
|
|
29
30
|
from flwr.common.typing import Fab, UserConfig
|
|
30
31
|
from flwr.proto.driver_pb2 import CreateRunRequest # pylint: disable=E0611
|
|
31
32
|
from flwr.proto.driver_pb2_grpc import DriverStub
|
|
32
|
-
from flwr.server.driver.grpc_driver import DEFAULT_SERVER_ADDRESS_DRIVER
|
|
33
33
|
|
|
34
34
|
from .executor import Executor, RunTracker
|
|
35
35
|
|
|
@@ -50,7 +50,7 @@ class DeploymentEngine(Executor):
|
|
|
50
50
|
|
|
51
51
|
def __init__(
|
|
52
52
|
self,
|
|
53
|
-
superlink: str =
|
|
53
|
+
superlink: str = DRIVER_API_DEFAULT_ADDRESS,
|
|
54
54
|
root_certificates: Optional[str] = None,
|
|
55
55
|
flwr_dir: Optional[str] = None,
|
|
56
56
|
) -> None:
|
flwr/superexec/simulation.py
CHANGED
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
"""Simulation engine executor."""
|
|
16
16
|
|
|
17
17
|
|
|
18
|
+
import json
|
|
18
19
|
import subprocess
|
|
19
20
|
import sys
|
|
20
21
|
from logging import ERROR, INFO, WARN
|
|
@@ -24,6 +25,7 @@ from typing_extensions import override
|
|
|
24
25
|
|
|
25
26
|
from flwr.cli.config_utils import load_and_validate
|
|
26
27
|
from flwr.cli.install import install_from_fab
|
|
28
|
+
from flwr.common.config import unflatten_dict
|
|
27
29
|
from flwr.common.constant import RUN_ID_NUM_BYTES
|
|
28
30
|
from flwr.common.logger import log
|
|
29
31
|
from flwr.common.typing import UserConfig
|
|
@@ -108,6 +110,7 @@ class SimulationEngine(Executor):
|
|
|
108
110
|
)
|
|
109
111
|
self.verbose = verbose
|
|
110
112
|
|
|
113
|
+
# pylint: disable=too-many-locals
|
|
111
114
|
@override
|
|
112
115
|
def start_run(
|
|
113
116
|
self,
|
|
@@ -152,6 +155,15 @@ class SimulationEngine(Executor):
|
|
|
152
155
|
"Config extracted from FAB's pyproject.toml is not valid"
|
|
153
156
|
)
|
|
154
157
|
|
|
158
|
+
# Flatten federated config
|
|
159
|
+
federation_config_flat = unflatten_dict(federation_config)
|
|
160
|
+
|
|
161
|
+
num_supernodes = federation_config_flat.get(
|
|
162
|
+
"num-supernodes", self.num_supernodes
|
|
163
|
+
)
|
|
164
|
+
backend_cfg = federation_config_flat.get("backend", {})
|
|
165
|
+
verbose: Optional[bool] = federation_config_flat.get("verbose")
|
|
166
|
+
|
|
155
167
|
# In Simulation there is no SuperLink, still we create a run_id
|
|
156
168
|
run_id = generate_rand_int_from_bytes(RUN_ID_NUM_BYTES)
|
|
157
169
|
log(INFO, "Created run %s", str(run_id))
|
|
@@ -162,11 +174,18 @@ class SimulationEngine(Executor):
|
|
|
162
174
|
"--app",
|
|
163
175
|
f"{str(fab_path)}",
|
|
164
176
|
"--num-supernodes",
|
|
165
|
-
f"{
|
|
177
|
+
f"{num_supernodes}",
|
|
166
178
|
"--run-id",
|
|
167
179
|
str(run_id),
|
|
168
180
|
]
|
|
169
181
|
|
|
182
|
+
if backend_cfg:
|
|
183
|
+
# Stringify as JSON
|
|
184
|
+
command.extend(["--backend-config", json.dumps(backend_cfg)])
|
|
185
|
+
|
|
186
|
+
if verbose:
|
|
187
|
+
command.extend(["--verbose"])
|
|
188
|
+
|
|
170
189
|
if override_config:
|
|
171
190
|
override_config_str = _user_config_to_str(override_config)
|
|
172
191
|
command.extend(["--run-config", f"{override_config_str}"])
|
{flwr_nightly-1.11.0.dev20240822.dist-info → flwr_nightly-1.11.1.dev20240912.dist-info}/METADATA
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: flwr-nightly
|
|
3
|
-
Version: 1.11.
|
|
3
|
+
Version: 1.11.1.dev20240912
|
|
4
4
|
Summary: Flower: A Friendly Federated Learning Framework
|
|
5
5
|
Home-page: https://flower.ai
|
|
6
6
|
License: Apache-2.0
|
|
@@ -195,8 +195,8 @@ Other [examples](https://github.com/adap/flower/tree/main/examples):
|
|
|
195
195
|
- [PyTorch: From Centralized to Federated](https://github.com/adap/flower/tree/main/examples/pytorch-from-centralized-to-federated)
|
|
196
196
|
- [Vertical FL](https://github.com/adap/flower/tree/main/examples/vertical-fl)
|
|
197
197
|
- [Federated Finetuning of OpenAI's Whisper](https://github.com/adap/flower/tree/main/examples/whisper-federated-finetuning)
|
|
198
|
-
- [Federated Finetuning of Large Language Model](https://github.com/adap/flower/tree/main/examples/llm
|
|
199
|
-
- [Federated Finetuning of a Vision Transformer](https://github.com/adap/flower/tree/main/examples/vit
|
|
198
|
+
- [Federated Finetuning of Large Language Model](https://github.com/adap/flower/tree/main/examples/flowertune-llm)
|
|
199
|
+
- [Federated Finetuning of a Vision Transformer](https://github.com/adap/flower/tree/main/examples/flowertune-vit)
|
|
200
200
|
- [Advanced Flower with TensorFlow/Keras](https://github.com/adap/flower/tree/main/examples/advanced-tensorflow)
|
|
201
201
|
- [Advanced Flower with PyTorch](https://github.com/adap/flower/tree/main/examples/advanced-pytorch)
|
|
202
202
|
- Single-Machine Simulation of Federated Learning Systems ([PyTorch](https://github.com/adap/flower/tree/main/examples/simulation-pytorch)) ([Tensorflow](https://github.com/adap/flower/tree/main/examples/simulation-tensorflow))
|