flwr-nightly 1.22.0.dev20250915__py3-none-any.whl → 1.22.0.dev20250917__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flwr/cli/app.py +2 -0
- flwr/cli/new/new.py +2 -2
- flwr/cli/new/templates/app/README.flowertune.md.tpl +1 -1
- flwr/cli/new/templates/app/code/client.baseline.py.tpl +64 -47
- flwr/cli/new/templates/app/code/flwr_tune/client_app.py.tpl +56 -90
- flwr/cli/new/templates/app/code/flwr_tune/models.py.tpl +1 -23
- flwr/cli/new/templates/app/code/flwr_tune/server_app.py.tpl +37 -58
- flwr/cli/new/templates/app/code/flwr_tune/strategy.py.tpl +39 -44
- flwr/cli/new/templates/app/code/model.baseline.py.tpl +0 -14
- flwr/cli/new/templates/app/code/server.baseline.py.tpl +27 -29
- flwr/cli/new/templates/app/pyproject.baseline.toml.tpl +3 -3
- flwr/cli/new/templates/app/pyproject.flowertune.toml.tpl +1 -1
- flwr/cli/pull.py +100 -0
- flwr/cli/utils.py +17 -0
- flwr/common/constant.py +2 -0
- flwr/proto/control_pb2.py +7 -3
- flwr/proto/control_pb2.pyi +24 -0
- flwr/proto/control_pb2_grpc.py +34 -0
- flwr/proto/control_pb2_grpc.pyi +13 -0
- flwr/server/app.py +13 -0
- flwr/serverapp/strategy/__init__.py +8 -0
- flwr/serverapp/strategy/fedavg.py +23 -2
- flwr/serverapp/strategy/fedavgm.py +198 -0
- flwr/serverapp/strategy/fedmedian.py +71 -0
- flwr/serverapp/strategy/fedprox.py +174 -0
- flwr/serverapp/strategy/fedtrimmedavg.py +176 -0
- flwr/serverapp/strategy/strategy_utils_tests.py +20 -1
- flwr/simulation/app.py +1 -1
- flwr/simulation/run_simulation.py +25 -30
- flwr/superlink/artifact_provider/__init__.py +22 -0
- flwr/superlink/artifact_provider/artifact_provider.py +37 -0
- flwr/superlink/servicer/control/control_grpc.py +3 -0
- flwr/superlink/servicer/control/control_servicer.py +59 -2
- {flwr_nightly-1.22.0.dev20250915.dist-info → flwr_nightly-1.22.0.dev20250917.dist-info}/METADATA +6 -16
- {flwr_nightly-1.22.0.dev20250915.dist-info → flwr_nightly-1.22.0.dev20250917.dist-info}/RECORD +37 -30
- {flwr_nightly-1.22.0.dev20250915.dist-info → flwr_nightly-1.22.0.dev20250917.dist-info}/WHEEL +0 -0
- {flwr_nightly-1.22.0.dev20250915.dist-info → flwr_nightly-1.22.0.dev20250917.dist-info}/entry_points.txt +0 -0
flwr/cli/app.py
CHANGED
@@ -25,6 +25,7 @@ from .log import log
|
|
25
25
|
from .login import login
|
26
26
|
from .ls import ls
|
27
27
|
from .new import new
|
28
|
+
from .pull import pull
|
28
29
|
from .run import run
|
29
30
|
from .stop import stop
|
30
31
|
|
@@ -46,6 +47,7 @@ app.command()(log)
|
|
46
47
|
app.command()(ls)
|
47
48
|
app.command()(stop)
|
48
49
|
app.command()(login)
|
50
|
+
app.command()(pull)
|
49
51
|
|
50
52
|
typer_click_object = get_command(app)
|
51
53
|
|
flwr/cli/new/new.py
CHANGED
@@ -201,7 +201,7 @@ def new(
|
|
201
201
|
}
|
202
202
|
|
203
203
|
# Challenge specific context
|
204
|
-
|
204
|
+
fraction_train = "0.2" if llm_challenge_str == "code" else "0.1"
|
205
205
|
if llm_challenge_str == "generalnlp":
|
206
206
|
challenge_name = "General NLP"
|
207
207
|
num_clients = "20"
|
@@ -220,7 +220,7 @@ def new(
|
|
220
220
|
dataset_name = "flwrlabs/code-alpaca-20k"
|
221
221
|
|
222
222
|
context["llm_challenge_str"] = llm_challenge_str
|
223
|
-
context["
|
223
|
+
context["fraction_train"] = fraction_train
|
224
224
|
context["challenge_name"] = challenge_name
|
225
225
|
context["num_clients"] = num_clients
|
226
226
|
context["dataset_name"] = dataset_name
|
flwr/cli/new/templates/app/README.flowertune.md.tpl
CHANGED
@@ -26,7 +26,7 @@ pip install -e .
|
|
26
26
|
## Experimental setup
|
27
27
|
|
28
28
|
The dataset is divided into $num_clients partitions in an IID fashion, a partition is assigned to each ClientApp.
|
29
|
-
We randomly sample a fraction ($
|
29
|
+
We randomly sample a fraction ($fraction_train) of the total nodes to participate in each round, for a total of `200` rounds.
|
30
30
|
All settings are defined in `pyproject.toml`.
|
31
31
|
|
32
32
|
> [!IMPORTANT]
|
flwr/cli/new/templates/app/code/client.baseline.py.tpl
CHANGED
@@ -1,58 +1,75 @@
|
|
1
1
|
"""$project_name: A Flower Baseline."""
|
2
2
|
|
3
3
|
import torch
|
4
|
-
from flwr.
|
5
|
-
from flwr.
|
4
|
+
from flwr.app import ArrayRecord, Context, Message, MetricRecord, RecordDict
|
5
|
+
from flwr.clientapp import ClientApp
|
6
6
|
|
7
7
|
from $import_name.dataset import load_data
|
8
|
-
from $import_name.model import Net
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
self.net,
|
27
|
-
self.trainloader,
|
28
|
-
self.local_epochs,
|
29
|
-
self.device,
|
30
|
-
)
|
31
|
-
return (
|
32
|
-
get_weights(self.net),
|
33
|
-
len(self.trainloader.dataset),
|
34
|
-
{"train_loss": train_loss},
|
35
|
-
)
|
36
|
-
|
37
|
-
def evaluate(self, parameters, config):
|
38
|
-
"""Evaluate model using this client's data."""
|
39
|
-
set_weights(self.net, parameters)
|
40
|
-
loss, accuracy = test(self.net, self.valloader, self.device)
|
41
|
-
return loss, len(self.valloader.dataset), {"accuracy": accuracy}
|
42
|
-
|
43
|
-
|
44
|
-
def client_fn(context: Context):
|
45
|
-
"""Construct a Client that will be run in a ClientApp."""
|
46
|
-
# Load model and data
|
47
|
-
net = Net()
|
8
|
+
from $import_name.model import Net
|
9
|
+
from $import_name.model import test as test_fn
|
10
|
+
from $import_name.model import train as train_fn
|
11
|
+
|
12
|
+
# Flower ClientApp
|
13
|
+
app = ClientApp()
|
14
|
+
|
15
|
+
|
16
|
+
@app.train()
|
17
|
+
def train(msg: Message, context: Context):
|
18
|
+
"""Train the model on local data."""
|
19
|
+
|
20
|
+
# Load the model and initialize it with the received weights
|
21
|
+
model = Net()
|
22
|
+
model.load_state_dict(msg.content["arrays"].to_torch_state_dict())
|
23
|
+
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
24
|
+
|
25
|
+
# Load the data
|
48
26
|
partition_id = int(context.node_config["partition-id"])
|
49
27
|
num_partitions = int(context.node_config["num-partitions"])
|
50
|
-
trainloader,
|
28
|
+
trainloader, _ = load_data(partition_id, num_partitions)
|
51
29
|
local_epochs = context.run_config["local-epochs"]
|
52
30
|
|
53
|
-
#
|
54
|
-
|
31
|
+
# Call the training function
|
32
|
+
train_loss = train_fn(
|
33
|
+
model,
|
34
|
+
trainloader,
|
35
|
+
local_epochs,
|
36
|
+
device,
|
37
|
+
)
|
55
38
|
|
39
|
+
# Construct and return reply Message
|
40
|
+
model_record = ArrayRecord(model.state_dict())
|
41
|
+
metrics = {
|
42
|
+
"train_loss": train_loss,
|
43
|
+
"num-examples": len(trainloader.dataset),
|
44
|
+
}
|
45
|
+
metric_record = MetricRecord(metrics)
|
46
|
+
content = RecordDict({"arrays": model_record, "metrics": metric_record})
|
47
|
+
return Message(content=content, reply_to=msg)
|
56
48
|
|
57
|
-
|
58
|
-
app
|
49
|
+
|
50
|
+
@app.evaluate()
|
51
|
+
def evaluate(msg: Message, context: Context):
|
52
|
+
"""Evaluate the model on local data."""
|
53
|
+
|
54
|
+
# Load the model and initialize it with the received weights
|
55
|
+
model = Net()
|
56
|
+
model.load_state_dict(msg.content["arrays"].to_torch_state_dict())
|
57
|
+
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
58
|
+
|
59
|
+
# Load the data
|
60
|
+
partition_id = int(context.node_config["partition-id"])
|
61
|
+
num_partitions = int(context.node_config["num-partitions"])
|
62
|
+
_, valloader = load_data(partition_id, num_partitions)
|
63
|
+
|
64
|
+
# Call the evaluation function
|
65
|
+
eval_loss, eval_acc = test_fn(model, valloader, device)
|
66
|
+
|
67
|
+
# Construct and return reply Message
|
68
|
+
metrics = {
|
69
|
+
"eval_loss": eval_loss,
|
70
|
+
"eval_acc": eval_acc,
|
71
|
+
"num-examples": len(valloader.dataset),
|
72
|
+
}
|
73
|
+
metric_record = MetricRecord(metrics)
|
74
|
+
content = RecordDict({"metrics": metric_record})
|
75
|
+
return Message(content=content, reply_to=msg)
|
flwr/cli/new/templates/app/code/flwr_tune/client_app.py.tpl
CHANGED
@@ -2,15 +2,12 @@
|
|
2
2
|
|
3
3
|
import os
|
4
4
|
import warnings
|
5
|
-
from typing import Dict, Tuple
|
6
5
|
|
7
|
-
import
|
8
|
-
from flwr.
|
9
|
-
from flwr.common import Context
|
6
|
+
from flwr.app import ArrayRecord, Context, Message, MetricRecord, RecordDict
|
7
|
+
from flwr.clientapp import ClientApp
|
10
8
|
from flwr.common.config import unflatten_dict
|
11
|
-
from flwr.common.typing import NDArrays, Scalar
|
12
9
|
from omegaconf import DictConfig
|
13
|
-
|
10
|
+
from peft import get_peft_model_state_dict, set_peft_model_state_dict
|
14
11
|
from transformers import TrainingArguments
|
15
12
|
from trl import SFTTrainer
|
16
13
|
|
@@ -19,12 +16,7 @@ from $import_name.dataset import (
|
|
19
16
|
load_data,
|
20
17
|
replace_keys,
|
21
18
|
)
|
22
|
-
from $import_name.models import
|
23
|
-
cosine_annealing,
|
24
|
-
get_model,
|
25
|
-
set_parameters,
|
26
|
-
get_parameters,
|
27
|
-
)
|
19
|
+
from $import_name.models import cosine_annealing, get_model
|
28
20
|
|
29
21
|
# Avoid warnings
|
30
22
|
os.environ["TOKENIZERS_PARALLELISM"] = "true"
|
@@ -32,95 +24,69 @@ os.environ["RAY_DISABLE_DOCKER_CPU_WARNING"] = "1"
|
|
32
24
|
warnings.filterwarnings("ignore", category=UserWarning)
|
33
25
|
|
34
26
|
|
35
|
-
#
|
36
|
-
|
37
|
-
|
38
|
-
|
27
|
+
# Avoid warnings
|
28
|
+
os.environ["TOKENIZERS_PARALLELISM"] = "true"
|
29
|
+
os.environ["RAY_DISABLE_DOCKER_CPU_WARNING"] = "1"
|
30
|
+
warnings.filterwarnings("ignore", category=UserWarning)
|
39
31
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
): # pylint: disable=too-many-arguments
|
50
|
-
self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
51
|
-
self.train_cfg = train_cfg
|
52
|
-
self.training_arguments = TrainingArguments(**train_cfg.training_arguments)
|
53
|
-
self.tokenizer = tokenizer
|
54
|
-
self.formatting_prompts_func = formatting_prompts_func
|
55
|
-
self.data_collator = data_collator
|
56
|
-
self.num_rounds = num_rounds
|
57
|
-
self.trainset = trainset
|
58
|
-
|
59
|
-
# instantiate model
|
60
|
-
self.model = get_model(model_cfg)
|
61
|
-
|
62
|
-
def fit(
|
63
|
-
self, parameters: NDArrays, config: Dict[str, Scalar]
|
64
|
-
) -> Tuple[NDArrays, int, Dict]:
|
65
|
-
"""Implement distributed fit function for a given client."""
|
66
|
-
set_parameters(self.model, parameters)
|
67
|
-
|
68
|
-
new_lr = cosine_annealing(
|
69
|
-
int(config["current_round"]),
|
70
|
-
self.num_rounds,
|
71
|
-
self.train_cfg.learning_rate_max,
|
72
|
-
self.train_cfg.learning_rate_min,
|
73
|
-
)
|
74
|
-
|
75
|
-
self.training_arguments.learning_rate = new_lr
|
76
|
-
self.training_arguments.output_dir = config["save_path"]
|
77
|
-
|
78
|
-
# Construct trainer
|
79
|
-
trainer = SFTTrainer(
|
80
|
-
model=self.model,
|
81
|
-
tokenizer=self.tokenizer,
|
82
|
-
args=self.training_arguments,
|
83
|
-
max_seq_length=self.train_cfg.seq_length,
|
84
|
-
train_dataset=self.trainset,
|
85
|
-
formatting_func=self.formatting_prompts_func,
|
86
|
-
data_collator=self.data_collator,
|
87
|
-
)
|
88
|
-
|
89
|
-
# Do local training
|
90
|
-
results = trainer.train()
|
91
|
-
|
92
|
-
return (
|
93
|
-
get_parameters(self.model),
|
94
|
-
len(self.trainset),
|
95
|
-
{"train_loss": results.training_loss},
|
96
|
-
)
|
97
|
-
|
98
|
-
|
99
|
-
def client_fn(context: Context) -> FlowerClient:
|
100
|
-
"""Create a Flower client representing a single organization."""
|
32
|
+
|
33
|
+
# Flower ClientApp
|
34
|
+
app = ClientApp()
|
35
|
+
|
36
|
+
|
37
|
+
@app.train()
|
38
|
+
def train(msg: Message, context: Context):
|
39
|
+
"""Train the model on local data."""
|
40
|
+
# Parse config
|
101
41
|
partition_id = context.node_config["partition-id"]
|
102
42
|
num_partitions = context.node_config["num-partitions"]
|
103
43
|
num_rounds = context.run_config["num-server-rounds"]
|
104
44
|
cfg = DictConfig(replace_keys(unflatten_dict(context.run_config)))
|
45
|
+
training_arguments = TrainingArguments(**cfg.train.training_arguments)
|
105
46
|
|
106
47
|
# Let's get the client partition
|
107
|
-
|
48
|
+
trainset = load_data(partition_id, num_partitions, cfg.static.dataset.name)
|
108
49
|
(
|
109
50
|
tokenizer,
|
110
51
|
data_collator,
|
111
52
|
formatting_prompts_func,
|
112
53
|
) = get_tokenizer_and_data_collator_and_propt_formatting(cfg.model.name)
|
113
54
|
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
client_trainset,
|
118
|
-
tokenizer,
|
119
|
-
formatting_prompts_func,
|
120
|
-
data_collator,
|
121
|
-
num_rounds,
|
122
|
-
).to_client()
|
123
|
-
|
55
|
+
# Load the model and initialize it with the received weights
|
56
|
+
model = get_model(cfg.model)
|
57
|
+
set_peft_model_state_dict(model, msg.content["arrays"].to_torch_state_dict())
|
124
58
|
|
125
|
-
#
|
126
|
-
|
59
|
+
# Set learning rate for current round
|
60
|
+
new_lr = cosine_annealing(
|
61
|
+
msg.content["config"]["server-round"],
|
62
|
+
num_rounds,
|
63
|
+
cfg.train.learning_rate_max,
|
64
|
+
cfg.train.learning_rate_min,
|
65
|
+
)
|
66
|
+
|
67
|
+
training_arguments.learning_rate = new_lr
|
68
|
+
training_arguments.output_dir = msg.content["config"]["save_path"]
|
69
|
+
|
70
|
+
# Construct trainer
|
71
|
+
trainer = SFTTrainer(
|
72
|
+
model=model,
|
73
|
+
tokenizer=tokenizer,
|
74
|
+
args=training_arguments,
|
75
|
+
max_seq_length=cfg.train.seq_length,
|
76
|
+
train_dataset=trainset,
|
77
|
+
formatting_func=formatting_prompts_func,
|
78
|
+
data_collator=data_collator,
|
79
|
+
)
|
80
|
+
|
81
|
+
# Do local training
|
82
|
+
results = trainer.train()
|
83
|
+
|
84
|
+
# Construct and return reply Message
|
85
|
+
model_record = ArrayRecord(get_peft_model_state_dict(model))
|
86
|
+
metrics = {
|
87
|
+
"train_loss": results.training_loss,
|
88
|
+
"num-examples": len(trainset),
|
89
|
+
}
|
90
|
+
metric_record = MetricRecord(metrics)
|
91
|
+
content = RecordDict({"arrays": model_record, "metrics": metric_record})
|
92
|
+
return Message(content=content, reply_to=msg)
|
flwr/cli/new/templates/app/code/flwr_tune/models.py.tpl
CHANGED
@@ -4,18 +4,10 @@ import math
|
|
4
4
|
|
5
5
|
import torch
|
6
6
|
from omegaconf import DictConfig
|
7
|
-
from
|
8
|
-
from peft import (
|
9
|
-
LoraConfig,
|
10
|
-
get_peft_model,
|
11
|
-
get_peft_model_state_dict,
|
12
|
-
set_peft_model_state_dict,
|
13
|
-
)
|
7
|
+
from peft import LoraConfig, get_peft_model
|
14
8
|
from peft.utils import prepare_model_for_kbit_training
|
15
9
|
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
|
16
10
|
|
17
|
-
from flwr.common.typing import NDArrays
|
18
|
-
|
19
11
|
|
20
12
|
def cosine_annealing(
|
21
13
|
current_round: int,
|
@@ -62,17 +54,3 @@ def get_model(model_cfg: DictConfig):
|
|
62
54
|
model.config.use_cache = False
|
63
55
|
|
64
56
|
return get_peft_model(model, peft_config)
|
65
|
-
|
66
|
-
|
67
|
-
def set_parameters(model, parameters: NDArrays) -> None:
|
68
|
-
"""Change the parameters of the model using the given ones."""
|
69
|
-
peft_state_dict_keys = get_peft_model_state_dict(model).keys()
|
70
|
-
params_dict = zip(peft_state_dict_keys, parameters)
|
71
|
-
state_dict = OrderedDict({k: torch.Tensor(v) for k, v in params_dict})
|
72
|
-
set_peft_model_state_dict(model, state_dict)
|
73
|
-
|
74
|
-
|
75
|
-
def get_parameters(model) -> NDArrays:
|
76
|
-
"""Return the parameters of the current net."""
|
77
|
-
state_dict = get_peft_model_state_dict(model)
|
78
|
-
return [val.cpu().numpy() for _, val in state_dict.items()]
|
flwr/cli/new/templates/app/code/flwr_tune/server_app.py.tpl
CHANGED
@@ -3,62 +3,23 @@
|
|
3
3
|
import os
|
4
4
|
from datetime import datetime
|
5
5
|
|
6
|
-
from flwr.
|
6
|
+
from flwr.app import ArrayRecord, ConfigRecord, Context, MetricRecord
|
7
7
|
from flwr.common.config import unflatten_dict
|
8
|
-
from flwr.
|
8
|
+
from flwr.serverapp import Grid, ServerApp
|
9
9
|
from omegaconf import DictConfig
|
10
|
+
from peft import get_peft_model_state_dict, set_peft_model_state_dict
|
10
11
|
|
11
|
-
from $import_name.models import get_model, get_parameters, set_parameters
|
12
12
|
from $import_name.dataset import replace_keys
|
13
|
+
from $import_name.models import get_model
|
13
14
|
from $import_name.strategy import FlowerTuneLlm
|
14
15
|
|
15
|
-
|
16
|
-
|
17
|
-
# Here we use it to save global model checkpoints
|
18
|
-
def get_evaluate_fn(model_cfg, save_every_round, total_round, save_path):
|
19
|
-
"""Return an evaluation function for saving global model."""
|
20
|
-
|
21
|
-
def evaluate(server_round: int, parameters, config):
|
22
|
-
# Save model
|
23
|
-
if server_round != 0 and (
|
24
|
-
server_round == total_round or server_round % save_every_round == 0
|
25
|
-
):
|
26
|
-
# Init model
|
27
|
-
model = get_model(model_cfg)
|
28
|
-
set_parameters(model, parameters)
|
29
|
-
|
30
|
-
model.save_pretrained(f"{save_path}/peft_{server_round}")
|
31
|
-
|
32
|
-
return 0.0, {}
|
33
|
-
|
34
|
-
return evaluate
|
35
|
-
|
36
|
-
|
37
|
-
def get_on_fit_config(save_path):
|
38
|
-
"""Return a function that will be used to construct the config that the
|
39
|
-
client's fit() method will receive."""
|
40
|
-
|
41
|
-
def fit_config_fn(server_round: int):
|
42
|
-
fit_config = {}
|
43
|
-
fit_config["current_round"] = server_round
|
44
|
-
fit_config["save_path"] = save_path
|
45
|
-
return fit_config
|
46
|
-
|
47
|
-
return fit_config_fn
|
16
|
+
# Create ServerApp
|
17
|
+
app = ServerApp()
|
48
18
|
|
49
19
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
losses = [num_examples * m["train_loss"] for num_examples, m in metrics]
|
54
|
-
examples = [num_examples for num_examples, _ in metrics]
|
55
|
-
|
56
|
-
# Aggregate and return custom metric (weighted average)
|
57
|
-
return {"train_loss": sum(losses) / sum(examples)}
|
58
|
-
|
59
|
-
|
60
|
-
def server_fn(context: Context):
|
61
|
-
"""Construct components that set the ServerApp behaviour."""
|
20
|
+
@app.main()
|
21
|
+
def main(grid: Grid, context: Context) -> None:
|
22
|
+
"""Main entry point for the ServerApp."""
|
62
23
|
# Create output directory given current timestamp
|
63
24
|
current_time = datetime.now()
|
64
25
|
folder_name = current_time.strftime("%Y-%m-%d_%H-%M-%S")
|
@@ -71,24 +32,42 @@ def server_fn(context: Context):
|
|
71
32
|
|
72
33
|
# Get initial model weights
|
73
34
|
init_model = get_model(cfg.model)
|
74
|
-
|
75
|
-
init_model_parameters = ndarrays_to_parameters(init_model_parameters)
|
35
|
+
arrays = ArrayRecord(get_peft_model_state_dict(init_model))
|
76
36
|
|
77
37
|
# Define strategy
|
78
38
|
strategy = FlowerTuneLlm(
|
79
|
-
|
39
|
+
fraction_train=cfg.strategy.fraction_train,
|
80
40
|
fraction_evaluate=cfg.strategy.fraction_evaluate,
|
81
|
-
|
82
|
-
|
83
|
-
|
41
|
+
)
|
42
|
+
|
43
|
+
# Start strategy, run FedAvg for `num_rounds`
|
44
|
+
strategy.start(
|
45
|
+
grid=grid,
|
46
|
+
initial_arrays=arrays,
|
47
|
+
train_config=ConfigRecord({"save_path": save_path}),
|
48
|
+
num_rounds=num_rounds,
|
84
49
|
evaluate_fn=get_evaluate_fn(
|
85
50
|
cfg.model, cfg.train.save_every_round, num_rounds, save_path
|
86
51
|
),
|
87
52
|
)
|
88
|
-
config = ServerConfig(num_rounds=num_rounds)
|
89
53
|
|
90
|
-
return ServerAppComponents(strategy=strategy, config=config)
|
91
54
|
|
55
|
+
# Get function that will be executed by the strategy
|
56
|
+
# Here we use it to save global model checkpoints
|
57
|
+
def get_evaluate_fn(model_cfg, save_every_round, total_round, save_path):
|
58
|
+
"""Return an evaluation function for saving global model."""
|
92
59
|
|
93
|
-
|
94
|
-
|
60
|
+
def evaluate(server_round: int, arrays: ArrayRecord) -> MetricRecord:
|
61
|
+
# Save model
|
62
|
+
if server_round != 0 and (
|
63
|
+
server_round == total_round or server_round % save_every_round == 0
|
64
|
+
):
|
65
|
+
# Init model
|
66
|
+
model = get_model(model_cfg)
|
67
|
+
set_peft_model_state_dict(model, arrays.to_torch_state_dict())
|
68
|
+
|
69
|
+
model.save_pretrained(f"{save_path}/peft_{server_round}")
|
70
|
+
|
71
|
+
return MetricRecord()
|
72
|
+
|
73
|
+
return evaluate
|
flwr/cli/new/templates/app/code/flwr_tune/strategy.py.tpl
CHANGED
@@ -1,53 +1,48 @@
|
|
1
1
|
"""$project_name: A Flower / FlowerTune app."""
|
2
2
|
|
3
|
-
from
|
3
|
+
from collections.abc import Iterable
|
4
4
|
from logging import INFO, WARN
|
5
|
-
from typing import
|
5
|
+
from typing import Optional
|
6
6
|
|
7
|
-
from flwr.
|
8
|
-
from flwr.
|
9
|
-
from flwr.
|
10
|
-
from flwr.
|
7
|
+
from flwr.app import ArrayRecord, ConfigRecord, Message, MetricRecord
|
8
|
+
from flwr.common import log
|
9
|
+
from flwr.serverapp import Grid
|
10
|
+
from flwr.serverapp.strategy import FedAvg
|
11
11
|
|
12
12
|
|
13
13
|
class FlowerTuneLlm(FedAvg):
|
14
14
|
"""Customised FedAvg strategy implementation.
|
15
|
-
|
15
|
+
|
16
16
|
This class behaves just like FedAvg but also tracks the communication
|
17
|
-
costs associated with `
|
17
|
+
costs associated with `train` over FL rounds.
|
18
18
|
"""
|
19
19
|
def __init__(self, **kwargs):
|
20
20
|
super().__init__(**kwargs)
|
21
21
|
self.comm_tracker = CommunicationTracker()
|
22
22
|
|
23
|
-
def
|
24
|
-
|
25
|
-
):
|
23
|
+
def configure_train(
|
24
|
+
self, server_round: int, arrays: ArrayRecord, config: ConfigRecord, grid: Grid
|
25
|
+
) -> Iterable[Message]:
|
26
26
|
"""Configure the next round of training."""
|
27
|
-
|
28
|
-
|
29
|
-
#
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
# Test communication costs
|
43
|
-
fit_res_list = [fit_res for _, fit_res in results]
|
44
|
-
self.comm_tracker.track(fit_res_list)
|
45
|
-
|
46
|
-
parameters_aggregated, metrics_aggregated = super().aggregate_fit(
|
47
|
-
server_round, results, failures
|
48
|
-
)
|
27
|
+
messages = super().configure_train(server_round, arrays, config, grid)
|
28
|
+
|
29
|
+
# Track communication costs
|
30
|
+
self.comm_tracker.track(messages)
|
31
|
+
|
32
|
+
return messages
|
33
|
+
|
34
|
+
def aggregate_train(
|
35
|
+
self,
|
36
|
+
server_round: int,
|
37
|
+
replies: Iterable[Message],
|
38
|
+
) -> tuple[Optional[ArrayRecord], Optional[MetricRecord]]:
|
39
|
+
"""Aggregate ArrayRecords and MetricRecords in the received Messages."""
|
40
|
+
# Track communication costs
|
41
|
+
self.comm_tracker.track(replies)
|
49
42
|
|
50
|
-
|
43
|
+
arrays, metrics = super().aggregate_train(server_round, replies)
|
44
|
+
|
45
|
+
return arrays, metrics
|
51
46
|
|
52
47
|
|
53
48
|
class CommunicationTracker:
|
@@ -55,16 +50,16 @@ class CommunicationTracker:
|
|
55
50
|
def __init__(self):
|
56
51
|
self.curr_comm_cost = 0.0
|
57
52
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
53
|
+
def track(self, messages: Iterable[Message]):
|
54
|
+
comm_cost = (
|
55
|
+
sum(
|
56
|
+
record.count_bytes()
|
57
|
+
for msg in messages
|
58
|
+
if msg.has_content()
|
59
|
+
for record in msg.content.array_records.values()
|
60
|
+
)
|
61
|
+
/ 1024**2
|
62
|
+
)
|
68
63
|
|
69
64
|
self.curr_comm_cost += comm_cost
|
70
65
|
log(
|
flwr/cli/new/templates/app/code/model.baseline.py.tpl
CHANGED
@@ -1,7 +1,5 @@
|
|
1
1
|
"""$project_name: A Flower Baseline."""
|
2
2
|
|
3
|
-
from collections import OrderedDict
|
4
|
-
|
5
3
|
import torch
|
6
4
|
import torch.nn.functional as F
|
7
5
|
from torch import nn
|
@@ -66,15 +64,3 @@ def test(net, testloader, device):
|
|
66
64
|
accuracy = correct / len(testloader.dataset)
|
67
65
|
loss = loss / len(testloader)
|
68
66
|
return loss, accuracy
|
69
|
-
|
70
|
-
|
71
|
-
def get_weights(net):
|
72
|
-
"""Extract model parameters as numpy arrays from state_dict."""
|
73
|
-
return [val.cpu().numpy() for _, val in net.state_dict().items()]
|
74
|
-
|
75
|
-
|
76
|
-
def set_weights(net, parameters):
|
77
|
-
"""Apply parameters to an existing model."""
|
78
|
-
params_dict = zip(net.state_dict().keys(), parameters)
|
79
|
-
state_dict = OrderedDict({k: torch.from_numpy(v) for k, v in params_dict})
|
80
|
-
net.load_state_dict(state_dict, strict=True)
|