flwr-nightly 1.22.0.dev20250916__py3-none-any.whl → 1.22.0.dev20250918__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flwr/cli/app.py +2 -0
- flwr/cli/new/new.py +4 -2
- flwr/cli/new/templates/app/README.flowertune.md.tpl +1 -1
- flwr/cli/new/templates/app/code/client.baseline.py.tpl +64 -47
- flwr/cli/new/templates/app/code/client.xgboost.py.tpl +110 -0
- flwr/cli/new/templates/app/code/flwr_tune/client_app.py.tpl +56 -90
- flwr/cli/new/templates/app/code/flwr_tune/models.py.tpl +1 -23
- flwr/cli/new/templates/app/code/flwr_tune/server_app.py.tpl +37 -58
- flwr/cli/new/templates/app/code/flwr_tune/strategy.py.tpl +39 -44
- flwr/cli/new/templates/app/code/model.baseline.py.tpl +0 -14
- flwr/cli/new/templates/app/code/server.baseline.py.tpl +27 -29
- flwr/cli/new/templates/app/code/server.xgboost.py.tpl +56 -0
- flwr/cli/new/templates/app/code/task.xgboost.py.tpl +67 -0
- flwr/cli/new/templates/app/pyproject.baseline.toml.tpl +3 -3
- flwr/cli/new/templates/app/pyproject.flowertune.toml.tpl +1 -1
- flwr/cli/new/templates/app/pyproject.xgboost.toml.tpl +61 -0
- flwr/cli/pull.py +100 -0
- flwr/cli/utils.py +17 -0
- flwr/common/constant.py +2 -0
- flwr/common/exit/exit_code.py +4 -0
- flwr/proto/control_pb2.py +7 -3
- flwr/proto/control_pb2.pyi +24 -0
- flwr/proto/control_pb2_grpc.py +34 -0
- flwr/proto/control_pb2_grpc.pyi +13 -0
- flwr/server/app.py +13 -0
- flwr/serverapp/strategy/__init__.py +4 -0
- flwr/serverapp/strategy/fedprox.py +174 -0
- flwr/serverapp/strategy/fedxgb_cyclic.py +220 -0
- flwr/simulation/app.py +1 -1
- flwr/simulation/run_simulation.py +25 -30
- flwr/supercore/cli/flower_superexec.py +26 -1
- flwr/supercore/constant.py +19 -0
- flwr/supercore/superexec/plugin/exec_plugin.py +11 -1
- flwr/supercore/superexec/run_superexec.py +16 -2
- flwr/superlink/artifact_provider/__init__.py +22 -0
- flwr/superlink/artifact_provider/artifact_provider.py +37 -0
- flwr/superlink/servicer/control/control_grpc.py +3 -0
- flwr/superlink/servicer/control/control_servicer.py +59 -2
- {flwr_nightly-1.22.0.dev20250916.dist-info → flwr_nightly-1.22.0.dev20250918.dist-info}/METADATA +1 -1
- {flwr_nightly-1.22.0.dev20250916.dist-info → flwr_nightly-1.22.0.dev20250918.dist-info}/RECORD +42 -33
- flwr/serverapp/strategy/strategy_utils_tests.py +0 -323
- {flwr_nightly-1.22.0.dev20250916.dist-info → flwr_nightly-1.22.0.dev20250918.dist-info}/WHEEL +0 -0
- {flwr_nightly-1.22.0.dev20250916.dist-info → flwr_nightly-1.22.0.dev20250918.dist-info}/entry_points.txt +0 -0
flwr/cli/app.py
CHANGED
@@ -25,6 +25,7 @@ from .log import log
|
|
25
25
|
from .login import login
|
26
26
|
from .ls import ls
|
27
27
|
from .new import new
|
28
|
+
from .pull import pull
|
28
29
|
from .run import run
|
29
30
|
from .stop import stop
|
30
31
|
|
@@ -46,6 +47,7 @@ app.command()(log)
|
|
46
47
|
app.command()(ls)
|
47
48
|
app.command()(stop)
|
48
49
|
app.command()(login)
|
50
|
+
app.command()(pull)
|
49
51
|
|
50
52
|
typer_click_object = get_command(app)
|
51
53
|
|
flwr/cli/new/new.py
CHANGED
@@ -41,6 +41,7 @@ class MlFramework(str, Enum):
|
|
41
41
|
JAX = "JAX"
|
42
42
|
MLX = "MLX"
|
43
43
|
NUMPY = "NumPy"
|
44
|
+
XGBOOST = "XGBoost"
|
44
45
|
FLOWERTUNE = "FlowerTune"
|
45
46
|
BASELINE = "Flower Baseline"
|
46
47
|
PYTORCH_LEGACY_API = "PyTorch (Legacy API, deprecated)"
|
@@ -201,7 +202,7 @@ def new(
|
|
201
202
|
}
|
202
203
|
|
203
204
|
# Challenge specific context
|
204
|
-
|
205
|
+
fraction_train = "0.2" if llm_challenge_str == "code" else "0.1"
|
205
206
|
if llm_challenge_str == "generalnlp":
|
206
207
|
challenge_name = "General NLP"
|
207
208
|
num_clients = "20"
|
@@ -220,7 +221,7 @@ def new(
|
|
220
221
|
dataset_name = "flwrlabs/code-alpaca-20k"
|
221
222
|
|
222
223
|
context["llm_challenge_str"] = llm_challenge_str
|
223
|
-
context["
|
224
|
+
context["fraction_train"] = fraction_train
|
224
225
|
context["challenge_name"] = challenge_name
|
225
226
|
context["num_clients"] = num_clients
|
226
227
|
context["dataset_name"] = dataset_name
|
@@ -247,6 +248,7 @@ def new(
|
|
247
248
|
MlFramework.TENSORFLOW.value,
|
248
249
|
MlFramework.SKLEARN.value,
|
249
250
|
MlFramework.NUMPY.value,
|
251
|
+
MlFramework.XGBOOST.value,
|
250
252
|
"pytorch_legacy_api",
|
251
253
|
]
|
252
254
|
if framework_str in frameworks_with_tasks:
|
@@ -26,7 +26,7 @@ pip install -e .
|
|
26
26
|
## Experimental setup
|
27
27
|
|
28
28
|
The dataset is divided into $num_clients partitions in an IID fashion, a partition is assigned to each ClientApp.
|
29
|
-
We randomly sample a fraction ($
|
29
|
+
We randomly sample a fraction ($fraction_train) of the total nodes to participate in each round, for a total of `200` rounds.
|
30
30
|
All settings are defined in `pyproject.toml`.
|
31
31
|
|
32
32
|
> [!IMPORTANT]
|
@@ -1,58 +1,75 @@
|
|
1
1
|
"""$project_name: A Flower Baseline."""
|
2
2
|
|
3
3
|
import torch
|
4
|
-
from flwr.
|
5
|
-
from flwr.
|
4
|
+
from flwr.app import ArrayRecord, Context, Message, MetricRecord, RecordDict
|
5
|
+
from flwr.clientapp import ClientApp
|
6
6
|
|
7
7
|
from $import_name.dataset import load_data
|
8
|
-
from $import_name.model import Net
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
self.net,
|
27
|
-
self.trainloader,
|
28
|
-
self.local_epochs,
|
29
|
-
self.device,
|
30
|
-
)
|
31
|
-
return (
|
32
|
-
get_weights(self.net),
|
33
|
-
len(self.trainloader.dataset),
|
34
|
-
{"train_loss": train_loss},
|
35
|
-
)
|
36
|
-
|
37
|
-
def evaluate(self, parameters, config):
|
38
|
-
"""Evaluate model using this client's data."""
|
39
|
-
set_weights(self.net, parameters)
|
40
|
-
loss, accuracy = test(self.net, self.valloader, self.device)
|
41
|
-
return loss, len(self.valloader.dataset), {"accuracy": accuracy}
|
42
|
-
|
43
|
-
|
44
|
-
def client_fn(context: Context):
|
45
|
-
"""Construct a Client that will be run in a ClientApp."""
|
46
|
-
# Load model and data
|
47
|
-
net = Net()
|
8
|
+
from $import_name.model import Net
|
9
|
+
from $import_name.model import test as test_fn
|
10
|
+
from $import_name.model import train as train_fn
|
11
|
+
|
12
|
+
# Flower ClientApp
|
13
|
+
app = ClientApp()
|
14
|
+
|
15
|
+
|
16
|
+
@app.train()
|
17
|
+
def train(msg: Message, context: Context):
|
18
|
+
"""Train the model on local data."""
|
19
|
+
|
20
|
+
# Load the model and initialize it with the received weights
|
21
|
+
model = Net()
|
22
|
+
model.load_state_dict(msg.content["arrays"].to_torch_state_dict())
|
23
|
+
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
24
|
+
|
25
|
+
# Load the data
|
48
26
|
partition_id = int(context.node_config["partition-id"])
|
49
27
|
num_partitions = int(context.node_config["num-partitions"])
|
50
|
-
trainloader,
|
28
|
+
trainloader, _ = load_data(partition_id, num_partitions)
|
51
29
|
local_epochs = context.run_config["local-epochs"]
|
52
30
|
|
53
|
-
#
|
54
|
-
|
31
|
+
# Call the training function
|
32
|
+
train_loss = train_fn(
|
33
|
+
model,
|
34
|
+
trainloader,
|
35
|
+
local_epochs,
|
36
|
+
device,
|
37
|
+
)
|
55
38
|
|
39
|
+
# Construct and return reply Message
|
40
|
+
model_record = ArrayRecord(model.state_dict())
|
41
|
+
metrics = {
|
42
|
+
"train_loss": train_loss,
|
43
|
+
"num-examples": len(trainloader.dataset),
|
44
|
+
}
|
45
|
+
metric_record = MetricRecord(metrics)
|
46
|
+
content = RecordDict({"arrays": model_record, "metrics": metric_record})
|
47
|
+
return Message(content=content, reply_to=msg)
|
56
48
|
|
57
|
-
|
58
|
-
app
|
49
|
+
|
50
|
+
@app.evaluate()
|
51
|
+
def evaluate(msg: Message, context: Context):
|
52
|
+
"""Evaluate the model on local data."""
|
53
|
+
|
54
|
+
# Load the model and initialize it with the received weights
|
55
|
+
model = Net()
|
56
|
+
model.load_state_dict(msg.content["arrays"].to_torch_state_dict())
|
57
|
+
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
58
|
+
|
59
|
+
# Load the data
|
60
|
+
partition_id = int(context.node_config["partition-id"])
|
61
|
+
num_partitions = int(context.node_config["num-partitions"])
|
62
|
+
_, valloader = load_data(partition_id, num_partitions)
|
63
|
+
|
64
|
+
# Call the evaluation function
|
65
|
+
eval_loss, eval_acc = test_fn(model, valloader, device)
|
66
|
+
|
67
|
+
# Construct and return reply Message
|
68
|
+
metrics = {
|
69
|
+
"eval_loss": eval_loss,
|
70
|
+
"eval_acc": eval_acc,
|
71
|
+
"num-examples": len(valloader.dataset),
|
72
|
+
}
|
73
|
+
metric_record = MetricRecord(metrics)
|
74
|
+
content = RecordDict({"metrics": metric_record})
|
75
|
+
return Message(content=content, reply_to=msg)
|
@@ -0,0 +1,110 @@
|
|
1
|
+
"""$project_name: A Flower / $framework_str app."""
|
2
|
+
|
3
|
+
import warnings
|
4
|
+
|
5
|
+
import numpy as np
|
6
|
+
import xgboost as xgb
|
7
|
+
from flwr.app import ArrayRecord, Context, Message, MetricRecord, RecordDict
|
8
|
+
from flwr.clientapp import ClientApp
|
9
|
+
from flwr.common.config import unflatten_dict
|
10
|
+
|
11
|
+
from $import_name.task import load_data, replace_keys
|
12
|
+
|
13
|
+
warnings.filterwarnings("ignore", category=UserWarning)
|
14
|
+
|
15
|
+
|
16
|
+
# Flower ClientApp
|
17
|
+
app = ClientApp()
|
18
|
+
|
19
|
+
|
20
|
+
def _local_boost(bst_input, num_local_round, train_dmatrix):
|
21
|
+
# Update trees based on local training data.
|
22
|
+
for i in range(num_local_round):
|
23
|
+
bst_input.update(train_dmatrix, bst_input.num_boosted_rounds())
|
24
|
+
|
25
|
+
# Bagging: extract the last N=num_local_round trees for server aggregation
|
26
|
+
bst = bst_input[
|
27
|
+
bst_input.num_boosted_rounds()
|
28
|
+
- num_local_round : bst_input.num_boosted_rounds()
|
29
|
+
]
|
30
|
+
return bst
|
31
|
+
|
32
|
+
|
33
|
+
@app.train()
|
34
|
+
def train(msg: Message, context: Context) -> Message:
|
35
|
+
# Load model and data
|
36
|
+
partition_id = context.node_config["partition-id"]
|
37
|
+
num_partitions = context.node_config["num-partitions"]
|
38
|
+
train_dmatrix, _, num_train, _ = load_data(partition_id, num_partitions)
|
39
|
+
|
40
|
+
# Read from run config
|
41
|
+
num_local_round = context.run_config["local-epochs"]
|
42
|
+
# Unflatten config dict and replace "-" with "_"
|
43
|
+
cfg = replace_keys(unflatten_dict(context.run_config))
|
44
|
+
params = cfg["params"]
|
45
|
+
|
46
|
+
global_round = msg.content["config"]["server-round"]
|
47
|
+
if global_round == 1:
|
48
|
+
# First round local training
|
49
|
+
bst = xgb.train(
|
50
|
+
params,
|
51
|
+
train_dmatrix,
|
52
|
+
num_boost_round=num_local_round,
|
53
|
+
)
|
54
|
+
else:
|
55
|
+
bst = xgb.Booster(params=params)
|
56
|
+
global_model = bytearray(msg.content["arrays"]["0"].numpy().tobytes())
|
57
|
+
|
58
|
+
# Load global model into booster
|
59
|
+
bst.load_model(global_model)
|
60
|
+
|
61
|
+
# Local training
|
62
|
+
bst = _local_boost(bst, num_local_round, train_dmatrix)
|
63
|
+
|
64
|
+
# Save model
|
65
|
+
local_model = bst.save_raw("json")
|
66
|
+
model_np = np.frombuffer(local_model, dtype=np.uint8)
|
67
|
+
|
68
|
+
# Construct reply message
|
69
|
+
# Note: we store the model as the first item of a list passed to ArrayRecord,
|
70
|
+
# which can be accessed using index ["0"].
|
71
|
+
model_record = ArrayRecord([model_np])
|
72
|
+
metrics = {
|
73
|
+
"num-examples": num_train,
|
74
|
+
}
|
75
|
+
metric_record = MetricRecord(metrics)
|
76
|
+
content = RecordDict({"arrays": model_record, "metrics": metric_record})
|
77
|
+
return Message(content=content, reply_to=msg)
|
78
|
+
|
79
|
+
|
80
|
+
@app.evaluate()
|
81
|
+
def evaluate(msg: Message, context: Context) -> Message:
|
82
|
+
# Load model and data
|
83
|
+
partition_id = context.node_config["partition-id"]
|
84
|
+
num_partitions = context.node_config["num-partitions"]
|
85
|
+
_, valid_dmatrix, _, num_val = load_data(partition_id, num_partitions)
|
86
|
+
|
87
|
+
# Load config
|
88
|
+
cfg = replace_keys(unflatten_dict(context.run_config))
|
89
|
+
params = cfg["params"]
|
90
|
+
|
91
|
+
# Load global model
|
92
|
+
bst = xgb.Booster(params=params)
|
93
|
+
global_model = bytearray(msg.content["arrays"]["0"].numpy().tobytes())
|
94
|
+
bst.load_model(global_model)
|
95
|
+
|
96
|
+
# Run evaluation
|
97
|
+
eval_results = bst.eval_set(
|
98
|
+
evals=[(valid_dmatrix, "valid")],
|
99
|
+
iteration=bst.num_boosted_rounds() - 1,
|
100
|
+
)
|
101
|
+
auc = float(eval_results.split("\t")[1].split(":")[1])
|
102
|
+
|
103
|
+
# Construct and return reply Message
|
104
|
+
metrics = {
|
105
|
+
"auc": auc,
|
106
|
+
"num-examples": num_val,
|
107
|
+
}
|
108
|
+
metric_record = MetricRecord(metrics)
|
109
|
+
content = RecordDict({"metrics": metric_record})
|
110
|
+
return Message(content=content, reply_to=msg)
|
@@ -2,15 +2,12 @@
|
|
2
2
|
|
3
3
|
import os
|
4
4
|
import warnings
|
5
|
-
from typing import Dict, Tuple
|
6
5
|
|
7
|
-
import
|
8
|
-
from flwr.
|
9
|
-
from flwr.common import Context
|
6
|
+
from flwr.app import ArrayRecord, Context, Message, MetricRecord, RecordDict
|
7
|
+
from flwr.clientapp import ClientApp
|
10
8
|
from flwr.common.config import unflatten_dict
|
11
|
-
from flwr.common.typing import NDArrays, Scalar
|
12
9
|
from omegaconf import DictConfig
|
13
|
-
|
10
|
+
from peft import get_peft_model_state_dict, set_peft_model_state_dict
|
14
11
|
from transformers import TrainingArguments
|
15
12
|
from trl import SFTTrainer
|
16
13
|
|
@@ -19,12 +16,7 @@ from $import_name.dataset import (
|
|
19
16
|
load_data,
|
20
17
|
replace_keys,
|
21
18
|
)
|
22
|
-
from $import_name.models import
|
23
|
-
cosine_annealing,
|
24
|
-
get_model,
|
25
|
-
set_parameters,
|
26
|
-
get_parameters,
|
27
|
-
)
|
19
|
+
from $import_name.models import cosine_annealing, get_model
|
28
20
|
|
29
21
|
# Avoid warnings
|
30
22
|
os.environ["TOKENIZERS_PARALLELISM"] = "true"
|
@@ -32,95 +24,69 @@ os.environ["RAY_DISABLE_DOCKER_CPU_WARNING"] = "1"
|
|
32
24
|
warnings.filterwarnings("ignore", category=UserWarning)
|
33
25
|
|
34
26
|
|
35
|
-
#
|
36
|
-
|
37
|
-
|
38
|
-
|
27
|
+
# Avoid warnings
|
28
|
+
os.environ["TOKENIZERS_PARALLELISM"] = "true"
|
29
|
+
os.environ["RAY_DISABLE_DOCKER_CPU_WARNING"] = "1"
|
30
|
+
warnings.filterwarnings("ignore", category=UserWarning)
|
39
31
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
): # pylint: disable=too-many-arguments
|
50
|
-
self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
51
|
-
self.train_cfg = train_cfg
|
52
|
-
self.training_arguments = TrainingArguments(**train_cfg.training_arguments)
|
53
|
-
self.tokenizer = tokenizer
|
54
|
-
self.formatting_prompts_func = formatting_prompts_func
|
55
|
-
self.data_collator = data_collator
|
56
|
-
self.num_rounds = num_rounds
|
57
|
-
self.trainset = trainset
|
58
|
-
|
59
|
-
# instantiate model
|
60
|
-
self.model = get_model(model_cfg)
|
61
|
-
|
62
|
-
def fit(
|
63
|
-
self, parameters: NDArrays, config: Dict[str, Scalar]
|
64
|
-
) -> Tuple[NDArrays, int, Dict]:
|
65
|
-
"""Implement distributed fit function for a given client."""
|
66
|
-
set_parameters(self.model, parameters)
|
67
|
-
|
68
|
-
new_lr = cosine_annealing(
|
69
|
-
int(config["current_round"]),
|
70
|
-
self.num_rounds,
|
71
|
-
self.train_cfg.learning_rate_max,
|
72
|
-
self.train_cfg.learning_rate_min,
|
73
|
-
)
|
74
|
-
|
75
|
-
self.training_arguments.learning_rate = new_lr
|
76
|
-
self.training_arguments.output_dir = config["save_path"]
|
77
|
-
|
78
|
-
# Construct trainer
|
79
|
-
trainer = SFTTrainer(
|
80
|
-
model=self.model,
|
81
|
-
tokenizer=self.tokenizer,
|
82
|
-
args=self.training_arguments,
|
83
|
-
max_seq_length=self.train_cfg.seq_length,
|
84
|
-
train_dataset=self.trainset,
|
85
|
-
formatting_func=self.formatting_prompts_func,
|
86
|
-
data_collator=self.data_collator,
|
87
|
-
)
|
88
|
-
|
89
|
-
# Do local training
|
90
|
-
results = trainer.train()
|
91
|
-
|
92
|
-
return (
|
93
|
-
get_parameters(self.model),
|
94
|
-
len(self.trainset),
|
95
|
-
{"train_loss": results.training_loss},
|
96
|
-
)
|
97
|
-
|
98
|
-
|
99
|
-
def client_fn(context: Context) -> FlowerClient:
|
100
|
-
"""Create a Flower client representing a single organization."""
|
32
|
+
|
33
|
+
# Flower ClientApp
|
34
|
+
app = ClientApp()
|
35
|
+
|
36
|
+
|
37
|
+
@app.train()
|
38
|
+
def train(msg: Message, context: Context):
|
39
|
+
"""Train the model on local data."""
|
40
|
+
# Parse config
|
101
41
|
partition_id = context.node_config["partition-id"]
|
102
42
|
num_partitions = context.node_config["num-partitions"]
|
103
43
|
num_rounds = context.run_config["num-server-rounds"]
|
104
44
|
cfg = DictConfig(replace_keys(unflatten_dict(context.run_config)))
|
45
|
+
training_arguments = TrainingArguments(**cfg.train.training_arguments)
|
105
46
|
|
106
47
|
# Let's get the client partition
|
107
|
-
|
48
|
+
trainset = load_data(partition_id, num_partitions, cfg.static.dataset.name)
|
108
49
|
(
|
109
50
|
tokenizer,
|
110
51
|
data_collator,
|
111
52
|
formatting_prompts_func,
|
112
53
|
) = get_tokenizer_and_data_collator_and_propt_formatting(cfg.model.name)
|
113
54
|
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
client_trainset,
|
118
|
-
tokenizer,
|
119
|
-
formatting_prompts_func,
|
120
|
-
data_collator,
|
121
|
-
num_rounds,
|
122
|
-
).to_client()
|
123
|
-
|
55
|
+
# Load the model and initialize it with the received weights
|
56
|
+
model = get_model(cfg.model)
|
57
|
+
set_peft_model_state_dict(model, msg.content["arrays"].to_torch_state_dict())
|
124
58
|
|
125
|
-
#
|
126
|
-
|
59
|
+
# Set learning rate for current round
|
60
|
+
new_lr = cosine_annealing(
|
61
|
+
msg.content["config"]["server-round"],
|
62
|
+
num_rounds,
|
63
|
+
cfg.train.learning_rate_max,
|
64
|
+
cfg.train.learning_rate_min,
|
65
|
+
)
|
66
|
+
|
67
|
+
training_arguments.learning_rate = new_lr
|
68
|
+
training_arguments.output_dir = msg.content["config"]["save_path"]
|
69
|
+
|
70
|
+
# Construct trainer
|
71
|
+
trainer = SFTTrainer(
|
72
|
+
model=model,
|
73
|
+
tokenizer=tokenizer,
|
74
|
+
args=training_arguments,
|
75
|
+
max_seq_length=cfg.train.seq_length,
|
76
|
+
train_dataset=trainset,
|
77
|
+
formatting_func=formatting_prompts_func,
|
78
|
+
data_collator=data_collator,
|
79
|
+
)
|
80
|
+
|
81
|
+
# Do local training
|
82
|
+
results = trainer.train()
|
83
|
+
|
84
|
+
# Construct and return reply Message
|
85
|
+
model_record = ArrayRecord(get_peft_model_state_dict(model))
|
86
|
+
metrics = {
|
87
|
+
"train_loss": results.training_loss,
|
88
|
+
"num-examples": len(trainset),
|
89
|
+
}
|
90
|
+
metric_record = MetricRecord(metrics)
|
91
|
+
content = RecordDict({"arrays": model_record, "metrics": metric_record})
|
92
|
+
return Message(content=content, reply_to=msg)
|
@@ -4,18 +4,10 @@ import math
|
|
4
4
|
|
5
5
|
import torch
|
6
6
|
from omegaconf import DictConfig
|
7
|
-
from
|
8
|
-
from peft import (
|
9
|
-
LoraConfig,
|
10
|
-
get_peft_model,
|
11
|
-
get_peft_model_state_dict,
|
12
|
-
set_peft_model_state_dict,
|
13
|
-
)
|
7
|
+
from peft import LoraConfig, get_peft_model
|
14
8
|
from peft.utils import prepare_model_for_kbit_training
|
15
9
|
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
|
16
10
|
|
17
|
-
from flwr.common.typing import NDArrays
|
18
|
-
|
19
11
|
|
20
12
|
def cosine_annealing(
|
21
13
|
current_round: int,
|
@@ -62,17 +54,3 @@ def get_model(model_cfg: DictConfig):
|
|
62
54
|
model.config.use_cache = False
|
63
55
|
|
64
56
|
return get_peft_model(model, peft_config)
|
65
|
-
|
66
|
-
|
67
|
-
def set_parameters(model, parameters: NDArrays) -> None:
|
68
|
-
"""Change the parameters of the model using the given ones."""
|
69
|
-
peft_state_dict_keys = get_peft_model_state_dict(model).keys()
|
70
|
-
params_dict = zip(peft_state_dict_keys, parameters)
|
71
|
-
state_dict = OrderedDict({k: torch.Tensor(v) for k, v in params_dict})
|
72
|
-
set_peft_model_state_dict(model, state_dict)
|
73
|
-
|
74
|
-
|
75
|
-
def get_parameters(model) -> NDArrays:
|
76
|
-
"""Return the parameters of the current net."""
|
77
|
-
state_dict = get_peft_model_state_dict(model)
|
78
|
-
return [val.cpu().numpy() for _, val in state_dict.items()]
|
@@ -3,62 +3,23 @@
|
|
3
3
|
import os
|
4
4
|
from datetime import datetime
|
5
5
|
|
6
|
-
from flwr.
|
6
|
+
from flwr.app import ArrayRecord, ConfigRecord, Context, MetricRecord
|
7
7
|
from flwr.common.config import unflatten_dict
|
8
|
-
from flwr.
|
8
|
+
from flwr.serverapp import Grid, ServerApp
|
9
9
|
from omegaconf import DictConfig
|
10
|
+
from peft import get_peft_model_state_dict, set_peft_model_state_dict
|
10
11
|
|
11
|
-
from $import_name.models import get_model, get_parameters, set_parameters
|
12
12
|
from $import_name.dataset import replace_keys
|
13
|
+
from $import_name.models import get_model
|
13
14
|
from $import_name.strategy import FlowerTuneLlm
|
14
15
|
|
15
|
-
|
16
|
-
|
17
|
-
# Here we use it to save global model checkpoints
|
18
|
-
def get_evaluate_fn(model_cfg, save_every_round, total_round, save_path):
|
19
|
-
"""Return an evaluation function for saving global model."""
|
20
|
-
|
21
|
-
def evaluate(server_round: int, parameters, config):
|
22
|
-
# Save model
|
23
|
-
if server_round != 0 and (
|
24
|
-
server_round == total_round or server_round % save_every_round == 0
|
25
|
-
):
|
26
|
-
# Init model
|
27
|
-
model = get_model(model_cfg)
|
28
|
-
set_parameters(model, parameters)
|
29
|
-
|
30
|
-
model.save_pretrained(f"{save_path}/peft_{server_round}")
|
31
|
-
|
32
|
-
return 0.0, {}
|
33
|
-
|
34
|
-
return evaluate
|
35
|
-
|
36
|
-
|
37
|
-
def get_on_fit_config(save_path):
|
38
|
-
"""Return a function that will be used to construct the config that the
|
39
|
-
client's fit() method will receive."""
|
40
|
-
|
41
|
-
def fit_config_fn(server_round: int):
|
42
|
-
fit_config = {}
|
43
|
-
fit_config["current_round"] = server_round
|
44
|
-
fit_config["save_path"] = save_path
|
45
|
-
return fit_config
|
46
|
-
|
47
|
-
return fit_config_fn
|
16
|
+
# Create ServerApp
|
17
|
+
app = ServerApp()
|
48
18
|
|
49
19
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
losses = [num_examples * m["train_loss"] for num_examples, m in metrics]
|
54
|
-
examples = [num_examples for num_examples, _ in metrics]
|
55
|
-
|
56
|
-
# Aggregate and return custom metric (weighted average)
|
57
|
-
return {"train_loss": sum(losses) / sum(examples)}
|
58
|
-
|
59
|
-
|
60
|
-
def server_fn(context: Context):
|
61
|
-
"""Construct components that set the ServerApp behaviour."""
|
20
|
+
@app.main()
|
21
|
+
def main(grid: Grid, context: Context) -> None:
|
22
|
+
"""Main entry point for the ServerApp."""
|
62
23
|
# Create output directory given current timestamp
|
63
24
|
current_time = datetime.now()
|
64
25
|
folder_name = current_time.strftime("%Y-%m-%d_%H-%M-%S")
|
@@ -71,24 +32,42 @@ def server_fn(context: Context):
|
|
71
32
|
|
72
33
|
# Get initial model weights
|
73
34
|
init_model = get_model(cfg.model)
|
74
|
-
|
75
|
-
init_model_parameters = ndarrays_to_parameters(init_model_parameters)
|
35
|
+
arrays = ArrayRecord(get_peft_model_state_dict(init_model))
|
76
36
|
|
77
37
|
# Define strategy
|
78
38
|
strategy = FlowerTuneLlm(
|
79
|
-
|
39
|
+
fraction_train=cfg.strategy.fraction_train,
|
80
40
|
fraction_evaluate=cfg.strategy.fraction_evaluate,
|
81
|
-
|
82
|
-
|
83
|
-
|
41
|
+
)
|
42
|
+
|
43
|
+
# Start strategy, run FedAvg for `num_rounds`
|
44
|
+
strategy.start(
|
45
|
+
grid=grid,
|
46
|
+
initial_arrays=arrays,
|
47
|
+
train_config=ConfigRecord({"save_path": save_path}),
|
48
|
+
num_rounds=num_rounds,
|
84
49
|
evaluate_fn=get_evaluate_fn(
|
85
50
|
cfg.model, cfg.train.save_every_round, num_rounds, save_path
|
86
51
|
),
|
87
52
|
)
|
88
|
-
config = ServerConfig(num_rounds=num_rounds)
|
89
53
|
|
90
|
-
return ServerAppComponents(strategy=strategy, config=config)
|
91
54
|
|
55
|
+
# Get function that will be executed by the strategy
|
56
|
+
# Here we use it to save global model checkpoints
|
57
|
+
def get_evaluate_fn(model_cfg, save_every_round, total_round, save_path):
|
58
|
+
"""Return an evaluation function for saving global model."""
|
92
59
|
|
93
|
-
|
94
|
-
|
60
|
+
def evaluate(server_round: int, arrays: ArrayRecord) -> MetricRecord:
|
61
|
+
# Save model
|
62
|
+
if server_round != 0 and (
|
63
|
+
server_round == total_round or server_round % save_every_round == 0
|
64
|
+
):
|
65
|
+
# Init model
|
66
|
+
model = get_model(model_cfg)
|
67
|
+
set_peft_model_state_dict(model, arrays.to_torch_state_dict())
|
68
|
+
|
69
|
+
model.save_pretrained(f"{save_path}/peft_{server_round}")
|
70
|
+
|
71
|
+
return MetricRecord()
|
72
|
+
|
73
|
+
return evaluate
|