FedModelKit 0.6.1__tar.gz → 0.6.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/PKG-INFO +2 -2
- {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/pyproject.toml +2 -2
- {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/cli.py +3 -3
- fedmodelkit-0.6.6/src/FedModelKit/templates/client_app_template.py +106 -0
- {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/templates/ds_template.ipynb +3 -3
- fedmodelkit-0.6.6/src/FedModelKit/templates/server_app_template.py +144 -0
- fedmodelkit-0.6.6/src/FedModelKit/templates/task_template.py +373 -0
- {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/templates.py +6 -6
- {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/uv.lock +1 -1
- fedmodelkit-0.6.1/src/FedModelKit/templates/client_app_template.py +0 -49
- fedmodelkit-0.6.1/src/FedModelKit/templates/server_app_template.py +0 -38
- fedmodelkit-0.6.1/src/FedModelKit/templates/task_template.py +0 -243
- {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/.gitignore +0 -0
- {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/.python-version +0 -0
- {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/LICENSE +0 -0
- {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/README.md +0 -0
- {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/examples/simulation-scikit-model/.gitignore +0 -0
- {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/examples/simulation-scikit-model/AML_preprocessed_dataset.xlsx +0 -0
- {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/examples/simulation-scikit-model/README.md +0 -0
- {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/examples/simulation-scikit-model/simulation_example.ipynb +0 -0
- {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/images/aggregator.png +0 -0
- {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/images/federated_learning_model.png +0 -0
- {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/images/local_learner.png +0 -0
- {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/README.md +0 -0
- {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/__init__.py +0 -0
- {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/aggregator.py +0 -0
- {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/default_create_functions.py +0 -0
- {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/interface.py +0 -0
- {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/local_learner.py +0 -0
- {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/py.typed +0 -0
- {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/src/utils.py +0 -0
- {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/templates/__init__template.py +0 -0
- {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/templates/extern_pyproject_template.toml +0 -0
- {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/templates/images/doSendModels.png +0 -0
- {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/templates/images/doWaitsForJobs.png +0 -0
- {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/templates/images/dsAggregateModels.png +0 -0
- {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/templates/images/dsDoneSubmittingJobs.png +0 -0
- {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/templates/images/dsSendsJobs.png +0 -0
- {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/templates/images/overview.png +0 -0
- {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/templates/main_template.py +0 -0
- {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/templates/pyproject_template.toml +0 -0
- {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/templates/readme_template.md +0 -0
- {fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/templates/uv_template.lock +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: FedModelKit
|
|
3
|
-
Version: 0.6.1
|
|
4
|
-
Summary: This package contains the core components and protocols for creating, managing, and registering federated learning models using MLflow. It provides utilities for defining local learners, aggregation strategies, and integrating them with MLflow for tracking and deployment.
|
|
3
|
+
Version: 0.6.6
|
|
4
|
+
Summary: LOW_LEVEL_API: This package contains the core components and protocols for creating, managing, and registering federated learning models using MLflow. It provides utilities for defining local learners, aggregation strategies, and integrating them with MLflow for tracking and deployment.
|
|
5
5
|
Author-email: ceresale <alessandro.ceresi@upm.es>
|
|
6
6
|
License-File: LICENSE
|
|
7
7
|
Requires-Python: >=3.13
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "FedModelKit"
|
|
3
|
-
version = "0.6.1"
|
|
4
|
-
description = "This package contains the core components and protocols for creating, managing, and registering federated learning models using MLflow. It provides utilities for defining local learners, aggregation strategies, and integrating them with MLflow for tracking and deployment."
|
|
3
|
+
version = "0.6.6"
|
|
4
|
+
description = "LOW_LEVEL_API: This package contains the core components and protocols for creating, managing, and registering federated learning models using MLflow. It provides utilities for defining local learners, aggregation strategies, and integrating them with MLflow for tracking and deployment."
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
authors = [
|
|
7
7
|
{ name = "ceresale", email = "alessandro.ceresi@upm.es" }
|
|
@@ -25,14 +25,14 @@ def create_structure(exp_name: str = "new_experiment") -> None:
|
|
|
25
25
|
f"{exp_name}": {
|
|
26
26
|
"__init__.py": get_init_template(),
|
|
27
27
|
"task.py": get_task_template(),
|
|
28
|
-
"server_app.py": get_server_template(),
|
|
29
|
-
"client_app.py": get_client_template(),
|
|
28
|
+
"server_app.py": get_server_template(exp_name),
|
|
29
|
+
"client_app.py": get_client_template(exp_name),
|
|
30
30
|
},
|
|
31
31
|
},
|
|
32
32
|
"pyproject.toml": get_extern_pyproject_template(exp_name),
|
|
33
33
|
"uv.lock": get_uv_template(),
|
|
34
34
|
"README.md": get_readme_template(),
|
|
35
|
-
"ds.ipynb": get_ds_template(),
|
|
35
|
+
"ds.ipynb": get_ds_template(exp_name),
|
|
36
36
|
}
|
|
37
37
|
|
|
38
38
|
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
import pickle
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from flwr.client import ClientApp
|
|
6
|
+
from flwr.common import Message, Context
|
|
7
|
+
from flwr.common.record import RecordDict, MetricRecord, ConfigRecord
|
|
8
|
+
from sklearn.preprocessing import OneHotEncoder
|
|
9
|
+
import FedModelKit as msi
|
|
10
|
+
import FedModelKit as msi
|
|
11
|
+
|
|
12
|
+
from EXPERIMENT_NAME.task import Net # Type: ignore[import]
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# Flower ClientApp instance; the handlers below register themselves on it
# through the @app.query / @app.train / @app.evaluate decorators.
app = ClientApp()


@app.query()
def query(msg: Message, ctx: Context) -> Message:
    """
    Handle the initial query message sent by the server.

    Reads the 'fancy_config' config record from the message, builds a fresh
    ``Net`` and asks it to prepare its data for this client, then stores the
    model on ``ctx.state`` so the train/evaluate handlers can pick it up.

    Returns an empty reply addressed to the incoming message.
    """
    # Settings pushed by the server: which client this is and how many exist
    setup = msg.content.config_records['fancy_config']

    # Build the local model and let it load/prepare this client's data
    learner = Net()
    learner.prepare_data(setup['client_id'], setup['num_clients'])

    # Keep the learner around for later handlers.
    # Other objects can be stored the same way: ctx.state.<object_name> = <object>
    ctx.state.local_learner = learner

    return Message(RecordDict(), reply_to=msg)
|
|
39
|
+
|
|
40
|
+
@app.train()
def train(msg: Message, ctx: Context):
    """
    Handle a training message sent by the server.

    Loads the parameters found under 'fancy_model' into the learner stored on
    ``ctx.state``, runs one call of ``train_round()``, and replies with the
    updated parameters ('fancy_model_returned') and the training metrics
    ('train_metrics').
    """
    # Learner created earlier by the query handler
    learner = ctx.state.local_learner

    # Example: reading extra settings sent by the server
    #   fancy_config = msg.content.config_records['fancy_config']
    #   local_epochs = fancy_config['local_epochs']

    # Start from the parameters the server sent
    learner.set_parameters(msg.content.array_records['fancy_model'])

    # Local training; the returned metrics are forwarded to the server
    metrics = learner.train_round()

    # Reply carries the updated parameters together with the metrics
    reply = RecordDict()
    reply.array_records['fancy_model_returned'] = learner.get_parameters()
    reply.metric_records['train_metrics'] = MetricRecord(metrics)

    # Remember the latest metrics and the learner for later calls
    ctx.state.metric_records['prev'] = MetricRecord(metrics)
    ctx.state.local_learner = learner

    return Message(reply, reply_to=msg)
|
|
75
|
+
|
|
76
|
+
@app.evaluate()
def eval(msg: Message, ctx: Context):
    """
    Handle an evaluation message sent by the server.

    Loads the parameters found under 'fancy_model' into the learner stored on
    ``ctx.state``, calls its ``evaluate()`` method, and replies with the
    resulting metrics under 'eval_metrics'.
    """
    # Learner created earlier by the query handler
    learner = ctx.state.local_learner

    # Example: reading extra settings sent by the server
    #   fancy_config = msg.content.config_records['fancy_config']
    #   local_epochs = fancy_config['local_epochs']

    # Evaluate with the parameters the server sent
    learner.set_parameters(msg.content.array_records['fancy_model'])
    metrics = learner.evaluate()

    # Reply carries only the evaluation metrics
    reply = RecordDict()
    reply.metric_records['eval_metrics'] = MetricRecord(metrics)

    # Remember the latest metrics and the learner for later calls
    ctx.state.metric_records['prev'] = MetricRecord(metrics)
    ctx.state.local_learner = learner

    return Message(reply, reply_to=msg)
|
|
@@ -131,7 +131,7 @@
|
|
|
131
131
|
"metadata": {},
|
|
132
132
|
"outputs": [],
|
|
133
133
|
"source": [
|
|
134
|
-
"SYFT_FLWR_PROJECT_PATH = Path(\"./
|
|
134
|
+
"SYFT_FLWR_PROJECT_PATH = Path(\"./EXPERIMENT_NAME\")\n",
|
|
135
135
|
"assert SYFT_FLWR_PROJECT_PATH.exists()"
|
|
136
136
|
]
|
|
137
137
|
},
|
|
@@ -184,7 +184,7 @@
|
|
|
184
184
|
"outputs": [],
|
|
185
185
|
"source": [
|
|
186
186
|
"# clean up\n",
|
|
187
|
-
"!rm -rf {SYFT_FLWR_PROJECT_PATH / \"
|
|
187
|
+
"!rm -rf {SYFT_FLWR_PROJECT_PATH / \"EXPERIMENT_NAME\" / \"__pycache__\"}\n",
|
|
188
188
|
"!rm -rf weights/"
|
|
189
189
|
]
|
|
190
190
|
},
|
|
@@ -229,7 +229,7 @@
|
|
|
229
229
|
"outputs": [],
|
|
230
230
|
"source": [
|
|
231
231
|
"# clean up before submitting jobs\n",
|
|
232
|
-
"!rm -rf {SYFT_FLWR_PROJECT_PATH / \"
|
|
232
|
+
"!rm -rf {SYFT_FLWR_PROJECT_PATH / \"EXPERIMENT_NAME\" / \"__pycache__\"}\n",
|
|
233
233
|
"!rm -rf {SYFT_FLWR_PROJECT_PATH / \"simulation_logs\"}\n",
|
|
234
234
|
"!rm -rf weights/"
|
|
235
235
|
]
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
|
|
2
|
+
# from tkinter import Grid
|
|
3
|
+
from typing import List
|
|
4
|
+
import time
|
|
5
|
+
|
|
6
|
+
import flwr as fl
|
|
7
|
+
from flwr.common import (
|
|
8
|
+
Context,
|
|
9
|
+
NDArrays,
|
|
10
|
+
Message,
|
|
11
|
+
MessageType,
|
|
12
|
+
Metrics,
|
|
13
|
+
RecordDict,
|
|
14
|
+
ConfigRecord,
|
|
15
|
+
DEFAULT_TTL,
|
|
16
|
+
)
|
|
17
|
+
from flwr.server import Grid
|
|
18
|
+
|
|
19
|
+
from EXPERIMENT_NAME.task import Net, Strategy #type: ignore[import]
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# Run via `flower-server-app server:app`
app = fl.server.ServerApp()

# Number of train/evaluate rounds driven by the server, and the epoch count
# advertised to clients in the 'fancy_config' record.
NUM_ROUNDS = 3
LOCAL_EPOCHS = 3


def _build_messages(node_ids, message_type, group_id, make_config, arrays=None):
    """
    Create one message per node.

    Args:
        node_ids: Destination node ids, one message is built for each.
        message_type: Message type to stamp on every message.
        group_id: Group id string shared by the batch.
        make_config: Callable taking the node's position in ``node_ids`` and
            returning the dict stored as the 'fancy_config' config record.
        arrays: Optional array record attached as 'fancy_model'.

    Returns:
        The list of messages, in the order of ``node_ids``.
    """
    messages = []
    for idx, node_id in enumerate(node_ids):
        record_dict = RecordDict()
        if arrays is not None:
            record_dict.array_records["fancy_model"] = arrays
        record_dict.config_records["fancy_config"] = ConfigRecord(make_config(idx))
        messages.append(
            Message(
                content=record_dict,
                message_type=message_type,
                dst_node_id=node_id,
                group_id=group_id,
                ttl=DEFAULT_TTL,
            )
        )
    return messages


@app.main()
def main(grid: Grid, context: Context):
    """
    Main function to run the federated learning server.

    Structure:
    - Send a query message to clients for creating the local learner and loading the data
    - Start global epochs loop for training and evaluation
        - Send training messages to clients
        - Aggregate parameters received from clients
        - Send evaluation messages to clients
        - Aggregate evaluation metrics

    Raises:
        RuntimeError: If the grid reports no client nodes.
    """
    print("Starting test run")

    # Get node IDs of connected clients
    node_ids = list(grid.get_node_ids())
    if not node_ids:
        # Without clients the aggregation step would fail later on an empty
        # result list; stop here with an explicit message instead.
        raise RuntimeError("No client nodes available: cannot run federated learning")
    num_clients = len(node_ids)

    # Initialize the federated model and the aggregation strategy
    global_model = Net()
    aggregation_strategy = Strategy()

    # Ask every client to create its local learner and load its data.
    # Each client receives its own index and the total number of clients.
    messages = _build_messages(
        node_ids,
        MessageType.QUERY,
        str(1),
        lambda idx: {"num_clients": num_clients, "client_id": idx},
    )
    all_replies = list(grid.send_and_receive(messages))
    print(f"Received {len(all_replies)} answers")

    # Run federated training and evaluation for a fixed number of rounds
    for server_round in range(NUM_ROUNDS):
        print(f"Commencing server train and evaluation round {server_round + 1}")

        # Send the current global parameters to every client for training
        messages = _build_messages(
            node_ids,
            MessageType.TRAIN,
            str(server_round),
            lambda idx: {"local_epochs": LOCAL_EPOCHS},
            arrays=global_model.get_parameters(),
        )
        all_replies = list(grid.send_and_receive(messages))
        print(f"Received {len(all_replies)} results")

        # Print metrics received from clients
        for reply in all_replies:
            print(reply.content.metric_records)

        # Aggregate parameters received from clients and update the global model
        array_records_list = [
            reply.content.array_records["fancy_model_returned"] for reply in all_replies
        ]
        new_array_records = aggregation_strategy.aggregate_parameters(array_records_list)
        global_model.set_parameters(new_array_records)

        # Send the aggregated parameters to every client for evaluation
        messages = _build_messages(
            node_ids,
            MessageType.EVALUATE,
            str(server_round),
            lambda idx: {"local_epochs": LOCAL_EPOCHS},
            arrays=new_array_records,
        )
        all_replies = list(grid.send_and_receive(messages))
        print(f"Received {len(all_replies)} results")

        # Print evaluation metrics received from clients
        metrics_records_list = [
            reply.content.metric_records['eval_metrics'] for reply in all_replies
        ]
        for i, reply in enumerate(all_replies):
            print(f"Client {i+1} metrics: ", reply.content.metric_records['eval_metrics'])

        # Aggregate evaluation metrics
        print("Aggregated metrics result: ", aggregation_strategy.aggregate_metrics(metrics_records_list))

    print("🎉🎉🎉 Successfully completed federated learning run! 🎉🎉🎉")
|
|
@@ -0,0 +1,373 @@
|
|
|
1
|
+
from collections import OrderedDict
|
|
2
|
+
|
|
3
|
+
import torch
|
|
4
|
+
import torch.nn as nn
|
|
5
|
+
import torch.optim as optim
|
|
6
|
+
import flwr
|
|
7
|
+
import numpy as np
|
|
8
|
+
from typing import Optional
|
|
9
|
+
from flwr_datasets import FederatedDataset
|
|
10
|
+
from flwr_datasets.partitioner import IidPartitioner
|
|
11
|
+
from imblearn.over_sampling import SMOTE
|
|
12
|
+
from loguru import logger
|
|
13
|
+
from pandas import DataFrame
|
|
14
|
+
from sklearn.model_selection import train_test_split
|
|
15
|
+
from sklearn.preprocessing import StandardScaler
|
|
16
|
+
from torch.utils.data import DataLoader, TensorDataset
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def get_device():
|
|
20
|
+
if torch.cuda.is_available():
|
|
21
|
+
return torch.device("cuda")
|
|
22
|
+
elif torch.backends.mps.is_available():
|
|
23
|
+
return torch.device("mps")
|
|
24
|
+
elif torch.xpu.is_available():
|
|
25
|
+
return torch.device("xpu")
|
|
26
|
+
else:
|
|
27
|
+
return torch.device("cpu")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
DEVICE = get_device()
|
|
31
|
+
logger.info(f"Using device: {DEVICE}")
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class Net(nn.Module):
    """
    Feed-forward binary classifier bundled with the data loading, training,
    evaluation and parameter (de)serialisation helpers used by the client and
    server apps.

    The network is three Linear/BatchNorm/LeakyReLU stages (the first two with
    dropout) followed by a single sigmoid output unit.
    """

    def __init__(self, input_dim=6):
        super(Net, self).__init__()
        self.trainloader = None
        self.testloader = None
        self.fds = None  # Cache FederatedDataset

        # First layer: input_dim -> 32 with batch normalization and dropout
        self.layer1 = nn.Sequential(
            nn.Linear(input_dim, 32),
            nn.BatchNorm1d(32),
            nn.LeakyReLU(0.1),
            nn.Dropout(0.2),
        )

        # Second layer: 32 -> 24
        self.layer2 = nn.Sequential(
            nn.Linear(32, 24),
            nn.BatchNorm1d(24),
            nn.LeakyReLU(0.1),
            nn.Dropout(0.25),
        )

        # Third layer: 24 -> 16 (no dropout)
        self.layer3 = nn.Sequential(
            nn.Linear(24, 16), nn.BatchNorm1d(16), nn.LeakyReLU(0.1)
        )

        # Output layer: one sigmoid unit, paired with BCELoss in training
        self.output_layer = nn.Sequential(nn.Linear(16, 1), nn.Sigmoid())

    def forward(self, x):
        """Run ``x`` through the three hidden stages and the sigmoid output."""
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.output_layer(x)
        return x

    def dataset_processing(
        self, train_df: DataFrame, test_df: DataFrame
    ) -> tuple[DataLoader, DataLoader]:
        """
        Turn raw train/test frames into PyTorch data loaders.

        Steps: drop two columns and impute zeros, split features/labels,
        oversample the training set with SMOTE, standardise features with a
        scaler fitted on the resampled training data, and wrap everything in
        ``DataLoader`` objects.

        Returns:
            ``(train_loader, test_loader)``; the test loader yields the whole
            test set as a single batch.
        """

        def preprocess_df(df: DataFrame) -> DataFrame:
            columns_to_drop = ["SkinThickness", "Insulin"]
            df_new: DataFrame = df.drop(columns_to_drop, axis=1)

            # Calculate mean and median (excluding zeros)
            mean_glucose = df_new[df_new["Glucose"] != 0]["Glucose"].mean()
            median_bmi = df_new[df_new["BMI"] != 0]["BMI"].median()
            median_bp = df_new[df_new["BloodPressure"] != 0]["BloodPressure"].median()

            # Replace zeros values with mean/median
            df_new.replace(
                {
                    "Glucose": {0: mean_glucose},
                    "BMI": {0: median_bmi},
                    "BloodPressure": {0: median_bp},
                },
                inplace=True,
            )

            return df_new

        # Preprocess both datasets (each frame is imputed with its own statistics)
        train_processed = preprocess_df(train_df)
        test_processed = preprocess_df(test_df)

        # Split features and labels: the first six columns are features,
        # everything after is treated as the label
        X_train = train_processed.values[:, :6]
        y_train = train_processed.values[:, 6:]
        X_test = test_processed.values[:, :6]
        y_test = test_processed.values[:, 6:]

        from collections import Counter

        def get_minority_class_count(y):
            return min(Counter(y.flatten()).values())

        # k_neighbors is capped by the minority class size
        minority_count = get_minority_class_count(y_train)
        k_neighbors = min(5, minority_count - 1) if minority_count > 1 else 1

        # Resample the training data to fix the class imbalance
        smote = SMOTE(random_state=42, k_neighbors=k_neighbors)
        X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

        # Scale the data to have zero mean and unit variance
        scaler = StandardScaler()
        X_train_resampled = scaler.fit_transform(X_train_resampled)
        X_test = scaler.transform(X_test)

        # Convert numpy arrays to PyTorch tensors; labels become column vectors
        # so their shape matches the model output
        X_train_tensor = torch.FloatTensor(X_train_resampled)
        y_train_tensor = torch.FloatTensor(y_train_resampled).reshape(-1, 1)
        X_test_tensor = torch.FloatTensor(X_test)
        y_test_tensor = torch.FloatTensor(y_test).reshape(-1, 1)

        # Create datasets and dataloaders
        train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
        test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

        train_loader = DataLoader(dataset=train_dataset, batch_size=10, shuffle=True)
        test_loader = DataLoader(
            dataset=test_dataset, batch_size=len(test_dataset), shuffle=False
        )

        return train_loader, test_loader

    def _parameters_to_dict(self, params_record: flwr.common.ArrayRecord) -> OrderedDict:
        """Convert an ArrayRecord into an OrderedDict of NumPy arrays."""
        state_dict = OrderedDict()
        for k, v in params_record.items():
            state_dict[k] = self._basic_array_deserialisation(v)
        return state_dict

    def _dict_to_parameter_record(
        self,
        parameters: OrderedDict[str, flwr.common.NDArray],
    ) -> flwr.common.ArrayRecord:
        """Convert an OrderedDict of NumPy arrays into an ArrayRecord."""
        state_dict = OrderedDict()
        for k, v in parameters.items():
            state_dict[k] = self._ndarray_to_array(v)

        return flwr.common.ArrayRecord(state_dict)

    def _ndarray_to_array(self, ndarray: flwr.common.NDArray) -> flwr.common.Array:
        """Represent NumPy ndarray as Array."""
        return flwr.common.Array(
            data=ndarray.tobytes(),
            dtype=str(ndarray.dtype),
            stype="numpy.ndarray.tobytes",
            shape=list(ndarray.shape),
        )

    def _basic_array_deserialisation(self, array: flwr.common.Array) -> flwr.common.NDArray:
        """Deserialize an Array (raw bytes + dtype + shape) to a NumPy ndarray."""
        return np.frombuffer(buffer=array.data, dtype=array.dtype).reshape(array.shape)

    def load_syftbox_dataset(self) -> None:
        """Load train.csv/test.csv from the SyftBox dataset path and build the loaders."""
        import pandas as pd

        from syft_flwr.utils import get_syftbox_dataset_path

        data_dir = get_syftbox_dataset_path()
        logger.info(f"Loading dataset from {data_dir}")

        train_df = pd.read_csv(data_dir / "train.csv")
        test_df = pd.read_csv(data_dir / "test.csv")

        self.trainloader, self.testloader = self.dataset_processing(train_df, test_df)

    def load_flwr_data(
        self, partition_id: int, num_partitions: int
    ) -> None:
        """
        Load the `fl-diabetes-prediction` dataset to memory

        The FederatedDataset is created on first use and cached on the
        instance; the requested partition is split 80/20 into train and test.
        """
        if self.fds is None:
            partitioner = IidPartitioner(num_partitions=num_partitions)
            self.fds = FederatedDataset(
                dataset="khoaguin/pima-indians-diabetes-database",
                partitioners={"train": partitioner},
            )

        partition: DataFrame = self.fds.load_partition(partition_id, "train").with_format(
            "pandas"
        )[:]
        train_df, test_df = train_test_split(partition, test_size=0.2, random_state=95)

        self.trainloader, self.testloader = self.dataset_processing(train_df, test_df)

    def prepare_data(
        self, partition_id: int, num_partitions: int
    ) -> None:
        """
        Populate ``trainloader``/``testloader`` for this client.

        Uses the partitioned Flower dataset when ``run_syft_flwr()`` is false,
        otherwise the SyftBox dataset (in which case the partition arguments
        are not used).
        """
        from syft_flwr.utils import run_syft_flwr

        if not run_syft_flwr():
            logger.info("Running flwr locally")
            self.load_flwr_data(
                partition_id=partition_id,
                num_partitions=num_partitions,
            )
        else:
            logger.info("Running with syft_flwr")
            self.load_syftbox_dataset()

    def train_round(self, local_epochs=1):
        """
        Train on ``self.trainloader`` for ``local_epochs`` epochs.

        A new Adam optimizer is created on every call.

        Returns:
            Dict with per-epoch lists under "train_loss" and "train_acc".
        """
        criterion = nn.BCELoss()
        optimizer = optim.Adam(self.parameters(), lr=0.001, weight_decay=0.0005)
        history = {"train_loss": [], "train_acc": []}
        self.to(DEVICE)

        for epoch in range(local_epochs):
            self.train()
            running_loss = 0.0
            correct = 0
            total = 0

            for inputs, labels in self.trainloader:
                inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)

                optimizer.zero_grad()
                outputs = self(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                # Weight the batch loss by batch size so the epoch figure is
                # a per-sample average
                running_loss += loss.item() * inputs.size(0)
                predicted = (outputs > 0.5).float()
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

            epoch_loss = running_loss / len(self.trainloader.dataset)
            epoch_acc = correct / total
            history["train_loss"].append(epoch_loss)
            history["train_acc"].append(epoch_acc)

        return history

    def evaluate(self):
        """
        Evaluate on ``self.testloader`` without gradient tracking.

        Returns:
            Dict with "epoch_loss" (per-sample average BCE loss) and
            "epoch_accuracy" (fraction of predictions matching labels at a
            0.5 threshold).
        """
        self.to(DEVICE)
        self.eval()
        criterion = nn.BCELoss()

        running_loss = 0.0
        correct = 0
        total = 0

        with torch.no_grad():
            for inputs, labels in self.testloader:
                inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
                outputs = self(inputs)
                loss = criterion(outputs, labels)
                running_loss += loss.item() * inputs.size(0)
                predicted = (outputs > 0.5).float()
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        epoch_loss = running_loss / len(self.testloader.dataset)
        epoch_acc = correct / total

        return {"epoch_loss": epoch_loss, "epoch_accuracy": epoch_acc}

    def get_parameters(self) -> flwr.common.ArrayRecord:
        """Return the model's ``state_dict`` serialised as an ArrayRecord."""
        return self.pytorch_to_parameter_record(self.state_dict())

    def set_parameters(self, parameters: flwr.common.ArrayRecord) -> None:
        """Load the model's ``state_dict`` from an ArrayRecord."""
        self.load_state_dict(self.parameters_to_pytorch_state_dict(parameters))

    def pytorch_to_parameter_record(
        self, state_dict: dict,
    ) -> flwr.common.ArrayRecord:
        """Serialise your PyTorch model."""
        transformed_state_dict = OrderedDict()

        for k, v in state_dict.items():
            # Tensors are moved to CPU before conversion to NumPy
            transformed_state_dict[k] = self._ndarray_to_array(v.cpu().numpy())

        return flwr.common.ArrayRecord(transformed_state_dict)

    def parameters_to_pytorch_state_dict(
        self, params_record: flwr.common.ArrayRecord,
    ) -> dict:
        """Reconstruct PyTorch state_dict from its serialised representation."""
        state_dict = {}
        for k, v in params_record.items():
            state_dict[k] = torch.tensor(self._basic_array_deserialisation(v))

        return state_dict
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
class Strategy:
    """
    Aggregation strategy that averages what the clients send back: an
    unweighted element-wise mean for parameters and an unweighted mean for
    scalar metrics.
    """

    def _parameters_to_dict(self, params_record: flwr.common.ArrayRecord) -> OrderedDict:
        """Decode every entry of an ArrayRecord into a NumPy array, keeping order."""
        return OrderedDict(
            (name, self._basic_array_deserialisation(array))
            for name, array in params_record.items()
        )

    def _dict_to_parameter_record(self,
        parameters: OrderedDict["str", flwr.common.NDArray],
    ) -> flwr.common.ArrayRecord:
        """Encode an ordered mapping of NumPy arrays as an ArrayRecord."""
        encoded = OrderedDict(
            (name, self._ndarray_to_array(values))
            for name, values in parameters.items()
        )
        return flwr.common.ArrayRecord(encoded)

    def _ndarray_to_array(self, ndarray: flwr.common.NDArray) -> flwr.common.Array:
        """Wrap a NumPy ndarray (raw bytes, dtype name, shape) in an Array."""
        return flwr.common.Array(
            data=ndarray.tobytes(),
            dtype=str(ndarray.dtype),
            stype="numpy.ndarray.tobytes",
            shape=list(ndarray.shape),
        )

    def _basic_array_deserialisation(self, array: flwr.common.Array) -> flwr.common.NDArray:
        """Rebuild a NumPy ndarray from an Array's bytes, dtype and shape."""
        flat = np.frombuffer(buffer=array.data, dtype=array.dtype)
        return flat.reshape(array.shape)

    def aggregate_parameters(self, results: list[flwr.common.ArrayRecord], config: Optional[flwr.common.ConfigRecord]=None
    ) -> flwr.common.ArrayRecord:
        """
        Average the clients' parameters entry by entry.

        The key set is taken from the first record; every record is expected
        to provide those keys. ``config`` is accepted but not used.
        """
        decoded = [self._parameters_to_dict(record) for record in results]
        reference = decoded[0]
        averaged = OrderedDict()
        for name in reference.keys():
            # Accumulator has the dtype and shape of the first client's array
            total: np.ndarray = np.zeros_like(reference[name])
            for state in decoded:
                total += state[name]
            averaged[name] = total / len(results)
        return self._dict_to_parameter_record(averaged)

    def aggregate_metrics(self, results: list[flwr.common.MetricRecord], config: Optional[flwr.common.ConfigRecord]=None) -> flwr.common.MetricRecord:
        """
        Average each metric across the clients' metric records.

        The key set is taken from the first record. ``config`` is accepted but
        not used.

        Raises:
            ValueError: If a metric value is not an int or float.
        """
        averaged = OrderedDict()
        for name in results[0].keys():
            running = 0.0
            for record in results:
                value = record[name]
                if not isinstance(value, (int, float)):
                    raise ValueError(
                        f"flwr.common.MetricsRecord value type not supported: {type(value)}"
                    )
                running += value  # type: ignore
            averaged[name] = running / len(results)
        return flwr.common.MetricRecord(averaged)
|
|
@@ -24,17 +24,17 @@ def get_readme_template() -> str:
|
|
|
24
24
|
return file.read()
|
|
25
25
|
|
|
26
26
|
|
|
27
|
-
def get_server_template(experiment_name: str) -> str:
    """Return the server-app template with the experiment name filled in.

    Args:
        experiment_name: Text substituted for every ``EXPERIMENT_NAME``
            placeholder found in the template.

    Returns:
        The contents of ``templates/server_app_template.py`` (looked up next
        to this module) after the substitution.
    """
    template_path = os.path.join(
        os.path.dirname(__file__), "templates", "server_app_template.py"
    )
    # Decode explicitly as UTF-8: the platform default encoding differs
    # between systems and may not be able to read the template.
    with open(template_path, "r", encoding="utf-8") as file:
        return file.read().replace("EXPERIMENT_NAME", experiment_name)
|
|
32
32
|
|
|
33
|
-
def get_client_template(experiment_name: str) -> str:
    """Return the client-app template with the experiment name filled in.

    Args:
        experiment_name: Text substituted for every ``EXPERIMENT_NAME``
            placeholder found in the template.

    Returns:
        The contents of ``templates/client_app_template.py`` (looked up next
        to this module) after the substitution.
    """
    template_path = os.path.join(
        os.path.dirname(__file__), "templates", "client_app_template.py"
    )
    # Decode explicitly as UTF-8: the platform default encoding differs
    # between systems and may not be able to read the template.
    with open(template_path, "r", encoding="utf-8") as file:
        return file.read().replace("EXPERIMENT_NAME", experiment_name)
|
|
38
38
|
|
|
39
39
|
def get_pyproject_template(experiment_name: str) -> str:
|
|
40
40
|
current_dir = os.path.dirname(__file__)
|
|
@@ -61,11 +61,11 @@ def get_init_template() -> str:
|
|
|
61
61
|
with open(template_path, 'r') as file:
|
|
62
62
|
return file.read()
|
|
63
63
|
|
|
64
|
-
def get_ds_template(experiment_name: str) -> str:
    """Return the data-scientist notebook template with the experiment name filled in.

    Args:
        experiment_name: Text substituted for every ``EXPERIMENT_NAME``
            placeholder found in the template. It is inserted verbatim.

    Returns:
        The contents of ``templates/ds_template.ipynb`` (looked up next to
        this module) after the substitution.
    """
    template_path = os.path.join(
        os.path.dirname(__file__), "templates", "ds_template.ipynb"
    )
    # Decode explicitly as UTF-8: the platform default encoding differs
    # between systems and may not be able to read the template.
    with open(template_path, "r", encoding="utf-8") as file:
        return file.read().replace("EXPERIMENT_NAME", experiment_name)
|
|
69
69
|
|
|
70
70
|
def get_images(output_folder) -> None:
|
|
71
71
|
current_dir = os.path.dirname(__file__)
|
|
@@ -1,49 +0,0 @@
|
|
|
1
|
-
from flwr.client import ClientApp, NumPyClient
|
|
2
|
-
from flwr.common import Context
|
|
3
|
-
from loguru import logger
|
|
4
|
-
|
|
5
|
-
from EXPERIMENT_NAME.task import (
|
|
6
|
-
Net,
|
|
7
|
-
evaluate,
|
|
8
|
-
get_weights,
|
|
9
|
-
load_flwr_data,
|
|
10
|
-
set_weights,
|
|
11
|
-
train,
|
|
12
|
-
)
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
class FlowerClient(NumPyClient):
    """Flower NumPy client that trains and evaluates a local model."""

    def __init__(self, net, trainloader, testloader):
        """Store the model and the loaders used by ``fit`` and ``evaluate``."""
        self.net = net
        self.trainloader = trainloader
        self.testloader = testloader

    def fit(self, parameters, config):
        """Load ``parameters``, train locally and return the updated weights.

        Returns:
            ``(weights, num_examples, metrics)`` with an empty metrics dict.
        """
        set_weights(self.net, parameters)
        train(self.net, self.trainloader)
        # Report the number of samples, not len(loader), which counts batches.
        return get_weights(self.net), len(self.trainloader.dataset), {}

    def evaluate(self, parameters, config):
        """Load ``parameters`` and evaluate them on the local test data.

        Returns:
            ``(loss, num_examples, {"accuracy": accuracy})``.
        """
        set_weights(self.net, parameters)
        loss, accuracy = evaluate(self.net, self.testloader)
        # len(loader) counts batches, which under-reports the sample count
        # whenever a batch holds more than one sample.
        return loss, len(self.testloader.dataset), {"accuracy": accuracy}
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
def client_fn(context: Context):
    """Build the Flower client for the node described by ``context``.

    When ``run_syft_flwr()`` is truthy the SyftBox dataset is loaded;
    otherwise the partition named by ``context.node_config`` is used.
    """
    from EXPERIMENT_NAME.task import load_syftbox_dataset
    from syft_flwr.utils import run_syft_flwr

    if run_syft_flwr():
        logger.info("Running with syft_flwr")
        loaders = load_syftbox_dataset()
    else:
        logger.info("Running flwr locally")
        node_config = context.node_config
        loaders = load_flwr_data(
            partition_id=node_config["partition-id"],
            num_partitions=node_config["num-partitions"],
        )

    train_loader, test_loader = loaders
    client = FlowerClient(Net(), train_loader, test_loader)
    return client.to_client()
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
# Module-level ClientApp; it obtains its clients by calling client_fn.
app = ClientApp(client_fn=client_fn)
|
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
"""fltabular: Flower Example on Adult Census Income Tabular Dataset."""
|
|
2
|
-
|
|
3
|
-
from flwr.common import Context, ndarrays_to_parameters
|
|
4
|
-
from flwr.server import ServerApp, ServerAppComponents, ServerConfig
|
|
5
|
-
|
|
6
|
-
from EXPERIMENT_NAME.task import Net, get_weights
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
def weighted_average(metrics):
    """Aggregate client accuracies into an example-weighted mean.

    Args:
        metrics: Iterable of ``(num_examples, client_metrics)`` pairs, where
            every ``client_metrics`` mapping has an ``"accuracy"`` entry.

    Returns:
        ``{"accuracy": value}``, where ``value`` is the accuracy weighted by
        the number of examples, or ``0.0`` when no examples were reported
        (empty input or all counts zero).
    """
    weighted_sum = 0.0
    total_examples = 0
    # Single pass, so one-shot iterables (generators) are handled too.
    for num_examples, client_metrics in metrics:
        weighted_sum += num_examples * client_metrics["accuracy"]
        total_examples += num_examples

    if total_examples == 0:
        # Nothing to average; avoid dividing by zero.
        return {"accuracy": 0.0}
    return {"accuracy": weighted_sum / total_examples}
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
def server_fn(context: Context) -> ServerAppComponents:
    """Assemble the server components: strategy plus run configuration.

    The strategy starts from the weights of a freshly constructed ``Net`` and
    is given a ``weights`` directory three levels above this file as its
    save path. The number of rounds is read from
    ``context.run_config["num-server-rounds"]``.
    """
    initial_parameters = ndarrays_to_parameters(get_weights(Net()))

    from pathlib import Path

    from syft_flwr.strategy import FedAvgWithModelSaving

    weights_dir = Path(__file__).parent.parent.parent / "weights"
    strategy = FedAvgWithModelSaving(
        save_path=weights_dir,
        fraction_fit=1.0,
        fraction_evaluate=1.0,
        min_available_clients=2,
        initial_parameters=initial_parameters,
        evaluate_metrics_aggregation_fn=weighted_average,
    )
    server_config = ServerConfig(num_rounds=context.run_config["num-server-rounds"])
    return ServerAppComponents(config=server_config, strategy=strategy)
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
# Module-level ServerApp; it obtains its components by calling server_fn.
app = ServerApp(server_fn=server_fn)
|
|
@@ -1,243 +0,0 @@
|
|
|
1
|
-
from collections import OrderedDict
|
|
2
|
-
|
|
3
|
-
import torch
|
|
4
|
-
import torch.nn as nn
|
|
5
|
-
import torch.optim as optim
|
|
6
|
-
from flwr_datasets import FederatedDataset
|
|
7
|
-
from flwr_datasets.partitioner import IidPartitioner
|
|
8
|
-
from imblearn.over_sampling import SMOTE
|
|
9
|
-
from loguru import logger
|
|
10
|
-
from pandas import DataFrame
|
|
11
|
-
from sklearn.model_selection import train_test_split
|
|
12
|
-
from sklearn.preprocessing import StandardScaler
|
|
13
|
-
from torch.utils.data import DataLoader, TensorDataset
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
def get_device():
    """Return the preferred available torch device.

    Preference order: CUDA, then MPS, then XPU, falling back to CPU. A
    backend module that is absent from the installed torch build is treated
    as unavailable instead of raising ``AttributeError``.
    """
    if torch.cuda.is_available():
        return torch.device("cuda")

    # Look the optional backends up defensively: not every torch build
    # ships ``torch.backends.mps`` or ``torch.xpu``.
    mps_backend = getattr(torch.backends, "mps", None)
    if mps_backend is not None and mps_backend.is_available():
        return torch.device("mps")

    xpu_backend = getattr(torch, "xpu", None)
    if xpu_backend is not None and xpu_backend.is_available():
        return torch.device("xpu")

    return torch.device("cpu")
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
# Resolve the compute device once, at import time, and log the choice.
DEVICE = get_device()
logger.info(f"Using device: {DEVICE}")
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
class Net(nn.Module):
    """Feed-forward binary classifier with three hidden stages.

    Layer widths are ``input_dim -> 32 -> 24 -> 16 -> 1``; the final stage
    applies a sigmoid.
    """

    def __init__(self, input_dim=6):
        super().__init__()

        # Stage 1: linear -> batch norm -> LeakyReLU(0.1) -> dropout(0.2).
        self.layer1 = nn.Sequential(
            nn.Linear(input_dim, 32),
            nn.BatchNorm1d(32),
            nn.LeakyReLU(0.1),
            nn.Dropout(0.2),
        )

        # Stage 2: linear -> batch norm -> LeakyReLU(0.1) -> dropout(0.25).
        self.layer2 = nn.Sequential(
            nn.Linear(32, 24),
            nn.BatchNorm1d(24),
            nn.LeakyReLU(0.1),
            nn.Dropout(0.25),
        )

        # Stage 3: linear -> batch norm -> LeakyReLU(0.1), no dropout.
        self.layer3 = nn.Sequential(
            nn.Linear(24, 16),
            nn.BatchNorm1d(16),
            nn.LeakyReLU(0.1),
        )

        # Output: a single unit followed by a sigmoid.
        self.output_layer = nn.Sequential(
            nn.Linear(16, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Run ``x`` through the four stages in order and return the result."""
        for stage in (self.layer1, self.layer2, self.layer3, self.output_layer):
            x = stage(x)
        return x
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
def dataset_processing(
    train_df: DataFrame, test_df: DataFrame
) -> tuple[DataLoader, DataLoader]:
    """Turn raw train/test frames into PyTorch data loaders.

    Each frame is cleaned independently: the ``SkinThickness`` and
    ``Insulin`` columns are dropped, and zeros in ``Glucose``, ``BMI`` and
    ``BloodPressure`` are replaced by that frame's non-zero mean (Glucose)
    or median (BMI, BloodPressure). The first six remaining columns are the
    features and the rest are the labels. The training set is then
    oversampled with SMOTE and standardised; the test set is scaled with the
    statistics fitted on the resampled training set.

    Returns:
        ``(train_loader, test_loader)``: the training loader shuffles in
        batches of 10, the test loader yields the whole test set as one batch.
    """
    from collections import Counter

    def _clean(frame: DataFrame) -> DataFrame:
        # Drop the unused columns, then impute zero readings.
        trimmed: DataFrame = frame.drop(columns=["SkinThickness", "Insulin"])

        glucose_mean = trimmed.loc[trimmed["Glucose"] != 0, "Glucose"].mean()
        bmi_median = trimmed.loc[trimmed["BMI"] != 0, "BMI"].median()
        bp_median = trimmed.loc[trimmed["BloodPressure"] != 0, "BloodPressure"].median()

        return trimmed.replace(
            {
                "Glucose": {0: glucose_mean},
                "BMI": {0: bmi_median},
                "BloodPressure": {0: bp_median},
            }
        )

    train_matrix = _clean(train_df).values
    test_matrix = _clean(test_df).values

    # Columns 0-5 are features; everything from column 6 on is the label.
    X_train, y_train = train_matrix[:, :6], train_matrix[:, 6:]
    X_test, y_test = test_matrix[:, :6], test_matrix[:, 6:]

    # SMOTE needs fewer neighbours than there are minority samples.
    minority_count = min(Counter(y_train.flatten()).values())
    if minority_count > 1:
        k_neighbors = min(5, minority_count - 1)
    else:
        k_neighbors = 1

    # Oversample the training data to fix the class imbalance.
    oversampler = SMOTE(random_state=42, k_neighbors=k_neighbors)
    X_balanced, y_balanced = oversampler.fit_resample(X_train, y_train)

    # Standardise with statistics fitted on the resampled training data only.
    scaler = StandardScaler()
    X_balanced = scaler.fit_transform(X_balanced)
    X_test = scaler.transform(X_test)

    # Labels are reshaped into a single column.
    train_dataset = TensorDataset(
        torch.FloatTensor(X_balanced),
        torch.FloatTensor(y_balanced).reshape(-1, 1),
    )
    test_dataset = TensorDataset(
        torch.FloatTensor(X_test),
        torch.FloatTensor(y_test).reshape(-1, 1),
    )

    train_loader = DataLoader(dataset=train_dataset, batch_size=10, shuffle=True)
    test_loader = DataLoader(
        dataset=test_dataset, batch_size=len(test_dataset), shuffle=False
    )
    return train_loader, test_loader
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
def load_syftbox_dataset() -> tuple[DataLoader, DataLoader]:
    """Load ``train.csv`` and ``test.csv`` from the SyftBox dataset directory.

    Returns:
        The ``(train_loader, test_loader)`` pair produced by
        ``dataset_processing``.
    """
    import pandas as pd

    from syft_flwr.utils import get_syftbox_dataset_path

    dataset_dir = get_syftbox_dataset_path()
    logger.info(f"Loading dataset from {dataset_dir}")

    train_df, test_df = (
        pd.read_csv(dataset_dir / filename) for filename in ("train.csv", "test.csv")
    )
    return dataset_processing(train_df, test_df)
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
# Module-level cache for the FederatedDataset; populated on the first
# load_flwr_data() call and reused afterwards.
fds = None
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
def load_flwr_data(
    partition_id: int, num_partitions: int
) -> tuple[DataLoader, DataLoader]:
    """Load one partition of ``khoaguin/pima-indians-diabetes-database``.

    The federated dataset is created on the first call and cached in the
    module-level ``fds``, so ``num_partitions`` only takes effect on that
    first call. The selected partition is split 80/20 into train and test
    frames before being handed to ``dataset_processing``.
    """
    global fds
    if fds is None:
        fds = FederatedDataset(
            dataset="khoaguin/pima-indians-diabetes-database",
            partitioners={"train": IidPartitioner(num_partitions=num_partitions)},
        )

    raw_partition = fds.load_partition(partition_id, "train")
    partition: DataFrame = raw_partition.with_format("pandas")[:]

    train_df, test_df = train_test_split(partition, test_size=0.2, random_state=95)
    return dataset_processing(train_df, test_df)
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
def train(model, train_loader, local_epochs=1):
    """Train ``model`` on ``train_loader`` for ``local_epochs`` epochs.

    Uses binary cross-entropy loss and Adam (lr 0.001, weight decay 0.0005)
    on ``DEVICE``. Predictions are thresholded at 0.5 to compute accuracy.

    Returns:
        A dict with per-epoch lists under ``"train_loss"`` (sample-weighted
        mean loss) and ``"train_acc"`` (fraction of correct predictions).
    """
    loss_fn = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0005)
    history = {"train_loss": [], "train_acc": []}
    model.to(DEVICE)

    for _ in range(local_epochs):
        model.train()
        loss_sum = 0.0
        n_correct = 0
        n_seen = 0

        for batch_inputs, batch_labels in train_loader:
            batch_inputs = batch_inputs.to(DEVICE)
            batch_labels = batch_labels.to(DEVICE)

            optimizer.zero_grad()
            predictions = model(batch_inputs)
            batch_loss = loss_fn(predictions, batch_labels)
            batch_loss.backward()
            optimizer.step()

            # Weight the batch mean by its size so the epoch loss is per sample.
            loss_sum += batch_loss.item() * batch_inputs.size(0)
            hard_predictions = (predictions > 0.5).float()
            n_seen += batch_labels.size(0)
            n_correct += (hard_predictions == batch_labels).sum().item()

        history["train_loss"].append(loss_sum / len(train_loader.dataset))
        history["train_acc"].append(n_correct / n_seen)

    return history
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
def evaluate(model, data_loader):
    """Evaluate ``model`` on ``data_loader`` without tracking gradients.

    Returns:
        ``(loss, accuracy)``: the sample-weighted mean binary cross-entropy
        loss and the fraction of predictions (thresholded at 0.5) that match
        the labels.
    """
    model.to(DEVICE)
    model.eval()
    loss_fn = nn.BCELoss()

    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    with torch.no_grad():
        for batch_inputs, batch_labels in data_loader:
            batch_inputs = batch_inputs.to(DEVICE)
            batch_labels = batch_labels.to(DEVICE)

            predictions = model(batch_inputs)
            batch_loss = loss_fn(predictions, batch_labels)

            # Weight the batch mean by its size so the total is per sample.
            loss_sum += batch_loss.item() * batch_inputs.size(0)
            hard_predictions = (predictions > 0.5).float()
            n_seen += batch_labels.size(0)
            n_correct += (hard_predictions == batch_labels).sum().item()

    mean_loss = loss_sum / len(data_loader.dataset)
    accuracy = n_correct / n_seen
    return mean_loss, accuracy
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
def set_weights(model, parameters):
    """Load ``parameters`` into ``model``.

    The arrays are paired positionally with the keys of
    ``model.state_dict()`` and loaded with ``strict=True``.
    """
    state_keys = model.state_dict().keys()
    new_state = OrderedDict(
        (key, torch.tensor(value)) for key, value in zip(state_keys, parameters)
    )
    model.load_state_dict(new_state, strict=True)
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
def get_weights(model):
    """Return every entry of ``model.state_dict()`` as a NumPy array, in state_dict order."""
    return [tensor.cpu().numpy() for tensor in model.state_dict().values()]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/examples/simulation-scikit-model/simulation_example.ipynb
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/templates/extern_pyproject_template.toml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/templates/images/dsAggregateModels.png
RENAMED
|
File without changes
|
{fedmodelkit-0.6.1 → fedmodelkit-0.6.6}/src/FedModelKit/templates/images/dsDoneSubmittingJobs.png
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|