MEDfl 0.2.1__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- MEDfl/LearningManager/__init__.py +13 -13
- MEDfl/LearningManager/client.py +150 -181
- MEDfl/LearningManager/dynamicModal.py +287 -287
- MEDfl/LearningManager/federated_dataset.py +60 -60
- MEDfl/LearningManager/flpipeline.py +192 -192
- MEDfl/LearningManager/model.py +223 -223
- MEDfl/LearningManager/params.yaml +14 -14
- MEDfl/LearningManager/params_optimiser.py +442 -442
- MEDfl/LearningManager/plot.py +229 -229
- MEDfl/LearningManager/server.py +181 -189
- MEDfl/LearningManager/strategy.py +82 -138
- MEDfl/LearningManager/utils.py +331 -331
- MEDfl/NetManager/__init__.py +10 -10
- MEDfl/NetManager/database_connector.py +43 -43
- MEDfl/NetManager/dataset.py +92 -92
- MEDfl/NetManager/flsetup.py +320 -320
- MEDfl/NetManager/net_helper.py +254 -254
- MEDfl/NetManager/net_manager_queries.py +142 -142
- MEDfl/NetManager/network.py +194 -194
- MEDfl/NetManager/node.py +184 -184
- MEDfl/__init__.py +2 -2
- MEDfl/scripts/__init__.py +1 -1
- MEDfl/scripts/base.py +29 -29
- MEDfl/scripts/create_db.py +126 -126
- Medfl/LearningManager/__init__.py +13 -0
- Medfl/LearningManager/client.py +150 -0
- Medfl/LearningManager/dynamicModal.py +287 -0
- Medfl/LearningManager/federated_dataset.py +60 -0
- Medfl/LearningManager/flpipeline.py +192 -0
- Medfl/LearningManager/model.py +223 -0
- Medfl/LearningManager/params.yaml +14 -0
- Medfl/LearningManager/params_optimiser.py +442 -0
- Medfl/LearningManager/plot.py +229 -0
- Medfl/LearningManager/server.py +181 -0
- Medfl/LearningManager/strategy.py +82 -0
- Medfl/LearningManager/utils.py +331 -0
- Medfl/NetManager/__init__.py +10 -0
- Medfl/NetManager/database_connector.py +43 -0
- Medfl/NetManager/dataset.py +92 -0
- Medfl/NetManager/flsetup.py +320 -0
- Medfl/NetManager/net_helper.py +254 -0
- Medfl/NetManager/net_manager_queries.py +142 -0
- Medfl/NetManager/network.py +194 -0
- Medfl/NetManager/node.py +184 -0
- Medfl/__init__.py +3 -0
- Medfl/scripts/__init__.py +2 -0
- Medfl/scripts/base.py +30 -0
- Medfl/scripts/create_db.py +126 -0
- alembic/env.py +61 -61
- {MEDfl-0.2.1.dist-info → medfl-2.0.0.dist-info}/METADATA +120 -108
- medfl-2.0.0.dist-info/RECORD +55 -0
- {MEDfl-0.2.1.dist-info → medfl-2.0.0.dist-info}/WHEEL +1 -1
- {MEDfl-0.2.1.dist-info → medfl-2.0.0.dist-info/licenses}/LICENSE +674 -674
- MEDfl-0.2.1.dist-info/RECORD +0 -31
- {MEDfl-0.2.1.dist-info → medfl-2.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,223 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# forked from https://github.com/pythonlessons/mltu/blob/main/mltu/torch/model.py
|
3
|
+
|
4
|
+
import typing
|
5
|
+
from collections import OrderedDict
|
6
|
+
from typing import Dict, List, Optional, Tuple
|
7
|
+
|
8
|
+
import numpy as np
|
9
|
+
import torch
|
10
|
+
import torch.nn as nn
|
11
|
+
from sklearn.metrics import accuracy_score,roc_auc_score
|
12
|
+
|
13
|
+
from .utils import params
|
14
|
+
|
15
|
+
|
16
|
+
class Model:
    """
    Model class for training and testing PyTorch neural networks.

    Attributes:
        model (torch.nn.Module): PyTorch neural network.
        optimizer (torch.optim.Optimizer): PyTorch optimizer.
        criterion (typing.Callable): Loss function.
    """

    def __init__(
        self,
        model: torch.nn.Module,
        optimizer: torch.optim.Optimizer,
        criterion: typing.Callable,
    ) -> None:
        """
        Initialize Model class with the specified model, optimizer, and criterion.

        Args:
            model (torch.nn.Module): PyTorch neural network.
            optimizer (torch.optim.Optimizer): PyTorch optimizer.
            criterion (typing.Callable): Loss function.

        Raises:
            TypeError: If ``model`` or ``optimizer`` has the wrong type.
        """
        self.model = model
        self.optimizer = optimizer
        self.criterion = criterion
        # Fail fast on wrongly typed arguments.
        self.validate()

    def validate(self) -> None:
        """
        Validate model and optimizer.

        Raises:
            TypeError: If ``self.model`` is not a ``torch.nn.Module`` or
                ``self.optimizer`` is not a ``torch.optim.Optimizer``.
        """
        if not isinstance(self.model, torch.nn.Module):
            raise TypeError("model argument must be a torch.nn.Module")

        if not isinstance(self.optimizer, torch.optim.Optimizer):
            raise TypeError(
                "optimizer argument must be a torch.optim.Optimizer"
            )

    def get_parameters(self) -> List[np.ndarray]:
        """
        Get the parameters of the model as a list of NumPy arrays.

        Returns:
            List[np.ndarray]: One array per ``state_dict`` entry, in
            ``state_dict`` order.
        """
        return [val.cpu().numpy() for val in self.model.state_dict().values()]

    def set_parameters(self, parameters: List[np.ndarray]) -> None:
        """
        Set the parameters of the model from a list of NumPy arrays.

        Args:
            parameters (List[np.ndarray]): The parameters to be set, in the
                same order as ``self.model.state_dict()``.
        """
        keys = self.model.state_dict().keys()
        # torch.tensor keeps each array's dtype and shape (torch.Tensor would
        # force float32); load_state_dict then copies into the existing
        # parameters and buffers.
        state_dict = OrderedDict(
            (key, torch.tensor(np.asarray(value)))
            for key, value in zip(keys, parameters)
        )
        self.model.load_state_dict(state_dict, strict=True)

    @staticmethod
    def _batch_accuracy(y_hat: torch.Tensor, y_true: torch.Tensor) -> float:
        """
        Return the fraction of correct predictions in one batch.

        1-D outputs are rounded to the nearest integer, which is what
        ``evaluate`` does; outputs with a second axis are reduced with an
        argmax over that axis.
        """
        scores = y_hat.detach().cpu().numpy()
        labels = y_true.detach().cpu().numpy()
        preds = scores.argmax(axis=1) if scores.ndim > 1 else scores.round()
        return float((preds == labels).mean())

    def train(
        self, train_loader, epoch, device, privacy_engine, diff_priv=False
    ) -> float:
        """
        Train the model on the given train_loader for one epoch.

        Args:
            train_loader: The data loader for training data.
            epoch (int): The current epoch number (used for logging only).
            device: The device on which to perform the training.
            privacy_engine: The privacy engine used for differential privacy (if enabled).
            diff_priv (bool, optional): Whether differential privacy is used. Default is False.

        Returns:
            float: The epsilon reported by ``privacy_engine`` after the last
            batch, or 0 when ``diff_priv`` is False.
        """
        self.model.train()
        epsilon = 0
        losses = []
        top1_acc = []

        for i, (X_train, y_train) in enumerate(train_loader):
            X_train, y_train = X_train.to(device), y_train.to(device)

            self.optimizer.zero_grad()

            # compute output
            y_hat = torch.squeeze(self.model(X_train), 1)
            loss = self.criterion(y_hat, y_train)

            # measure accuracy and record loss
            losses.append(loss.item())
            top1_acc.append(self._batch_accuracy(y_hat, y_train))

            loss.backward()
            self.optimizer.step()

            if diff_priv:
                # Refreshed after every step so the returned value reflects
                # the last batch; the log line below reuses it.
                epsilon = privacy_engine.get_epsilon(float(params["DELTA"]))

            if (i + 1) % 10 == 0:
                summary = (
                    f"\tTrain Epoch: {epoch} \t"
                    f"Loss: {np.mean(losses):.6f} "
                    f"Acc@1: {np.mean(top1_acc) * 100:.6f}"
                )
                if diff_priv:
                    summary += f" (ε = {epsilon:.2f}, δ = {params['DELTA']})"
                print(summary)

        return epsilon

    def evaluate(
        self, val_loader, device=torch.device("cpu")
    ) -> Tuple[float, float, float]:
        """
        Evaluate the model on the given validation data.

        Args:
            val_loader: The data loader for validation data.
            device: The device on which to perform the evaluation. Default is 'cpu'.

        Returns:
            Tuple[float, float, float]: The evaluation loss, the mean
            per-batch accuracy and the mean per-batch AUC. Batches that
            contain a single class are skipped for the AUC; a metric with no
            contributing batch is returned as NaN.
        """
        loss, accuracy, auc = 0.0, [], []
        self.model.eval()

        with torch.no_grad():
            for X_test, y_test in val_loader:
                X_test, y_test = X_test.to(device), y_test.to(device)

                y_hat = torch.squeeze(self.model(X_test), 1)

                # Only module-based losses can be moved between devices;
                # plain callables are used as they are.
                criterion = self.criterion
                if isinstance(criterion, torch.nn.Module):
                    criterion = criterion.to(y_hat.device)
                loss += criterion(y_hat, y_test).item()

                y_prob_cpu = y_hat.cpu().numpy()
                y_test_cpu = y_test.cpu().numpy()

                accuracy.append(accuracy_score(y_test_cpu, y_prob_cpu.round()))

                # roc_auc_score is undefined when only one class is present.
                if len(np.unique(y_test_cpu)) != 1:
                    auc.append(roc_auc_score(y_test_cpu, y_prob_cpu))

        # NOTE(review): the per-batch losses are summed and divided by the
        # number of samples, which is only a true mean when the loader uses a
        # batch size of 1 -- confirm this is intended.
        loss /= len(val_loader.dataset)
        mean_accuracy = np.mean(accuracy) if accuracy else float("nan")
        mean_auc = np.mean(auc) if auc else float("nan")
        return loss, mean_accuracy, mean_auc

    @staticmethod
    def save_model(model, model_name: str):
        """
        Saves a PyTorch model to a file.

        The file is written to
        ``../../notebooks/.ipynb_checkpoints/trainedModels/<model_name>.pth``,
        relative to the current working directory.

        Args:
            model (torch.nn.Module): PyTorch model to be saved.
            model_name (str): Name of the model file (without extension).

        Raises:
            Exception: If there is an issue during the saving process.

        Returns:
            None
        """
        try:
            torch.save(model, '../../notebooks/.ipynb_checkpoints/trainedModels/' + model_name + ".pth")
        except Exception as e:
            # Chain the cause so the original traceback is preserved.
            raise Exception(f"Error saving the model: {str(e)}") from e

    @staticmethod
    def load_model(model_path: str):
        """
        Loads a PyTorch model from a file.

        Args:
            model_path (str): Path to the model file to be loaded.

        Returns:
            torch.nn.Module: Loaded PyTorch model.
        """
        # Ensure models are loaded onto the CPU when CUDA is not available.
        map_location = None if torch.cuda.is_available() else torch.device('cpu')
        # NOTE(review): torch.load unpickles the file, so only load files from
        # a trusted source. Recent PyTorch releases default to
        # weights_only=True, which rejects whole pickled modules such as the
        # ones save_model writes -- confirm against the supported versions.
        return torch.load(model_path, map_location=map_location)
|
222
|
+
|
223
|
+
|
@@ -0,0 +1,14 @@
|
|
1
|
+
DELTA: 1.0e-05
|
2
|
+
EPSILON: 5.0
|
3
|
+
MAX_GRAD_NORM: 1.0
|
4
|
+
diff_privacy: true
|
5
|
+
lr: 0.01
|
6
|
+
min_evalclient: 2
|
7
|
+
num_rounds: 12
|
8
|
+
optimizer: SGD
|
9
|
+
path_to_master_csv: /home/local/USHERBROOKE/saho6810/MEDfl/code/MEDfl/notebooks/data/masterDataSet/Mimic_ouael.csv
|
10
|
+
path_to_test_csv: /home/local/USHERBROOKE/saho6810/MEDfl/code/MEDfl/notebooks/data/masterDataSet/Mimic_train.csv
|
11
|
+
task: BinaryClassification
|
12
|
+
test_batch_size: 1
|
13
|
+
train_batch_size: 32
|
14
|
+
train_epochs: 116
|
@@ -0,0 +1,442 @@
|
|
1
|
+
import numpy as np
|
2
|
+
import pandas as pd
|
3
|
+
import matplotlib.pyplot as plt
|
4
|
+
import seaborn as sns
|
5
|
+
import torch
|
6
|
+
import torch.nn as nn
|
7
|
+
import torch.optim as optim
|
8
|
+
import torch.nn.functional as F
|
9
|
+
from torch.utils.data import TensorDataset, DataLoader
|
10
|
+
from sklearn.model_selection import GridSearchCV, train_test_split
|
11
|
+
from sklearn.base import BaseEstimator
|
12
|
+
from sklearn.metrics import make_scorer, precision_score, recall_score, accuracy_score, f1_score,roc_auc_score, balanced_accuracy_score
|
13
|
+
import optuna
|
14
|
+
|
15
|
+
from MEDfl.LearningManager.model import Model
|
16
|
+
from MEDfl.LearningManager.strategy import Strategy
|
17
|
+
from MEDfl.LearningManager.server import FlowerServer
|
18
|
+
from MEDfl.LearningManager.flpipeline import FLpipeline
|
19
|
+
|
20
|
+
class BinaryClassifier(nn.Module):
    """
    Fully connected network with a single output unit.

    The network is ``num_layers`` linear layers of width ``layer_size``
    (the first one reading ``input_size`` features) followed by a linear
    layer with one output. ReLU is applied after every layer except the
    last; the final output is returned without any activation.
    """

    def __init__(self, input_size, num_layers, layer_size):
        """
        Build the layer stack.

        Args:
            input_size: Number of input features.
            num_layers: Number of ``layer_size``-wide layers, including the
                input layer.
            layer_size: Width of every layer before the output layer.
        """
        super().__init__()

        # Input layer, then the remaining hidden layers, then the output
        # layer; a ModuleList registers this dynamically sized stack.
        stack = [nn.Linear(input_size, layer_size)]
        stack.extend(
            nn.Linear(layer_size, layer_size) for _ in range(num_layers - 1)
        )
        stack.append(nn.Linear(layer_size, 1))
        self.layers = nn.ModuleList(stack)

    def forward(self, x):
        """Apply ReLU after each layer but the last and return its raw output."""
        *hidden, head = self.layers
        for fc in hidden:
            x = F.relu(fc(x))
        return head(x)
|
42
|
+
|
43
|
+
class CustomPyTorchClassifier(BaseEstimator):
    """
    Scikit-learn style wrapper around a one-hidden-layer PyTorch classifier.

    The network emits one raw score (logit) per sample. It is trained with
    ``BCEWithLogitsLoss``, and ``predict`` applies the sigmoid before
    comparing against the threshold ``th``.
    """

    def __init__(self, hidden_dim=10, lr=0.001, pos_weight=1, th=0.5, max_epochs=10, batch_size=32):
        """
        Store the hyperparameters; the network itself is built in ``fit``.

        Args:
            hidden_dim: Width of the hidden layer.
            lr: Learning rate of the Adam optimizer.
            pos_weight: Weight of the positive class in the loss.
            th: Probability threshold used by ``predict``.
            max_epochs: Number of passes over the training data.
            batch_size: Mini-batch size used during training.
        """
        self.hidden_dim = hidden_dim
        self.lr = lr
        self.pos_weight = pos_weight
        self.max_epochs = max_epochs
        self.batch_size = batch_size
        self.th = th
        self.model = None

    def fit(self, X, y):
        """
        Train a fresh network on ``X`` / ``y``.

        Args:
            X: 2-D feature matrix (NumPy array or torch tensor).
            y: 1-D array or tensor of binary labels.

        Returns:
            CustomPyTorchClassifier: ``self``, as scikit-learn expects.
        """
        if isinstance(X, torch.Tensor):
            X = X.numpy()
        if isinstance(y, torch.Tensor):
            y = y.numpy()

        input_dim = X.shape[1]
        # No Sigmoid at the end: BCEWithLogitsLoss applies the sigmoid itself,
        # so a Sigmoid layer here would squash the scores twice.
        self.model = nn.Sequential(
            nn.Linear(input_dim, self.hidden_dim),
            nn.ReLU(),
            nn.Linear(self.hidden_dim, 1),
        )

        # pos_weight must be a floating-point tensor; the default value 1 is
        # a Python int.
        criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor(float(self.pos_weight)))
        optimizer = optim.Adam(self.model.parameters(), lr=self.lr)

        train_data = TensorDataset(torch.from_numpy(X).float(), torch.from_numpy(y).float())
        train_loader = DataLoader(train_data, batch_size=self.batch_size, shuffle=True)

        self.model.train()
        for epoch in range(self.max_epochs):
            for inputs, labels in train_loader:
                optimizer.zero_grad()
                outputs = self.model(inputs)
                # squeeze(1) keeps the batch axis, so a trailing batch of one
                # sample still matches the shape of ``labels``.
                loss = criterion(outputs.squeeze(1), labels)
                loss.backward()
                optimizer.step()
        return self

    def predict(self, X):
        """
        Predict binary labels for ``X``.

        Args:
            X: 2-D feature matrix (NumPy array or torch tensor).

        Returns:
            numpy.ndarray: 1-D float array of 0.0 / 1.0 predictions, one per
            row of ``X`` (a single row yields an array of length 1).
        """
        if isinstance(X, torch.Tensor):
            X = X.numpy()

        self.model.eval()
        with torch.no_grad():
            logits = self.model(torch.from_numpy(X).float())
            probabilities = torch.sigmoid(logits).squeeze(1)
            predictions = (probabilities > self.th).float().numpy()
        return predictions

    def score(self, X, y):
        """Return the accuracy of ``predict(X)`` against ``y``."""
        predictions = self.predict(X)
        return accuracy_score(y, predictions)
|
96
|
+
|
97
|
+
|
98
|
+
class ParamsOptimiser:
    """
    Hyperparameter search helpers for the binary classifiers of this module.

    Offers a scikit-learn grid search over ``CustomPyTorchClassifier``,
    Optuna studies over ``BinaryClassifier`` (architecture / optimizer and
    decision threshold), an Optuna study over a federated learning setup,
    and plotting shortcuts for the resulting studies.
    """

    def __init__(self, X_train = None, y_train=None, X_test=None, y_test=None):
        """
        Store the data sets, converting pandas objects to NumPy arrays.

        Args:
            X_train: Training features (``pd.DataFrame`` or array).
            y_train: Training labels (``pd.Series`` or array).
            X_test: Test features (``pd.DataFrame`` or array).
            y_test: Test labels (``pd.Series`` or array).
        """
        if isinstance(X_train, pd.DataFrame):
            X_train = X_train.to_numpy()
        if isinstance(y_train, pd.Series):
            y_train = y_train.to_numpy()
        if isinstance(X_test, pd.DataFrame):
            X_test = X_test.to_numpy()
        if isinstance(y_test, pd.Series):
            y_test = y_test.to_numpy()

        self.X_train = X_train
        self.y_train = y_train
        self.X_test = X_test
        self.y_test = y_test

    def perform_grid_search(self, param_grid, scoring_metric='recall', cv=3, verbose=1):
        """
        Run a grid search over ``CustomPyTorchClassifier`` hyperparameters.

        Args:
            param_grid: Grid passed to ``GridSearchCV``.
            scoring_metric: One of ``'precision'``, ``'accuracy'``,
                ``'recall'`` or ``'f1'``; any other value falls back to
                recall.
            cv: Cross-validation setting passed to ``GridSearchCV``.
            verbose: Verbosity passed to ``GridSearchCV``.

        Returns:
            The fitted ``GridSearchCV`` object, also kept in
            ``self.grid_search_results``.
        """
        metrics = {
            'precision': precision_score,
            'accuracy': accuracy_score,
            'recall': recall_score,
            'f1': f1_score,
        }
        scorer = make_scorer(metrics.get(scoring_metric, recall_score))

        grid_search = GridSearchCV(CustomPyTorchClassifier(), param_grid, scoring=scorer, cv=cv, refit=scoring_metric, verbose=verbose)
        grid_search.fit(self.X_train, self.y_train)

        self.grid_search_results = grid_search  # Save the grid search results

        return grid_search

    def plot_results(self, params_to_plot=None):
        """
        Plot the mean test scores of the last grid search.

        Args:
            params_to_plot: ``None`` for a bar chart of every configuration,
                or a list with one or two parameter names for a heatmap of
                the mean test score grouped by those parameters.
        """
        results = pd.DataFrame(self.grid_search_results.cv_results_)

        if params_to_plot is None:
            # Create a column for configuration details
            results['config'] = results['params'].apply(lambda x: str(x))

            # Visualize mean test scores along with configurations
            plt.figure(figsize=(15, 8))
            bar_plot = plt.bar(results.index, results['mean_test_score'], color='blue', alpha=0.7)
            plt.xticks(results.index, results['config'], rotation='vertical', fontsize=8)
            plt.ylabel('Mean Test Score')
            plt.title('Mean Test Scores for Each Configuration')
            plt.tight_layout()

            # Add values on top of bars
            for bar, score in zip(bar_plot, results['mean_test_score']):
                plt.text(bar.get_x() + bar.get_width() / 2 - 0.15, bar.get_height() + 0.01, f'{score:.3f}', fontsize=8)

            plt.show()
            return

        try:
            # NOTE(review): this label is built from the first *parameter*
            # name, not from a scoring metric -- confirm the intended title.
            mean_test_col = f'mean_test_{params_to_plot[0]}'
            param_cols = [f'param_{param}' for param in params_to_plot]

            if len(params_to_plot) == 1:
                # Heatmap of the mean test score for a single parameter.
                # Restricting the pivot to 'mean_test_score' keeps timing and
                # rank columns out of the plot.
                plt.figure(figsize=(8, 6))
                scores = results.pivot_table(index=param_cols[0], values='mean_test_score', aggfunc="mean")
                sns.heatmap(scores, annot=True, cmap='YlGnBu', fmt=".3f", cbar_kws={'label': mean_test_col})
                plt.title(mean_test_col.capitalize())
                plt.show()
            elif len(params_to_plot) == 2:
                # Heatmap of the mean test score over two parameters.
                plt.figure(figsize=(8, 6))
                scores = results.pivot_table(index=param_cols[0], columns=param_cols[1], values='mean_test_score', aggfunc="mean")
                sns.heatmap(scores, annot=True, cmap='YlGnBu', fmt=".3f", cbar_kws={'label': mean_test_col})
                plt.title(mean_test_col.capitalize())
                plt.show()
            else:
                print("Invalid number of parameters to plot. You can provide either one or two parameters.")
        except KeyError as e:
            print(f"Error: {e}. Make sure the specified scoring metric exists in the results DataFrame.")

    def _tensor_datasets(self):
        """Return ``(train, test)`` TensorDatasets built from the stored arrays."""
        train_data = TensorDataset(torch.from_numpy(self.X_train).float(), torch.from_numpy(self.y_train).float())
        test_data = TensorDataset(torch.from_numpy(self.X_test).float(), torch.from_numpy(self.y_test).float())
        return train_data, test_data

    @staticmethod
    def _fit_one_epoch(model, loader, optimizer, criterion):
        """Run one optimisation pass of ``model`` over ``loader``."""
        model.train()
        for batch_X, batch_y in loader:
            optimizer.zero_grad()
            # squeeze(1) keeps the batch axis, so a trailing batch holding a
            # single sample still matches the shape of ``batch_y``.
            loss = criterion(model(batch_X).squeeze(1), batch_y)
            loss.backward()
            optimizer.step()

    @staticmethod
    def _collect_predictions(model, loader):
        """Return ``(true_labels, probabilities)`` of ``model`` over ``loader`` as flat lists."""
        model.eval()
        true_labels, probabilities = [], []
        with torch.no_grad():
            for batch_X, batch_y in loader:
                probabilities.extend(torch.sigmoid(model(batch_X)).squeeze(1).tolist())
                true_labels.extend(batch_y.tolist())
        return true_labels, probabilities

    def optuna_optimisation(self, direction, params):
        """
        Search ``BinaryClassifier`` hyperparameters with Optuna.

        Args:
            direction: Optimisation direction passed to
                ``optuna.create_study``.
            params: Search space. ``batch_size``, ``num_layers``,
                ``hidden_size``, ``learning_rate`` and ``num_epochs`` are
                keyword arguments for the matching ``trial.suggest_*`` call,
                ``optimizer`` is the list of optimizer names to choose from
                and ``n_trials`` the number of trials.

        Returns:
            The finished Optuna study, also kept in ``self.study``.
        """
        train_data, test_data = self._tensor_datasets()

        def objective(trial):
            batch_size = trial.suggest_int('batch_size', **params['batch_size'])

            train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
            test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

            # Create the model with the suggested hyperparameters
            model = BinaryClassifier(
                input_size=self.X_train.shape[1],
                num_layers=trial.suggest_int('num_layers', **params['num_layers']),
                layer_size=trial.suggest_int('hidden_size', **params['hidden_size']),
            )

            # Define the loss function and optimizer
            criterion = nn.BCEWithLogitsLoss()
            optimizer_name = trial.suggest_categorical('optimizer', params['optimizer'])
            learning_rate = trial.suggest_float('learning_rate', **params['learning_rate'])
            # Raises ValueError for an unsupported optimizer name.
            optimizer = self.get_optimizer(optimizer_name, model.parameters(), learning_rate)

            num_epochs = trial.suggest_int('num_epochs', **params['num_epochs'])
            for epoch in range(num_epochs):
                self._fit_one_epoch(model, train_loader, optimizer, criterion)

                # The test AUC is reported after every epoch so the pruner
                # can stop unpromising trials early.
                true_labels, predictions = self._collect_predictions(model, test_loader)
                auc = roc_auc_score(true_labels, predictions)

                trial.report(auc, epoch)

                # Handle pruning based on the intermediate value
                if trial.should_prune():
                    raise optuna.TrialPruned()

            return auc

        # Create an Optuna study
        study = optuna.create_study(direction=direction)
        study.optimize(objective, n_trials=params['n_trials'])

        self.study = study

        # Get the best hyperparameters
        best_params = study.best_params
        print(f"Best Hyperparameters: {best_params}")

        return study

    def train_optimized_model(self ,trial ,th_min , th_max):
        """
        Train a model with the best hyperparameters and score one threshold.

        Used as the objective of the threshold study: a fresh
        ``BinaryClassifier`` is trained with ``self.study.best_params`` and
        evaluated at the threshold suggested by ``trial``.

        Args:
            trial: Optuna trial used to suggest the threshold.
            th_min: Lower bound of the threshold range.
            th_max: Upper bound of the threshold range.

        Returns:
            float: Balanced accuracy on the test set at that threshold.
        """
        best_params = self.study.best_params

        threshold = trial.suggest_float('threashhold', th_min, th_max, log=True)

        train_data, test_data = self._tensor_datasets()

        train_loader = DataLoader(train_data, batch_size=best_params['batch_size'], shuffle=True)
        test_loader = DataLoader(test_data, batch_size=best_params['batch_size'], shuffle=False)

        # Use the best hyperparameters to train the final model
        final_model = BinaryClassifier(input_size=self.X_train.shape[1], layer_size=best_params['hidden_size'] , num_layers=best_params['num_layers'])
        final_optimizer = self.get_optimizer(best_params['optimizer'], final_model.parameters(), best_params['learning_rate'])
        final_criterion = nn.BCEWithLogitsLoss()

        for _ in range(best_params['num_epochs']):
            self._fit_one_epoch(final_model, train_loader, final_optimizer, final_criterion)

        # Evaluate the final model on the test set
        true_labels, predictions = self._collect_predictions(final_model, test_loader)

        final_balanced_acc = balanced_accuracy_score(true_labels, (np.array(predictions) > threshold).astype(int))
        print(f"Model balanced accuracy: {final_balanced_acc}")

        return final_balanced_acc

    def get_optimizer(self, optimizer_name, parameters, learning_rate):
        """
        Build a torch optimizer by name.

        Args:
            optimizer_name: ``'Adam'``, ``'SGD'`` or ``'RMSprop'``.
            parameters: Parameters to optimise.
            learning_rate: Learning rate of the optimizer.

        Raises:
            ValueError: If ``optimizer_name`` is not supported.
        """
        if optimizer_name == 'Adam':
            return optim.Adam(parameters, lr=learning_rate)
        elif optimizer_name == 'SGD':
            return optim.SGD(parameters, lr=learning_rate)
        elif optimizer_name == 'RMSprop':
            return optim.RMSprop(parameters, lr=learning_rate)
        else:
            raise ValueError(f"Unknown optimizer: {optimizer_name}")

    def plot_param_importances(self):
        """Return Optuna's parameter-importance plot for ``self.study``."""
        return optuna.visualization.plot_param_importances(self.study)

    def plot_slice(self , params):
        """Return Optuna's slice plot of ``self.study`` for ``params``."""
        return optuna.visualization.plot_slice(self.study , params=params)

    def plot_parallel_coordinate(self):
        """Return Optuna's parallel-coordinate plot for ``self.study``."""
        return optuna.visualization.plot_parallel_coordinate(self.study)

    def plot_rank(self , params=None):
        """Return Optuna's rank plot of ``self.study`` for ``params``."""
        return optuna.visualization.plot_rank(self.study , params=params)

    def plot_optimization_history(self):
        """Return Optuna's optimisation-history plot for ``self.study``."""
        return optuna.visualization.plot_optimization_history(self.study)

    def optimize_model_threashhold(self , n_trials , th_min , th_max):
        """
        Search the decision threshold that maximises balanced accuracy.

        Requires ``self.study`` to exist already, because every trial calls
        ``train_optimized_model``.

        Args:
            n_trials: Number of Optuna trials.
            th_min: Lower bound of the threshold range.
            th_max: Upper bound of the threshold range.

        Returns:
            Optuna's rank plot of the threshold study.
        """
        th_study = optuna.create_study(direction='maximize')
        th_study.optimize(lambda trial: self.train_optimized_model(trial, th_min=th_min, th_max=th_max), n_trials)

        # Get the best hyperparameters
        best_params = th_study.best_params
        print(f"Best Hyperparameters: {best_params}")

        return optuna.visualization.plot_rank(th_study , params=['threashhold'])

    def federated_params_iptim(self , params , direction, model, fl_dataset):
        """
        Search federated-learning hyperparameters with Optuna.

        Args:
            params: Search space. ``learning_rate``, ``num_rounds`` and
                ``diff_privacy`` are keyword arguments for the matching
                ``trial.suggest_*`` call, ``optimizer`` and ``fl_strategy``
                are lists of choices and ``n_trials`` the number of trials.
            direction: Optimisation direction passed to
                ``optuna.create_study``.
            model: ``torch.nn.Module`` wrapped in a ``Model`` for each trial.
            fl_dataset: Federated dataset; its ``trainloaders`` give the
                number of clients.

        Returns:
            The finished Optuna study, also kept in ``self.study``.
        """

        def objective(trial):
            criterion = nn.BCEWithLogitsLoss()

            optimizer_name = trial.suggest_categorical('optimizer', params['optimizer'])
            learning_rate = trial.suggest_float('learning_rate', **params['learning_rate'])
            num_rounds = trial.suggest_int('num_rounds', **params['num_rounds'])
            # Differential privacy is searched as a 0/1 integer.
            diff_privacy = trial.suggest_int('diff_privacy', **params['diff_privacy']) == 1

            # NOTE(review): the same ``model`` instance is reused by every
            # trial -- confirm its weights do not carry over between trials.
            optimizer = self.get_optimizer(optimizer_name, model.parameters(), learning_rate)

            # Creating a new Model instance using the specific model created by DynamicModel
            global_model = Model(model, optimizer, criterion)

            # Get the initial params of the model
            init_params = global_model.get_parameters()

            fl_strategy = trial.suggest_categorical('fl_strategy', params['fl_strategy'])

            learning_strategy = Strategy(fl_strategy,
                                         fraction_fit = 1.0 ,
                                         fraction_evaluate = 1.0,
                                         min_fit_clients = 2,
                                         min_evaluate_clients = 2,
                                         min_available_clients = 2 ,
                                         initial_parameters=init_params)

            learning_strategy.create_strategy()

            # Create the server
            server = FlowerServer(global_model, strategy = learning_strategy, num_rounds = num_rounds,
                                  num_clients = len(fl_dataset.trainloaders),
                                  fed_dataset = fl_dataset,diff_privacy = diff_privacy,
                                  # You can change the resources allocated for each client based on your machine
                                  client_resources={'num_cpus': 1.0, 'num_gpus': 0.0}
                                  )

            ppl_1 = FLpipeline( name ="the first fl_pipeline",description = "this is our first FL pipeline",
                                server = server)

            # Run the training of the model
            ppl_1.server.run()

            # The score of a trial is the last AUC recorded by the server.
            return server.auc[-1]

        study = optuna.create_study(direction=direction)
        study.optimize(objective, n_trials=params['n_trials'])

        self.study = study

        # Get the best hyperparameters
        best_params = study.best_params
        print(f"Best Hyperparameters: {best_params}")

        return study
|
426
|
+
|
427
|
+
|
428
|
+
|
429
|
+
|
430
|
+
|
431
|
+
|
432
|
+
|
433
|
+
|
434
|
+
|
435
|
+
|
436
|
+
|
437
|
+
|
438
|
+
|
439
|
+
|
440
|
+
|
441
|
+
|
442
|
+
|