MEDfl 2.0.4.dev1__py3-none-any.whl → 2.0.4.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- MEDfl/rw/client.py +98 -29
- MEDfl/rw/model.py +28 -0
- MEDfl/rw/server.py +71 -18
- MEDfl/rw/strategy.py +72 -78
- {MEDfl-2.0.4.dev1.dist-info → MEDfl-2.0.4.dev3.dist-info}/METADATA +1 -1
- MEDfl-2.0.4.dev3.dist-info/RECORD +36 -0
- MEDfl/rw/rwConfig.py +0 -21
- MEDfl/rw/verbose_server.py +0 -21
- MEDfl-2.0.4.dev1.dist-info/RECORD +0 -62
- Medfl/LearningManager/__init__.py +0 -13
- Medfl/LearningManager/client.py +0 -150
- Medfl/LearningManager/dynamicModal.py +0 -287
- Medfl/LearningManager/federated_dataset.py +0 -60
- Medfl/LearningManager/flpipeline.py +0 -192
- Medfl/LearningManager/model.py +0 -223
- Medfl/LearningManager/params.yaml +0 -14
- Medfl/LearningManager/params_optimiser.py +0 -442
- Medfl/LearningManager/plot.py +0 -229
- Medfl/LearningManager/server.py +0 -181
- Medfl/LearningManager/strategy.py +0 -82
- Medfl/LearningManager/utils.py +0 -331
- Medfl/NetManager/__init__.py +0 -10
- Medfl/NetManager/database_connector.py +0 -43
- Medfl/NetManager/dataset.py +0 -92
- Medfl/NetManager/flsetup.py +0 -320
- Medfl/NetManager/net_helper.py +0 -254
- Medfl/NetManager/net_manager_queries.py +0 -142
- Medfl/NetManager/network.py +0 -194
- Medfl/NetManager/node.py +0 -184
- Medfl/__init__.py +0 -3
- Medfl/scripts/__init__.py +0 -2
- Medfl/scripts/base.py +0 -30
- Medfl/scripts/create_db.py +0 -126
- {MEDfl-2.0.4.dev1.dist-info → MEDfl-2.0.4.dev3.dist-info}/LICENSE +0 -0
- {MEDfl-2.0.4.dev1.dist-info → MEDfl-2.0.4.dev3.dist-info}/WHEEL +0 -0
- {MEDfl-2.0.4.dev1.dist-info → MEDfl-2.0.4.dev3.dist-info}/top_level.txt +0 -0
@@ -1,442 +0,0 @@
|
|
1
|
-
import numpy as np
|
2
|
-
import pandas as pd
|
3
|
-
import matplotlib.pyplot as plt
|
4
|
-
import seaborn as sns
|
5
|
-
import torch
|
6
|
-
import torch.nn as nn
|
7
|
-
import torch.optim as optim
|
8
|
-
import torch.nn.functional as F
|
9
|
-
from torch.utils.data import TensorDataset, DataLoader
|
10
|
-
from sklearn.model_selection import GridSearchCV, train_test_split
|
11
|
-
from sklearn.base import BaseEstimator
|
12
|
-
from sklearn.metrics import make_scorer, precision_score, recall_score, accuracy_score, f1_score,roc_auc_score, balanced_accuracy_score
|
13
|
-
import optuna
|
14
|
-
|
15
|
-
from MEDfl.LearningManager.model import Model
|
16
|
-
from MEDfl.LearningManager.strategy import Strategy
|
17
|
-
from MEDfl.LearningManager.server import FlowerServer
|
18
|
-
from MEDfl.LearningManager.flpipeline import FLpipeline
|
19
|
-
|
20
|
-
class BinaryClassifier(nn.Module):
    """Fully connected network that emits one raw output value per sample.

    The stack is built, in this order, from one input layer
    (``input_size -> layer_size``), ``num_layers - 1`` hidden layers
    (``layer_size -> layer_size``) and one output layer (``layer_size -> 1``).
    ReLU follows every layer except the last, so the result is an
    un-activated score.
    """

    def __init__(self, input_size, num_layers, layer_size):
        """Create the linear layers.

        Args:
            input_size: Number of input features.
            num_layers: Count of ``layer_size``-wide layers, input layer included.
            layer_size: Width of the input and hidden layers.
        """
        super().__init__()

        entry = nn.Linear(input_size, layer_size)
        hidden = [nn.Linear(layer_size, layer_size) for _ in range(num_layers - 1)]
        head = nn.Linear(layer_size, 1)

        # A ModuleList registers a dynamic number of layers as sub-modules.
        self.layers = nn.ModuleList([entry, *hidden, head])

    def forward(self, x):
        """Apply ReLU-activated layers followed by the un-activated output layer."""
        *body, head = self.layers
        for layer in body:
            x = F.relu(layer(x))
        return head(x)
|
42
|
-
|
43
|
-
class CustomPyTorchClassifier(BaseEstimator):
    """Scikit-learn style estimator wrapping a one-hidden-layer PyTorch network.

    ``self.model`` is ``Linear -> ReLU -> Linear -> Sigmoid``, so calling it
    yields values in ``(0, 1)``.  Training uses ``BCEWithLogitsLoss``, which
    applies the sigmoid itself; the loss is therefore fed the output of the
    network *without* its final ``Sigmoid`` layer.

    Args:
        hidden_dim: Width of the hidden layer.
        lr: Learning rate passed to Adam.
        pos_weight: Weight of the positive class in the loss.
        th: Threshold applied to the sigmoid output in ``predict``.
        max_epochs: Number of passes over the training data.
        batch_size: Mini-batch size used during training.
    """

    def __init__(self, hidden_dim=10, lr=0.001, pos_weight=1, th=0.5, max_epochs=10, batch_size=32):
        self.hidden_dim = hidden_dim
        self.lr = lr
        self.pos_weight = pos_weight
        self.max_epochs = max_epochs
        self.batch_size = batch_size
        self.th = th
        self.model = None

    @staticmethod
    def _to_float_tensor(data):
        """Convert a tensor, array or pandas/array-like object to a float32 tensor."""
        if isinstance(data, torch.Tensor):
            # detach/cpu so tensors that require grad or live elsewhere still convert
            data = data.detach().cpu().numpy()
        return torch.from_numpy(np.asarray(data)).float()

    def fit(self, X, y):
        """Train a fresh network on ``X`` / ``y`` and return ``self``.

        Args:
            X: 2-D feature matrix (tensor, array or array-like).
            y: Binary targets, one per row of ``X``.
        """
        features = self._to_float_tensor(X)
        # Flatten so targets always line up with the squeezed network output.
        targets = self._to_float_tensor(y).reshape(-1)

        self.model = nn.Sequential(
            nn.Linear(features.shape[1], self.hidden_dim),
            nn.ReLU(),
            nn.Linear(self.hidden_dim, 1),
            nn.Sigmoid(),
        )
        # Shares the layers with self.model but stops before the Sigmoid:
        # BCEWithLogitsLoss expects raw logits and applies the sigmoid itself.
        logit_net = self.model[:-1]

        criterion = nn.BCEWithLogitsLoss(
            pos_weight=torch.as_tensor(self.pos_weight, dtype=torch.float32)
        )
        optimizer = optim.Adam(self.model.parameters(), lr=self.lr)
        train_loader = DataLoader(
            TensorDataset(features, targets), batch_size=self.batch_size, shuffle=True
        )

        self.model.train()
        for _ in range(self.max_epochs):
            for inputs, labels in train_loader:
                optimizer.zero_grad()
                # squeeze(-1) keeps the batch axis even for a batch of one sample
                loss = criterion(logit_net(inputs).squeeze(-1), labels)
                loss.backward()
                optimizer.step()
        return self

    def predict(self, X):
        """Return a float array of 0.0/1.0 labels for the rows of ``X``.

        Raises:
            AttributeError: If called before ``fit``.
        """
        if self.model is None:
            raise AttributeError("CustomPyTorchClassifier is not fitted yet; call fit() first.")

        features = self._to_float_tensor(X)
        self.model.eval()
        with torch.no_grad():
            probabilities = self.model(features).squeeze(-1)
            predictions = (probabilities > self.th).float().numpy()
        return predictions

    def score(self, X, y):
        """Return the accuracy of ``predict(X)`` against ``y``."""
        predictions = self.predict(X)
        return accuracy_score(y, predictions)
|
96
|
-
|
97
|
-
|
98
|
-
class ParamsOptimiser:
    """Hyper-parameter search helpers for centralized and federated training.

    The instance stores train/test arrays and exposes a scikit-learn grid
    search, Optuna studies (model hyper-parameters, decision threshold,
    federated settings) and thin wrappers around Optuna's visualizations.
    The most recent Optuna study is kept in ``self.study`` and the most recent
    grid search in ``self.grid_search_results``.
    """

    def __init__(self, X_train=None, y_train=None, X_test=None, y_test=None):
        """Store the datasets, converting pandas objects to NumPy arrays."""
        self.X_train = self._as_array(X_train)
        self.y_train = self._as_array(y_train)
        self.X_test = self._as_array(X_test)
        self.y_test = self._as_array(y_test)

    @staticmethod
    def _as_array(data):
        """Return pandas DataFrames/Series as NumPy arrays; pass anything else through."""
        if isinstance(data, (pd.DataFrame, pd.Series)):
            return data.to_numpy()
        return data

    def _tensor_datasets(self):
        """Build float tensor datasets for the stored train and test splits."""
        train_data = TensorDataset(torch.from_numpy(self.X_train).float(),
                                   torch.from_numpy(self.y_train).float())
        test_data = TensorDataset(torch.from_numpy(self.X_test).float(),
                                  torch.from_numpy(self.y_test).float())
        return train_data, test_data

    @staticmethod
    def _train(model, optimizer, criterion, loader, num_epochs):
        """Run ``num_epochs`` passes of mini-batch training over ``loader``."""
        for _ in range(num_epochs):
            model.train()
            for batch_X, batch_y in loader:
                optimizer.zero_grad()
                # squeeze(-1)/reshape(-1) keep the batch axis for a batch of one
                loss = criterion(model(batch_X).squeeze(-1), batch_y.reshape(-1))
                loss.backward()
                optimizer.step()

    @staticmethod
    def _predict_scores(model, loader):
        """Return ``(true_labels, sigmoid_scores)`` as flat lists for ``loader``."""
        model.eval()
        true_labels, scores = [], []
        with torch.no_grad():
            for batch_X, batch_y in loader:
                scores.extend(torch.sigmoid(model(batch_X)).reshape(-1).tolist())
                true_labels.extend(batch_y.reshape(-1).tolist())
        return true_labels, scores

    def perform_grid_search(self, param_grid, scoring_metric='recall', cv=3, verbose=1):
        """Grid-search ``CustomPyTorchClassifier`` over ``param_grid``.

        Args:
            param_grid: Parameter grid forwarded to ``GridSearchCV``.
            scoring_metric: One of ``'precision'``, ``'accuracy'``, ``'recall'``
                or ``'f1'``; any other value is scored with recall.
            cv: Cross-validation setting forwarded to ``GridSearchCV``.
            verbose: Verbosity forwarded to ``GridSearchCV``.

        Returns:
            The fitted ``GridSearchCV`` (also stored in ``self.grid_search_results``).
        """
        metric_functions = {
            'precision': precision_score,
            'accuracy': accuracy_score,
            'recall': recall_score,
            'f1': f1_score,
        }
        scorer = make_scorer(metric_functions.get(scoring_metric, recall_score))

        grid_search = GridSearchCV(CustomPyTorchClassifier(), param_grid, scoring=scorer,
                                   cv=cv, refit=scoring_metric, verbose=verbose)
        grid_search.fit(self.X_train, self.y_train)

        self.grid_search_results = grid_search  # Save the grid search results

        return grid_search

    def plot_results(self, params_to_plot=None):
        """Plot the mean test scores of the last grid search.

        With ``params_to_plot=None`` a bar chart with one bar per configuration
        is shown.  With one or two parameter names a heatmap of
        ``mean_test_score`` indexed by those parameters is shown.  Any other
        number of names prints a message and plots nothing.
        """
        results = pd.DataFrame(self.grid_search_results.cv_results_)

        if params_to_plot is None:
            # Create a column for configuration details
            results['config'] = results['params'].apply(str)

            # Visualize mean test scores along with configurations
            plt.figure(figsize=(15, 8))
            bar_plot = plt.bar(results.index, results['mean_test_score'], color='blue', alpha=0.7)
            plt.xticks(results.index, results['config'], rotation='vertical', fontsize=8)
            plt.ylabel('Mean Test Score')
            plt.title('Mean Test Scores for Each Configuration')
            plt.tight_layout()

            # Add values on top of bars
            for bar, score in zip(bar_plot, results['mean_test_score']):
                plt.text(bar.get_x() + bar.get_width() / 2 - 0.15, bar.get_height() + 0.01,
                         f'{score:.3f}', fontsize=8)

            plt.show()
            return

        if len(params_to_plot) not in (1, 2):
            print("Invalid number of parameters to plot. You can provide either one or two parameters.")
            return

        # Label text is derived from the first requested name.
        mean_test_col = f'mean_test_{params_to_plot[0]}'
        param_cols = [f'param_{param}' for param in params_to_plot]

        # Both heatmaps show mean_test_score; a second name becomes the columns.
        pivot_kwargs = {'index': param_cols[0], 'values': 'mean_test_score', 'aggfunc': "mean"}
        if len(param_cols) == 2:
            pivot_kwargs['columns'] = param_cols[1]

        try:
            scores = results.pivot_table(**pivot_kwargs)
        except KeyError as e:
            print(f"Error: {e}. Make sure the specified scoring metric exists in the results DataFrame.")
            return

        plt.figure(figsize=(8, 6))
        sns.heatmap(scores, annot=True, cmap='YlGnBu', fmt=".3f", cbar_kws={'label': mean_test_col})
        plt.title(mean_test_col.capitalize())
        plt.show()

    def optuna_optimisation(self, direction, params):
        """Search ``BinaryClassifier`` hyper-parameters with Optuna, scored by ROC AUC.

        Args:
            direction: Optimization direction passed to ``optuna.create_study``.
            params: Search space.  ``'batch_size'``, ``'num_layers'``,
                ``'hidden_size'``, ``'learning_rate'`` and ``'num_epochs'`` hold
                keyword arguments for the matching ``trial.suggest_*`` call,
                ``'optimizer'`` holds the optimizer names to choose from and
                ``'n_trials'`` the number of trials.

        Returns:
            The finished study (also stored in ``self.study``).
        """
        train_data, test_data = self._tensor_datasets()

        def objective(trial):
            batch_size = trial.suggest_int('batch_size', **params['batch_size'])

            train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
            test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

            # Create the model with the suggested hyperparameters
            model = BinaryClassifier(
                input_size=self.X_train.shape[1],
                num_layers=trial.suggest_int('num_layers', **params['num_layers']),
                layer_size=trial.suggest_int('hidden_size', **params['hidden_size']),
            )

            # Define the loss function and optimizer
            criterion = nn.BCEWithLogitsLoss()
            optimizer_name = trial.suggest_categorical('optimizer', params['optimizer'])
            learning_rate = trial.suggest_float('learning_rate', **params['learning_rate'])
            # get_optimizer raises ValueError for names it does not know
            optimizer = self.get_optimizer(optimizer_name, model.parameters(), learning_rate)

            num_epochs = trial.suggest_int('num_epochs', **params['num_epochs'])
            self._train(model, optimizer, criterion, train_loader, num_epochs)

            true_labels, predictions = self._predict_scores(model, test_loader)
            auc = roc_auc_score(true_labels, predictions)

            # Reported once, at the index of the last trained epoch.
            trial.report(auc, max(num_epochs - 1, 0))

            # Handle pruning based on the intermediate value
            if trial.should_prune():
                raise optuna.TrialPruned()

            return auc

        # Create an Optuna study
        study = optuna.create_study(direction=direction)
        study.optimize(objective, n_trials=params['n_trials'])

        self.study = study

        # Get the best hyperparameters
        best_params = study.best_params
        print(f"Best Hyperparameters: {best_params}")

        return study

    def train_optimized_model(self, trial, th_min, th_max):
        """Train with ``self.study.best_params`` and score one suggested threshold.

        A threshold is drawn log-uniformly from ``[th_min, th_max]``, a new
        ``BinaryClassifier`` is trained with the best parameters of
        ``self.study``, and the balanced accuracy of the thresholded test
        predictions is printed and returned.
        """
        best_params = self.study.best_params

        threshold = trial.suggest_float('threashhold', th_min, th_max, log=True)

        train_data, test_data = self._tensor_datasets()
        train_loader = DataLoader(train_data, batch_size=best_params['batch_size'], shuffle=True)
        test_loader = DataLoader(test_data, batch_size=best_params['batch_size'], shuffle=False)

        # Use the best hyperparameters to train the final model
        final_model = BinaryClassifier(input_size=self.X_train.shape[1],
                                       layer_size=best_params['hidden_size'],
                                       num_layers=best_params['num_layers'])
        final_optimizer = self.get_optimizer(best_params['optimizer'], final_model.parameters(),
                                             best_params['learning_rate'])
        final_criterion = nn.BCEWithLogitsLoss()

        self._train(final_model, final_optimizer, final_criterion, train_loader,
                    best_params['num_epochs'])

        # Evaluate the final model on the test set
        true_labels, predictions = self._predict_scores(final_model, test_loader)

        final_balanced_acc = balanced_accuracy_score(
            true_labels, (np.array(predictions) > threshold).astype(int))
        print(f"Model balanced accuracy: {final_balanced_acc}")

        return final_balanced_acc

    def get_optimizer(self, optimizer_name, parameters, learning_rate):
        """Return an ``'Adam'``, ``'SGD'`` or ``'RMSprop'`` optimizer over ``parameters``.

        Raises:
            ValueError: If ``optimizer_name`` is none of the supported names.
        """
        if optimizer_name == 'Adam':
            return optim.Adam(parameters, lr=learning_rate)
        elif optimizer_name == 'SGD':
            return optim.SGD(parameters, lr=learning_rate)
        elif optimizer_name == 'RMSprop':
            return optim.RMSprop(parameters, lr=learning_rate)
        else:
            raise ValueError(f"Unknown optimizer: {optimizer_name}")

    def plot_param_importances(self):
        """Return Optuna's parameter-importance figure for ``self.study``."""
        return optuna.visualization.plot_param_importances(self.study)

    def plot_slice(self, params):
        """Return Optuna's slice plot of ``self.study`` for ``params``."""
        return optuna.visualization.plot_slice(self.study, params=params)

    def plot_parallel_coordinate(self):
        """Return Optuna's parallel-coordinate plot of ``self.study``."""
        return optuna.visualization.plot_parallel_coordinate(self.study)

    def plot_rank(self, params=None):
        """Return Optuna's rank plot of ``self.study`` for ``params``."""
        return optuna.visualization.plot_rank(self.study, params=params)

    def plot_optimization_history(self):
        """Return Optuna's optimization-history plot of ``self.study``."""
        return optuna.visualization.plot_optimization_history(self.study)

    def optimize_model_threashhold(self, n_trials, th_min, th_max):
        """Search the decision threshold that maximizes balanced accuracy.

        Runs ``n_trials`` trials of ``train_optimized_model`` in a new
        maximizing study, prints the best parameters and returns Optuna's rank
        plot for the ``'threashhold'`` parameter.  ``self.study`` is left
        untouched.
        """
        additional_params = {'th_min': th_min, 'th_max': th_max}

        th_study = optuna.create_study(direction='maximize')
        th_study.optimize(lambda trial: self.train_optimized_model(trial, **additional_params),
                          n_trials)

        # Get the best hyperparameters
        best_params = th_study.best_params
        print(f"Best Hyperparameters: {best_params}")

        return optuna.visualization.plot_rank(th_study, params=['threashhold'])

    def federated_params_iptim(self, params, direction, model, fl_dataset):
        """Search federated-learning settings with Optuna.

        Each trial suggests an optimizer, learning rate, number of rounds,
        differential-privacy flag and strategy name, builds the project's
        ``Model`` / ``Strategy`` / ``FlowerServer`` / ``FLpipeline`` objects,
        runs the server and returns the last entry of ``server.auc``.

        Args:
            params: Search space with keys ``'optimizer'``, ``'learning_rate'``,
                ``'num_rounds'``, ``'diff_privacy'``, ``'fl_strategy'`` and
                ``'n_trials'``.
            direction: Optimization direction passed to ``optuna.create_study``.
            model: Network whose parameters are optimized.
                NOTE(review): the same instance is handed to every trial and
                is not re-created here - confirm whether trials are meant to
                start from fresh weights.
            fl_dataset: Federated dataset; ``len(fl_dataset.trainloaders)`` is
                used as the number of clients.

        Returns:
            The finished study (also stored in ``self.study``).
        """

        def objective(trial):
            criterion = nn.BCEWithLogitsLoss()

            optimizer_name = trial.suggest_categorical('optimizer', params['optimizer'])
            learning_rate = trial.suggest_float('learning_rate', **params['learning_rate'])
            num_rounds = trial.suggest_int('num_rounds', **params['num_rounds'])
            # The flag is sampled as an integer; only the value 1 enables it.
            diff_privacy = trial.suggest_int('diff_privacy', **params['diff_privacy']) == 1

            # get_optimizer raises ValueError for names it does not know
            optimizer = self.get_optimizer(optimizer_name, model.parameters(), learning_rate)

            # Wrap the network, optimizer and loss in the project's Model
            global_model = Model(model, optimizer, criterion)

            # Get the initial params of the model
            init_params = global_model.get_parameters()

            fl_strategy = trial.suggest_categorical('fl_strategy', params['fl_strategy'])

            learning_strategy = Strategy(fl_strategy,
                                         fraction_fit=1.0,
                                         fraction_evaluate=1.0,
                                         min_fit_clients=2,
                                         min_evaluate_clients=2,
                                         min_available_clients=2,
                                         initial_parameters=init_params)

            learning_strategy.create_strategy()

            # Create the server
            server = FlowerServer(global_model, strategy=learning_strategy, num_rounds=num_rounds,
                                  num_clients=len(fl_dataset.trainloaders),
                                  fed_dataset=fl_dataset, diff_privacy=diff_privacy,
                                  # You can change the resources allocated for each client based on your machine
                                  client_resources={'num_cpus': 1.0, 'num_gpus': 0.0}
                                  )

            ppl_1 = FLpipeline(name="the first fl_pipeline", description="this is our first FL pipeline",
                               server=server)

            # Run the training of the model
            ppl_1.server.run()

            return server.auc[-1]

        study = optuna.create_study(direction=direction)
        study.optimize(objective, n_trials=params['n_trials'])

        self.study = study

        # Get the best hyperparameters
        best_params = study.best_params
        print(f"Best Hyperparameters: {best_params}")

        return study
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
Medfl/LearningManager/plot.py
DELETED
@@ -1,229 +0,0 @@
|
|
1
|
-
import matplotlib.pyplot as plt
|
2
|
-
import numpy as np
|
3
|
-
import seaborn as sns
|
4
|
-
|
5
|
-
from .utils import *
|
6
|
-
|
7
|
-
# Sample input for AccuracyLossPlotter - replace it with your actual code for
# data collection.  Keys are (configuration label, metric name) tuples and
# values are per-iteration series; the trailing ``...`` entries are
# placeholders standing in for further values.
results_dict = {
    ("LR: 0.001, Optimizer: Adam", "accuracy"): [0.85, 0.89, 0.92, 0.94, ...],
    ("LR: 0.001, Optimizer: Adam", "loss"): [0.2, 0.15, 0.1, 0.08, ...],
    ("LR: 0.01, Optimizer: SGD", "accuracy"): [0.88, 0.91, 0.93, 0.95, ...],
    ("LR: 0.01, Optimizer: SGD", "loss"): [0.18, 0.13, 0.09, 0.07, ...],
    ("LR: 0.1, Optimizer: Adam", "accuracy"): [0.82, 0.87, 0.91, 0.93, ...],
    ("LR: 0.1, Optimizer: Adam", "loss"): [0.25, 0.2, 0.15, 0.12, ...],
}

# Design notes:
#
# The server should have (len = number of rounds):
#     self.accuracies
#     self.losses
#
# The client should have (len = number of epochs):
#     self.accuracies
#     self.losses
#     self.epsilons
#     self.deltas
#
# Common things: LR, SGD, aggregation
|
32
|
-
|
33
|
-
|
34
|
-
class AccuracyLossPlotter:
    """
    A utility class for plotting accuracy and loss metrics based on experiment results.

    Args:
        results_dict (dict): Dictionary containing experiment results, keyed by
            ``(parameter label, metric name)`` tuples.

    Attributes:
        results_dict (dict): The dictionary passed to the constructor.
        parameters (list): Unique parameter labels, in first-seen order.
        metrics (list): Unique metric names, in first-seen order.
        iterations (range): ``1..n`` where ``n`` is the length of the first
            series in ``results_dict`` (empty when the dictionary is empty).
    """

    def __init__(self, results_dict):
        """
        Initialize the AccuracyLossPlotter with experiment results.

        Args:
            results_dict (dict): Dictionary containing experiment results organized by parameters and metrics.
        """
        self.results_dict = results_dict
        # dict.fromkeys de-duplicates while keeping insertion order, so the
        # plotting and legend order is the same on every run.
        self.parameters = list(dict.fromkeys(key[0] for key in results_dict))
        self.metrics = list(dict.fromkeys(key[1] for key in results_dict))
        first_series = next(iter(results_dict.values()), [])
        self.iterations = range(1, len(first_series) + 1)

    def plot_accuracy_loss(self):
        """
        Plot every available (parameter, metric) series on one figure.

        Combinations missing from ``results_dict`` are skipped, and each series
        is plotted against its own ``1..len(series)`` iteration axis.
        """

        plt.figure(figsize=(8, 6))

        for param in self.parameters:
            for metric in self.metrics:
                values = self.results_dict.get((param, metric))
                if values is None:
                    continue
                plt.plot(
                    range(1, len(values) + 1),
                    values,
                    label=f"{param} ({metric})",
                    marker="o",
                    linestyle="-",
                )

        plt.xlabel("Rounds")
        plt.ylabel("Accuracy / Loss")
        plt.title("Accuracy and Loss by Parameters")
        plt.legend()
        plt.grid(True)
        plt.show()

    @staticmethod
    def _plot_confusion_heatmap(confusion_matrix, title):
        """Show a 2x2 heatmap built from the 'TN', 'FP', 'FN' and 'TP' entries."""
        matrix = [[confusion_matrix['TN'], confusion_matrix['FP']],
                  [confusion_matrix['FN'], confusion_matrix['TP']]]

        plt.figure(figsize=(6, 4))
        sns.heatmap(matrix, annot=True, fmt='d', cmap='Blues',
                    xticklabels=['Predicted Negative', 'Predicted Positive'],
                    yticklabels=['Actual Negative', 'Actual Positive'])
        plt.title(title)
        plt.xlabel('Predicted label')
        plt.ylabel('True label')
        plt.tight_layout()

        # Display the confusion matrix heatmap
        plt.show()

    @staticmethod
    def plot_global_confusion_matrix(pipeline_name: str):
        """
        Plot a global confusion matrix based on pipeline results.

        Args:
            pipeline_name (str): Name of the pipeline.

        Returns:
            None
        """
        # Get the id of the pipeline by name
        pipeline_id = get_pipeline_from_name(pipeline_name)
        # Get the confusion matrix of the pipeline
        confusion_matrix = get_pipeline_confusion_matrix(pipeline_id)

        AccuracyLossPlotter._plot_confusion_heatmap(confusion_matrix, 'Global Confusion Matrix')

    @staticmethod
    def plot_confusion_Matrix_by_node(node_name: str, pipeline_name: str):
        """
        Plot a confusion matrix for a specific node in the pipeline.

        Args:
            node_name (str): Name of the node.
            pipeline_name (str): Name of the pipeline.

        Returns:
            None
        """
        # Get the id of the pipeline by name
        pipeline_id = get_pipeline_from_name(pipeline_name)
        # Get the confusion matrix of the node within that pipeline
        confusion_matrix = get_node_confusion_matrix(
            pipeline_id, node_name=node_name)

        AccuracyLossPlotter._plot_confusion_heatmap(
            confusion_matrix, 'Confusion Matrix of node: ' + node_name)
        return

    @staticmethod
    def plot_classification_report(pipeline_name: str):
        """
        Plot a comparison of classification report metrics between nodes.

        One group of bars is drawn per metric, with one bar per node.  Nothing
        is plotted when the pipeline results contain no nodes.

        Args:
            pipeline_name (str): Name of the pipeline.

        Returns:
            None
        """

        colors = ['#FF5733', '#6A5ACD', '#3CB371', '#FFD700', '#FFA500',
                  '#8A2BE2', '#00FFFF', '#FF00FF', '#A52A2A', '#00FF00']

        # Get the id of the pipeline by name
        pipeline_id = get_pipeline_from_name(pipeline_name)

        pipeline_results = get_pipeline_result(pipeline_id)

        # Materialize the names so they can be indexed by position below.
        node_names = list(pipeline_results['nodename'])
        if not node_names:
            return

        classificationReports = [
            {
                'Accuracy': pipeline_results['accuracy'][index],
                'Sensitivity/Recall': pipeline_results['sensivity'][index],
                'PPV/Precision': pipeline_results['ppv'][index],
                'NPV': pipeline_results['npv'][index],
                'F1-score': pipeline_results['f1score'][index],
                'False positive rate': pipeline_results['fpr'][index],
                'True positive rate': pipeline_results['tpr'][index],
            }
            for index in range(len(node_names))
        ]

        metric_labels = list(classificationReports[0].keys())  # every report has the same keys

        # Set the positions of the bars on the x-axis
        x = np.arange(len(metric_labels))

        # Total width of one group of bars, shared between the nodes
        width = 0.35
        bar_width = width / len(node_names)

        plt.figure(figsize=(12, 6))

        for index, report in enumerate(classificationReports):
            # (n - 1) / 2 centers the whole group on its tick
            offset = (index - (len(node_names) - 1) / 2) * bar_width
            plt.bar(x + offset, list(report.values()), bar_width,
                    label=node_names[index], color=colors[index % len(colors)])

        # Adding labels, title, and legend
        plt.xlabel('Metrics')
        plt.ylabel('Values')
        plt.title('Comparison of Classification Report Metrics between Nodes')
        plt.xticks(ticks=x, labels=metric_labels, rotation=45)
        plt.legend()

        # Show plot
        plt.tight_layout()
        plt.show()

        return
|