MEDfl 0.2.1__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. MEDfl/LearningManager/__init__.py +13 -13
  2. MEDfl/LearningManager/client.py +150 -181
  3. MEDfl/LearningManager/dynamicModal.py +287 -287
  4. MEDfl/LearningManager/federated_dataset.py +60 -60
  5. MEDfl/LearningManager/flpipeline.py +192 -192
  6. MEDfl/LearningManager/model.py +223 -223
  7. MEDfl/LearningManager/params.yaml +14 -14
  8. MEDfl/LearningManager/params_optimiser.py +442 -442
  9. MEDfl/LearningManager/plot.py +229 -229
  10. MEDfl/LearningManager/server.py +181 -189
  11. MEDfl/LearningManager/strategy.py +82 -138
  12. MEDfl/LearningManager/utils.py +331 -331
  13. MEDfl/NetManager/__init__.py +10 -10
  14. MEDfl/NetManager/database_connector.py +43 -43
  15. MEDfl/NetManager/dataset.py +92 -92
  16. MEDfl/NetManager/flsetup.py +320 -320
  17. MEDfl/NetManager/net_helper.py +254 -254
  18. MEDfl/NetManager/net_manager_queries.py +142 -142
  19. MEDfl/NetManager/network.py +194 -194
  20. MEDfl/NetManager/node.py +184 -184
  21. MEDfl/__init__.py +4 -3
  22. MEDfl/scripts/__init__.py +1 -1
  23. MEDfl/scripts/base.py +29 -29
  24. MEDfl/scripts/create_db.py +126 -126
  25. Medfl/LearningManager/__init__.py +13 -0
  26. Medfl/LearningManager/client.py +150 -0
  27. Medfl/LearningManager/dynamicModal.py +287 -0
  28. Medfl/LearningManager/federated_dataset.py +60 -0
  29. Medfl/LearningManager/flpipeline.py +192 -0
  30. Medfl/LearningManager/model.py +223 -0
  31. Medfl/LearningManager/params.yaml +14 -0
  32. Medfl/LearningManager/params_optimiser.py +442 -0
  33. Medfl/LearningManager/plot.py +229 -0
  34. Medfl/LearningManager/server.py +181 -0
  35. Medfl/LearningManager/strategy.py +82 -0
  36. Medfl/LearningManager/utils.py +331 -0
  37. Medfl/NetManager/__init__.py +10 -0
  38. Medfl/NetManager/database_connector.py +43 -0
  39. Medfl/NetManager/dataset.py +92 -0
  40. Medfl/NetManager/flsetup.py +320 -0
  41. Medfl/NetManager/net_helper.py +254 -0
  42. Medfl/NetManager/net_manager_queries.py +142 -0
  43. Medfl/NetManager/network.py +194 -0
  44. Medfl/NetManager/node.py +184 -0
  45. Medfl/__init__.py +3 -0
  46. Medfl/scripts/__init__.py +2 -0
  47. Medfl/scripts/base.py +30 -0
  48. Medfl/scripts/create_db.py +126 -0
  49. alembic/env.py +61 -61
  50. {MEDfl-0.2.1.dist-info → medfl-2.0.1.dist-info}/METADATA +120 -108
  51. medfl-2.0.1.dist-info/RECORD +55 -0
  52. {MEDfl-0.2.1.dist-info → medfl-2.0.1.dist-info}/WHEEL +1 -1
  53. {MEDfl-0.2.1.dist-info → medfl-2.0.1.dist-info/licenses}/LICENSE +674 -674
  54. MEDfl-0.2.1.dist-info/RECORD +0 -31
  55. {MEDfl-0.2.1.dist-info → medfl-2.0.1.dist-info}/top_level.txt +0 -0
@@ -1,442 +1,442 @@
1
- import numpy as np
2
- import pandas as pd
3
- import matplotlib.pyplot as plt
4
- import seaborn as sns
5
- import torch
6
- import torch.nn as nn
7
- import torch.optim as optim
8
- import torch.nn.functional as F
9
- from torch.utils.data import TensorDataset, DataLoader
10
- from sklearn.model_selection import GridSearchCV, train_test_split
11
- from sklearn.base import BaseEstimator
12
- from sklearn.metrics import make_scorer, precision_score, recall_score, accuracy_score, f1_score,roc_auc_score, balanced_accuracy_score
13
- import optuna
14
-
15
- from MEDfl.LearningManager.model import Model
16
- from MEDfl.LearningManager.strategy import Strategy
17
- from MEDfl.LearningManager.server import FlowerServer
18
- from MEDfl.LearningManager.flpipeline import FLpipeline
19
-
20
- class BinaryClassifier(nn.Module):
21
- def __init__(self, input_size, num_layers, layer_size):
22
- super(BinaryClassifier, self).__init__()
23
-
24
- # Input layer
25
- self.layers = [nn.Linear(input_size, layer_size)]
26
-
27
- # Hidden layers
28
- for _ in range(num_layers - 1):
29
- self.layers.append(nn.Linear(layer_size, layer_size))
30
-
31
- # Output layer
32
- self.layers.append(nn.Linear(layer_size, 1))
33
-
34
- # ModuleList to handle dynamic number of layers
35
- self.layers = nn.ModuleList(self.layers)
36
-
37
- def forward(self, x):
38
- for layer in self.layers[:-1]:
39
- x = F.relu(layer(x))
40
- x = self.layers[-1](x)
41
- return x
42
-
43
- class CustomPyTorchClassifier(BaseEstimator):
44
- def __init__(self, hidden_dim=10, lr=0.001, pos_weight=1, th=0.5, max_epochs=10, batch_size=32):
45
- self.hidden_dim = hidden_dim
46
- self.lr = lr
47
- self.pos_weight = pos_weight
48
- self.max_epochs = max_epochs
49
- self.batch_size = batch_size
50
- self.th = th
51
- self.model = None
52
-
53
- def fit(self, X, y):
54
- if isinstance(X, torch.Tensor):
55
- X = X.numpy()
56
- if isinstance(y, torch.Tensor):
57
- y = y.numpy()
58
-
59
- input_dim = X.shape[1]
60
- self.model = nn.Sequential(
61
- nn.Linear(input_dim, self.hidden_dim),
62
- nn.ReLU(),
63
- nn.Linear(self.hidden_dim, 1),
64
- nn.Sigmoid()
65
- )
66
-
67
- criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor(self.pos_weight))
68
- optimizer = optim.Adam(self.model.parameters(), lr=self.lr)
69
-
70
- train_data = TensorDataset(torch.from_numpy(X).float(), torch.from_numpy(y).float())
71
- train_loader = DataLoader(train_data, batch_size=self.batch_size, shuffle=True)
72
-
73
- self.model.train()
74
- for epoch in range(self.max_epochs):
75
- for inputs, labels in train_loader:
76
- optimizer.zero_grad()
77
- outputs = self.model(inputs)
78
- loss = criterion(outputs.squeeze(), labels)
79
- loss.backward()
80
- optimizer.step()
81
- return self
82
-
83
- def predict(self, X):
84
- if isinstance(X, torch.Tensor):
85
- X = X.numpy()
86
-
87
- self.model.eval()
88
- with torch.no_grad():
89
- outputs = self.model(torch.from_numpy(X).float())
90
- predictions = (outputs.squeeze() > self.th).float().numpy()
91
- return predictions
92
-
93
- def score(self, X, y):
94
- predictions = self.predict(X)
95
- return accuracy_score(y, predictions)
96
-
97
-
98
- class ParamsOptimiser:
99
- def __init__(self, X_train = None, y_train=None, X_test=None, y_test=None):
100
- if isinstance(X_train, pd.DataFrame):
101
- X_train = X_train.to_numpy()
102
- if isinstance(y_train, pd.Series):
103
- y_train = y_train.to_numpy()
104
- if isinstance(X_test, pd.DataFrame):
105
- X_test = X_test.to_numpy()
106
- if isinstance(y_test, pd.Series):
107
- y_test = y_test.to_numpy()
108
-
109
- self.X_train = X_train
110
- self.y_train = y_train
111
- self.X_test = X_test
112
- self.y_test = y_test
113
-
114
- def perform_grid_search(self, param_grid, scoring_metric='recall', cv=3, verbose=1):
115
- pytorch_model = CustomPyTorchClassifier()
116
- scorer = make_scorer(recall_score, greater_is_better=True)
117
-
118
- if scoring_metric == 'precision':
119
- scorer = make_scorer(precision_score)
120
- elif scoring_metric == 'accuracy':
121
- scorer = make_scorer(accuracy_score)
122
- elif scoring_metric == 'recall':
123
- scorer = make_scorer(recall_score)
124
- elif scoring_metric == 'f1':
125
- scorer = make_scorer(f1_score)
126
-
127
- grid_search = GridSearchCV(pytorch_model, param_grid, scoring=scorer, cv=cv, refit=scoring_metric, verbose=verbose)
128
- grid_search.fit(self.X_train, self.y_train)
129
-
130
- self.grid_search_results = grid_search # Save the grid search results
131
-
132
- return grid_search
133
-
134
- # Inside the CustomModelTrainer class
135
- def plot_results(self, params_to_plot=None):
136
- results = pd.DataFrame(self.grid_search_results.cv_results_)
137
-
138
- if params_to_plot is None:
139
- # Create a column for configuration details
140
- results['config'] = results['params'].apply(lambda x: str(x))
141
-
142
- # Visualize mean test scores along with configurations
143
- plt.figure(figsize=(15, 8))
144
- bar_plot = plt.bar(results.index, results['mean_test_score'], color='blue', alpha=0.7)
145
- plt.xticks(results.index, results['config'], rotation='vertical', fontsize=8)
146
- plt.ylabel('Mean Test Score')
147
- plt.title('Mean Test Scores for Each Configuration')
148
- plt.tight_layout()
149
-
150
- # Add values on top of bars
151
- for bar, score in zip(bar_plot, results['mean_test_score']):
152
- plt.text(bar.get_x() + bar.get_width() / 2 - 0.15, bar.get_height() + 0.01, f'{score:.3f}', fontsize=8)
153
-
154
- plt.show()
155
- return
156
-
157
- try:
158
- # Dynamically get the column names for the specified scoring metric
159
- mean_test_col = f'mean_test_{params_to_plot[0]}'
160
- param_cols = [f'param_{param}' for param in params_to_plot]
161
-
162
- if len(params_to_plot) == 1:
163
- # Plotting the heatmap for a single parameter
164
- plt.figure(figsize=(8, 6))
165
- sns.heatmap(results.pivot_table(index=param_cols[0]),
166
- annot=True, cmap='YlGnBu', fmt=".3f", cbar_kws={'label': mean_test_col})
167
- plt.title(mean_test_col.capitalize())
168
- plt.show()
169
- elif len(params_to_plot) == 2:
170
- # Create a pair plot for two parameters
171
- plt.figure(figsize=(8, 6))
172
- scores = results.pivot_table(index=param_cols[0], columns=param_cols[1], values=f'mean_test_score', aggfunc="mean")
173
- sns.heatmap(scores, annot=True, cmap='YlGnBu', fmt=".3f", cbar_kws={'label': mean_test_col})
174
- plt.title(mean_test_col.capitalize())
175
- plt.show()
176
- else:
177
- print("Invalid number of parameters to plot. You can provide either one or two parameters.")
178
- except KeyError as e:
179
- print(f"Error: {e}. Make sure the specified scoring metric exists in the results DataFrame.")
180
-
181
-
182
-
183
- def optuna_optimisation(self, direction, params):
184
- # Create the data loaders here
185
- train_data = TensorDataset(torch.from_numpy(self.X_train).float(), torch.from_numpy(self.y_train).float())
186
- test_data = TensorDataset(torch.from_numpy(self.X_test).float(), torch.from_numpy(self.y_test).float())
187
-
188
-
189
-
190
- def objective(trial):
191
-
192
- batch_size=trial.suggest_int('batch_size', **params['batch_size'])
193
-
194
- train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
195
- test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)
196
-
197
- # Create the model with the suggested hyperparameters
198
- model = BinaryClassifier(input_size=self.X_train.shape[1],
199
- num_layers=trial.suggest_int('num_layers', **params['num_layers']) ,
200
- layer_size=trial.suggest_int('hidden_size', **params['hidden_size']))
201
-
202
- # Define the loss function and optimizer
203
- criterion = nn.BCEWithLogitsLoss()
204
- optimizer_name = trial.suggest_categorical('optimizer', params['optimizer'])
205
- learning_rate = trial.suggest_float('learning_rate', **params['learning_rate'])
206
-
207
-
208
- if optimizer_name == 'Adam':
209
- optimizer = optim.Adam(model.parameters(), lr=learning_rate)
210
- elif optimizer_name == 'SGD':
211
- optimizer = optim.SGD(model.parameters(), lr=learning_rate)
212
- elif optimizer_name == 'RMSprop':
213
- optimizer = optim.RMSprop(model.parameters(), lr=learning_rate)
214
-
215
- # Training loop
216
- num_epochs = trial.suggest_int('num_epochs', **params['num_epochs'])
217
- for epoch in range(num_epochs):
218
- model.train()
219
- for batch_X, batch_y in train_loader:
220
- optimizer.zero_grad()
221
- outputs = model(batch_X)
222
- loss = criterion(outputs.squeeze(), batch_y)
223
- loss.backward()
224
- optimizer.step()
225
-
226
- # Evaluation
227
- model.eval()
228
- predictions = []
229
- true_labels = []
230
- with torch.no_grad():
231
- for batch_X, batch_y in test_loader:
232
- outputs = model(batch_X)
233
- predictions.extend(torch.sigmoid(outputs).numpy())
234
- true_labels.extend(batch_y.numpy())
235
-
236
- # Calculate F1 score
237
- # f1 = f1_score(true_labels, (np.array(predictions) > 0.5).astype(int))
238
- auc = roc_auc_score(true_labels, predictions)
239
-
240
- trial.report(auc, epoch)
241
-
242
- # Handle pruning based on the intermediate value
243
- if trial.should_prune():
244
- raise optuna.TrialPruned()
245
-
246
- return auc
247
-
248
- # Create an Optuna study
249
- study = optuna.create_study(direction=direction)
250
- study.optimize(objective, n_trials=params['n_trials'])
251
-
252
- self.study = study
253
-
254
- # Get the best hyperparameters
255
- best_params = study.best_params
256
- print(f"Best Hyperparameters: {best_params}")
257
-
258
- return study
259
-
260
- def train_optimized_model(self ,trial ,th_min , th_max):
261
-
262
- best_params = self.study.best_params
263
-
264
- threshold = trial.suggest_float('threashhold', th_min, th_max, log=True)
265
-
266
- train_data = TensorDataset(torch.from_numpy(self.X_train).float(), torch.from_numpy(self.y_train).float())
267
- test_data = TensorDataset(torch.from_numpy(self.X_test).float(), torch.from_numpy(self.y_test).float())
268
-
269
- train_loader = DataLoader(train_data, batch_size=best_params['batch_size'], shuffle=True)
270
- test_loader = DataLoader(test_data, batch_size=best_params['batch_size'], shuffle=False)
271
-
272
-
273
- # Use the best hyperparameters to train the final model
274
- final_model = BinaryClassifier(input_size=self.X_train.shape[1], layer_size=best_params['hidden_size'] , num_layers=best_params['num_layers'])
275
- final_optimizer = self.get_optimizer(best_params['optimizer'], final_model.parameters(), best_params['learning_rate'])
276
- final_criterion = nn.BCEWithLogitsLoss()
277
-
278
- num_epochs = best_params['num_epochs']
279
- for epoch in range(num_epochs):
280
- final_model.train()
281
- for batch_X, batch_y in train_loader:
282
- final_optimizer.zero_grad()
283
- outputs = final_model(batch_X)
284
- loss = final_criterion(outputs.squeeze(), batch_y)
285
- loss.backward()
286
- final_optimizer.step()
287
-
288
- # Evaluate the final model on the test set
289
- final_model.eval()
290
- with torch.no_grad():
291
- predictions = []
292
- true_labels = []
293
- for batch_X, batch_y in test_loader:
294
- outputs = final_model(batch_X)
295
- predictions.extend(torch.sigmoid(outputs).numpy())
296
- true_labels.extend(batch_y.numpy())
297
-
298
- final_balanced_acc = balanced_accuracy_score(true_labels, (np.array(predictions) > threshold).astype(int))
299
- print(f"Model balanced accuracy: {final_balanced_acc}")
300
-
301
- return final_balanced_acc
302
-
303
- def get_optimizer(self, optimizer_name, parameters, learning_rate):
304
- if optimizer_name == 'Adam':
305
- return optim.Adam(parameters, lr=learning_rate)
306
- elif optimizer_name == 'SGD':
307
- return optim.SGD(parameters, lr=learning_rate)
308
- elif optimizer_name == 'RMSprop':
309
- return optim.RMSprop(parameters, lr=learning_rate)
310
- else:
311
- raise ValueError(f"Unknown optimizer: {optimizer_name}")
312
-
313
- def perform_grid_search(self, param_grid, scoring_metric='recall', cv=3, verbose=1):
314
- pytorch_model = CustomPyTorchClassifier()
315
- scorer = make_scorer(recall_score, greater_is_better=True)
316
-
317
- if scoring_metric == 'precision':
318
- scorer = make_scorer(precision_score)
319
- elif scoring_metric == 'accuracy':
320
- scorer = make_scorer(accuracy_score)
321
- elif scoring_metric == 'recall':
322
- scorer = make_scorer(recall_score)
323
- elif scoring_metric == 'f1':
324
- scorer = make_scorer(f1_score)
325
-
326
- grid_search = GridSearchCV(pytorch_model, param_grid, scoring=scorer, cv=cv, refit=scoring_metric, verbose=verbose)
327
- grid_search.fit(self.X_train, self.y_train)
328
-
329
- self.grid_search_results = grid_search # Save the grid search results
330
-
331
- return grid_search
332
-
333
-
334
- def plot_param_importances(self):
335
- return optuna.visualization.plot_param_importances(self.study)
336
-
337
- def plot_slice(self , params):
338
- return optuna.visualization.plot_slice(self.study , params=params)
339
-
340
- def plot_parallel_coordinate(self):
341
- return optuna.visualization.plot_parallel_coordinate(self.study)
342
-
343
- def plot_rank(self , params=None):
344
- return optuna.visualization.plot_rank(self.study , params=params)
345
-
346
- def plot_optimization_history(self):
347
- return optuna.visualization.plot_optimization_history(self.study)
348
-
349
- def optimize_model_threashhold(self , n_trials , th_min , th_max):
350
- additional_params = {'th_min': th_min, 'th_max': th_max}
351
-
352
- th_study = optuna.create_study(direction='maximize')
353
- th_study.optimize(lambda trial: self.train_optimized_model(trial , **additional_params) , n_trials)
354
-
355
- # Get the best hyperparameters
356
- best_params = th_study.best_params
357
- print(f"Best Hyperparameters: {best_params}")
358
-
359
- return optuna.visualization.plot_rank(th_study , params=['threashhold'])
360
-
361
- def federated_params_iptim(self , params , direction, model, fl_dataset):
362
-
363
- def objective(trial):
364
-
365
- criterion = nn.BCEWithLogitsLoss()
366
-
367
- optimizer_name = trial.suggest_categorical('optimizer', params['optimizer'])
368
- learning_rate = trial.suggest_float('learning_rate', **params['learning_rate'])
369
- num_rounds = trial.suggest_int('num_rounds', **params['num_rounds'])
370
- diff_privacy = trial.suggest_int('diff_privacy', **params['diff_privacy'])
371
- diff_privacy = True if diff_privacy == 1 else False
372
-
373
- if optimizer_name == 'Adam':
374
- optimizer = optim.Adam(model.parameters(), lr=learning_rate)
375
- elif optimizer_name == 'SGD':
376
- optimizer = optim.SGD(model.parameters(), lr=learning_rate)
377
- elif optimizer_name == 'RMSprop':
378
- optimizer = optim.RMSprop(model.parameters(), lr=learning_rate)
379
-
380
- # Creating a new Model instance using the specific model created by DynamicModel
381
- global_model = Model(model, optimizer, criterion)
382
-
383
- # Get the initial params of the model
384
- init_params = global_model.get_parameters()
385
-
386
- fl_strategy = trial.suggest_categorical('fl_strategy', params['fl_strategy'])
387
-
388
- learning_strategy = Strategy(fl_strategy,
389
- fraction_fit = 1.0 ,
390
- fraction_evaluate = 1.0,
391
- min_fit_clients = 2,
392
- min_evaluate_clients = 2,
393
- min_available_clients = 2 ,
394
- initial_parameters=init_params)
395
-
396
- learning_strategy.create_strategy()
397
-
398
- # Create The server
399
- server = FlowerServer(global_model, strategy = learning_strategy, num_rounds = num_rounds,
400
- num_clients = len(fl_dataset.trainloaders),
401
- fed_dataset = fl_dataset,diff_privacy = diff_privacy,
402
- # You can change the resources alocated for each client based on your machine
403
- client_resources={'num_cpus': 1.0, 'num_gpus': 0.0}
404
- )
405
-
406
- ppl_1 = FLpipeline( name ="the first fl_pipeline",description = "this is our first FL pipeline",
407
- server = server)
408
-
409
- # Run the Traning of the model
410
- history = ppl_1.server.run()
411
-
412
- return server.auc[len(server.auc)-1]
413
-
414
-
415
-
416
- study = optuna.create_study(direction=direction)
417
- study.optimize(objective, n_trials=params['n_trials'])
418
-
419
- self.study = study
420
-
421
- # Get the best hyperparameters
422
- best_params = study.best_params
423
- print(f"Best Hyperparameters: {best_params}")
424
-
425
- return study
426
-
427
-
428
-
429
-
430
-
431
-
432
-
433
-
434
-
435
-
436
-
437
-
438
-
439
-
440
-
441
-
442
-
1
+ import numpy as np
2
+ import pandas as pd
3
+ import matplotlib.pyplot as plt
4
+ import seaborn as sns
5
+ import torch
6
+ import torch.nn as nn
7
+ import torch.optim as optim
8
+ import torch.nn.functional as F
9
+ from torch.utils.data import TensorDataset, DataLoader
10
+ from sklearn.model_selection import GridSearchCV, train_test_split
11
+ from sklearn.base import BaseEstimator
12
+ from sklearn.metrics import make_scorer, precision_score, recall_score, accuracy_score, f1_score,roc_auc_score, balanced_accuracy_score
13
+ import optuna
14
+
15
+ from MEDfl.LearningManager.model import Model
16
+ from MEDfl.LearningManager.strategy import Strategy
17
+ from MEDfl.LearningManager.server import FlowerServer
18
+ from MEDfl.LearningManager.flpipeline import FLpipeline
19
+
20
class BinaryClassifier(nn.Module):
    """Fully connected network that emits one raw logit per sample.

    The stack is ``input_size -> layer_size``, then ``num_layers - 1``
    square ``layer_size -> layer_size`` layers, then a final
    ``layer_size -> 1`` projection.  ReLU follows every layer except the
    last, so the output is an unnormalised logit (no sigmoid is applied).

    Args:
        input_size: Number of input features.
        num_layers: Number of ReLU-activated layers before the output layer.
        layer_size: Width of every hidden layer.
    """

    def __init__(self, input_size, num_layers, layer_size):
        super().__init__()

        # Build the layers in input -> hidden -> output order inside a
        # ModuleList so that every layer's parameters are registered.
        stack = nn.ModuleList()
        stack.append(nn.Linear(input_size, layer_size))
        for _ in range(num_layers - 1):
            stack.append(nn.Linear(layer_size, layer_size))
        stack.append(nn.Linear(layer_size, 1))
        self.layers = stack

    def forward(self, x):
        """Return the logits for ``x``; the last dimension of the result is 1."""
        *hidden, head = self.layers
        for linear in hidden:
            x = F.relu(linear(x))
        return head(x)
42
+
43
class CustomPyTorchClassifier(BaseEstimator):
    """scikit-learn style binary classifier backed by a one-hidden-layer network.

    The network outputs raw logits and is trained with
    ``nn.BCEWithLogitsLoss``; the sigmoid is applied only at prediction
    time, so it is never applied twice.

    Args:
        hidden_dim: Width of the single hidden layer.
        lr: Learning rate passed to the Adam optimizer.
        pos_weight: Weight of the positive class in the loss.
        th: Probability threshold above which a sample is predicted as 1.
        max_epochs: Number of passes over the training data.
        batch_size: Mini-batch size used during training.
    """

    def __init__(self, hidden_dim=10, lr=0.001, pos_weight=1, th=0.5, max_epochs=10, batch_size=32):
        self.hidden_dim = hidden_dim
        self.lr = lr
        self.pos_weight = pos_weight
        self.max_epochs = max_epochs
        self.batch_size = batch_size
        self.th = th
        # Set by fit(); stays None until the estimator has been trained.
        self.model = None

    @staticmethod
    def _as_float_tensor(data):
        """Convert a tensor or array-like (ndarray, DataFrame, list) to float32."""
        if isinstance(data, torch.Tensor):
            return data.detach().float()
        return torch.from_numpy(np.asarray(data)).float()

    def fit(self, X, y):
        """Train a fresh network on ``X`` / ``y`` and return ``self``.

        Args:
            X: 2-D features, one row per sample.
            y: Binary targets, one per sample.
        """
        features = self._as_float_tensor(X)
        # Flatten so column-vector targets line up with the squeezed logits.
        targets = self._as_float_tensor(y).reshape(-1)

        # No trailing nn.Sigmoid: BCEWithLogitsLoss applies the sigmoid itself.
        self.model = nn.Sequential(
            nn.Linear(features.shape[1], self.hidden_dim),
            nn.ReLU(),
            nn.Linear(self.hidden_dim, 1),
        )

        # Force a float tensor: the default pos_weight=1 is a Python int.
        criterion = nn.BCEWithLogitsLoss(
            pos_weight=torch.as_tensor(self.pos_weight, dtype=torch.float32)
        )
        optimizer = optim.Adam(self.model.parameters(), lr=self.lr)
        train_loader = DataLoader(
            TensorDataset(features, targets), batch_size=self.batch_size, shuffle=True
        )

        self.model.train()
        for _ in range(self.max_epochs):
            for inputs, labels in train_loader:
                optimizer.zero_grad()
                # squeeze(1) keeps shape (1,) when the last batch holds one sample;
                # a bare squeeze() would produce a 0-d tensor and break the loss.
                logits = self.model(inputs).squeeze(1)
                loss = criterion(logits, labels)
                loss.backward()
                optimizer.step()
        return self

    def predict(self, X):
        """Return a float NumPy array of 0./1. labels, one per row of ``X``."""
        features = self._as_float_tensor(X)

        self.model.eval()
        with torch.no_grad():
            probabilities = torch.sigmoid(self.model(features)).squeeze(1)
        return (probabilities > self.th).float().numpy()

    def score(self, X, y):
        """Return the accuracy of ``predict(X)`` against ``y``."""
        return accuracy_score(y, self.predict(X))
96
+
97
+
98
class ParamsOptimiser:
    """Hyperparameter search helpers for binary classifiers.

    Provides a scikit-learn grid search over ``CustomPyTorchClassifier``,
    Optuna searches over ``BinaryClassifier`` hyperparameters and over the
    decision threshold, and an Optuna search over federated-learning settings.

    Args:
        X_train: Training features (NumPy array or pandas object).
        y_train: Training targets (NumPy array or pandas object).
        X_test: Evaluation features (NumPy array or pandas object).
        y_test: Evaluation targets (NumPy array or pandas object).
    """

    def __init__(self, X_train = None, y_train=None, X_test=None, y_test=None):
        self.X_train = self._to_numpy(X_train)
        self.y_train = self._to_numpy(y_train)
        self.X_test = self._to_numpy(X_test)
        self.y_test = self._to_numpy(y_test)

    # ------------------------------------------------------------------ helpers

    @staticmethod
    def _to_numpy(data):
        """Convert pandas objects to NumPy arrays; return anything else unchanged."""
        if isinstance(data, (pd.DataFrame, pd.Series)):
            return data.to_numpy()
        return data

    def _tensor_datasets(self):
        """Wrap the stored train and test arrays as float ``TensorDataset`` objects."""
        train_data = TensorDataset(torch.from_numpy(self.X_train).float(), torch.from_numpy(self.y_train).float())
        test_data = TensorDataset(torch.from_numpy(self.X_test).float(), torch.from_numpy(self.y_test).float())
        return train_data, test_data

    @staticmethod
    def _train_epochs(model, optimizer, criterion, loader, num_epochs):
        """Run ``num_epochs`` passes of gradient descent over ``loader``."""
        for _ in range(num_epochs):
            model.train()
            for batch_X, batch_y in loader:
                optimizer.zero_grad()
                # squeeze(-1) drops only the output axis, so a final batch of one
                # sample keeps shape (1,) and still matches its label tensor.
                loss = criterion(model(batch_X).squeeze(-1), batch_y)
                loss.backward()
                optimizer.step()

    @staticmethod
    def _predict_probabilities(model, loader):
        """Return ``(probabilities, labels)`` as 1-D NumPy arrays for ``loader``."""
        model.eval()
        probabilities = []
        labels = []
        with torch.no_grad():
            for batch_X, batch_y in loader:
                probabilities.extend(torch.sigmoid(model(batch_X)).squeeze(-1).tolist())
                labels.extend(batch_y.tolist())
        return np.array(probabilities), np.array(labels)

    def _run_study(self, objective, direction, n_trials):
        """Optimise ``objective`` with Optuna, store the study on ``self.study`` and return it."""
        study = optuna.create_study(direction=direction)
        study.optimize(objective, n_trials=n_trials)

        self.study = study

        # Get the best hyperparameters
        best_params = study.best_params
        print(f"Best Hyperparameters: {best_params}")

        return study

    def get_optimizer(self, optimizer_name, parameters, learning_rate):
        """Build the optimizer called ``optimizer_name`` for ``parameters``.

        Raises:
            ValueError: If the name is not 'Adam', 'SGD' or 'RMSprop'.
        """
        if optimizer_name == 'Adam':
            return optim.Adam(parameters, lr=learning_rate)
        elif optimizer_name == 'SGD':
            return optim.SGD(parameters, lr=learning_rate)
        elif optimizer_name == 'RMSprop':
            return optim.RMSprop(parameters, lr=learning_rate)
        else:
            raise ValueError(f"Unknown optimizer: {optimizer_name}")

    # -------------------------------------------------------------- grid search

    def perform_grid_search(self, param_grid, scoring_metric='recall', cv=3, verbose=1):
        """Grid-search ``CustomPyTorchClassifier`` over ``param_grid``.

        Args:
            param_grid: Parameter grid passed to ``GridSearchCV``.
            scoring_metric: One of 'precision', 'accuracy', 'recall' or 'f1';
                any other value falls back to recall.
            cv: Cross-validation setting passed to ``GridSearchCV``.
            verbose: Verbosity passed to ``GridSearchCV``.

        Returns:
            The fitted ``GridSearchCV``; it is also kept on
            ``self.grid_search_results`` for ``plot_results``.
        """
        metric_functions = {
            'precision': precision_score,
            'accuracy': accuracy_score,
            'recall': recall_score,
            'f1': f1_score,
        }
        scorer = make_scorer(metric_functions.get(scoring_metric, recall_score))

        # With a single scorer, refit only needs to be truthy; the metric name
        # is not a lookup key here, so pass True explicitly.
        grid_search = GridSearchCV(CustomPyTorchClassifier(), param_grid, scoring=scorer, cv=cv, refit=True, verbose=verbose)
        grid_search.fit(self.X_train, self.y_train)

        self.grid_search_results = grid_search  # Save the grid search results

        return grid_search

    def plot_results(self, params_to_plot=None):
        """Plot the mean test score of the last grid search.

        Args:
            params_to_plot: ``None`` for a bar chart with one bar per
                configuration, or a list of one or two parameter names for a
                heatmap of the mean test score over those parameters.
        """
        results = pd.DataFrame(self.grid_search_results.cv_results_)
        # A single scorer is used, so the results expose one score column.
        score_col = 'mean_test_score'

        if params_to_plot is None:
            # Create a column for configuration details
            results['config'] = results['params'].apply(lambda x: str(x))

            # Visualize mean test scores along with configurations
            plt.figure(figsize=(15, 8))
            bar_plot = plt.bar(results.index, results[score_col], color='blue', alpha=0.7)
            plt.xticks(results.index, results['config'], rotation='vertical', fontsize=8)
            plt.ylabel('Mean Test Score')
            plt.title('Mean Test Scores for Each Configuration')
            plt.tight_layout()

            # Add values on top of bars
            for bar, score in zip(bar_plot, results[score_col]):
                plt.text(bar.get_x() + bar.get_width() / 2 - 0.15, bar.get_height() + 0.01, f'{score:.3f}', fontsize=8)

            plt.show()
            return

        if len(params_to_plot) not in (1, 2):
            print("Invalid number of parameters to plot. You can provide either one or two parameters.")
            return

        param_cols = [f'param_{param}' for param in params_to_plot]
        try:
            if len(param_cols) == 1:
                # Restrict the pivot to the score column; without values= every
                # numeric column of the results would end up in the heatmap.
                scores = results.pivot_table(index=param_cols[0], values=score_col, aggfunc="mean")
            else:
                scores = results.pivot_table(index=param_cols[0], columns=param_cols[1], values=score_col, aggfunc="mean")
        except KeyError as e:
            print(f"Error: {e}. Make sure the requested parameters exist in the grid search results.")
            return

        plt.figure(figsize=(8, 6))
        sns.heatmap(scores, annot=True, cmap='YlGnBu', fmt=".3f", cbar_kws={'label': score_col})
        plt.title(score_col.capitalize())
        plt.show()

    # ------------------------------------------------------------ optuna search

    def optuna_optimisation(self, direction, params):
        """Search ``BinaryClassifier`` hyperparameters with Optuna, scored by ROC AUC.

        Args:
            direction: Optimisation direction passed to ``optuna.create_study``.
            params: Search space. 'batch_size', 'num_layers', 'hidden_size',
                'learning_rate' and 'num_epochs' hold keyword arguments for the
                matching ``trial.suggest_*`` call, 'optimizer' holds the list
                of optimizer names and 'n_trials' the number of trials.

        Returns:
            The finished study, also stored on ``self.study``.
        """
        train_data, test_data = self._tensor_datasets()

        def objective(trial):
            batch_size = trial.suggest_int('batch_size', **params['batch_size'])

            train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
            test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

            # Create the model with the suggested hyperparameters
            model = BinaryClassifier(
                input_size=self.X_train.shape[1],
                num_layers=trial.suggest_int('num_layers', **params['num_layers']),
                layer_size=trial.suggest_int('hidden_size', **params['hidden_size']),
            )

            # Define the loss function and optimizer
            criterion = nn.BCEWithLogitsLoss()
            optimizer_name = trial.suggest_categorical('optimizer', params['optimizer'])
            learning_rate = trial.suggest_float('learning_rate', **params['learning_rate'])
            # get_optimizer raises ValueError on an unknown name instead of
            # leaving `optimizer` unbound.
            optimizer = self.get_optimizer(optimizer_name, model.parameters(), learning_rate)

            num_epochs = trial.suggest_int('num_epochs', **params['num_epochs'])
            self._train_epochs(model, optimizer, criterion, train_loader, num_epochs)

            probabilities, true_labels = self._predict_probabilities(model, test_loader)
            auc = roc_auc_score(true_labels, probabilities)

            # The score is reported once, after training, at the last epoch index.
            trial.report(auc, max(num_epochs - 1, 0))

            # Handle pruning based on the intermediate value
            if trial.should_prune():
                raise optuna.TrialPruned()

            return auc

        return self._run_study(objective, direction, params['n_trials'])

    def train_optimized_model(self, trial, th_min, th_max):
        """Train with the best stored hyperparameters and score one threshold.

        Reads 'batch_size', 'hidden_size', 'num_layers', 'optimizer',
        'learning_rate' and 'num_epochs' from ``self.study.best_params``, so
        ``self.study`` must come from ``optuna_optimisation``.

        Args:
            trial: Optuna trial used to suggest the threshold.
            th_min: Lower bound of the threshold (log-uniform sampling).
            th_max: Upper bound of the threshold.

        Returns:
            Balanced accuracy on the test set at the suggested threshold.
        """
        best_params = self.study.best_params

        threshold = trial.suggest_float('threashhold', th_min, th_max, log=True)

        train_data, test_data = self._tensor_datasets()
        train_loader = DataLoader(train_data, batch_size=best_params['batch_size'], shuffle=True)
        test_loader = DataLoader(test_data, batch_size=best_params['batch_size'], shuffle=False)

        # Use the best hyperparameters to train the final model
        final_model = BinaryClassifier(input_size=self.X_train.shape[1], layer_size=best_params['hidden_size'], num_layers=best_params['num_layers'])
        final_optimizer = self.get_optimizer(best_params['optimizer'], final_model.parameters(), best_params['learning_rate'])
        self._train_epochs(final_model, final_optimizer, nn.BCEWithLogitsLoss(), train_loader, best_params['num_epochs'])

        # Evaluate the final model on the test set
        probabilities, true_labels = self._predict_probabilities(final_model, test_loader)

        final_balanced_acc = balanced_accuracy_score(true_labels, (probabilities > threshold).astype(int))
        print(f"Model balanced accuracy: {final_balanced_acc}")

        return final_balanced_acc

    def optimize_model_threashhold(self, n_trials, th_min, th_max):
        """Search the decision threshold with Optuna, maximising balanced accuracy.

        Returns:
            The Optuna rank plot of the 'threashhold' parameter.
        """
        additional_params = {'th_min': th_min, 'th_max': th_max}

        th_study = optuna.create_study(direction='maximize')
        th_study.optimize(lambda trial: self.train_optimized_model(trial, **additional_params), n_trials)

        # Get the best hyperparameters
        best_params = th_study.best_params
        print(f"Best Hyperparameters: {best_params}")

        return optuna.visualization.plot_rank(th_study, params=['threashhold'])

    # ------------------------------------------------------------ visualisation

    def plot_param_importances(self):
        """Return Optuna's parameter-importance plot for ``self.study``."""
        return optuna.visualization.plot_param_importances(self.study)

    def plot_slice(self, params):
        """Return Optuna's slice plot of ``params`` for ``self.study``."""
        return optuna.visualization.plot_slice(self.study, params=params)

    def plot_parallel_coordinate(self):
        """Return Optuna's parallel-coordinate plot for ``self.study``."""
        return optuna.visualization.plot_parallel_coordinate(self.study)

    def plot_rank(self, params=None):
        """Return Optuna's rank plot of ``params`` for ``self.study``."""
        return optuna.visualization.plot_rank(self.study, params=params)

    def plot_optimization_history(self):
        """Return Optuna's optimisation-history plot for ``self.study``."""
        return optuna.visualization.plot_optimization_history(self.study)

    # ---------------------------------------------------------- federated search

    def federated_params_iptim(self, params, direction, model, fl_dataset):
        """Search federated-learning settings with Optuna.

        Each trial suggests an optimizer, learning rate, number of rounds,
        differential-privacy flag and strategy name, runs a ``FlowerServer``
        and returns the last entry of ``server.auc``.

        Args:
            params: Search space. 'learning_rate', 'num_rounds' and
                'diff_privacy' hold keyword arguments for the matching
                ``trial.suggest_*`` call, 'optimizer' and 'fl_strategy' hold
                lists of choices and 'n_trials' the number of trials.
            direction: Optimisation direction passed to ``optuna.create_study``.
            model: The torch model wrapped in ``Model`` for every trial.
            fl_dataset: Federated dataset; the client count is
                ``len(fl_dataset.trainloaders)``.

        Returns:
            The finished study, also stored on ``self.study``.
        """

        def objective(trial):
            criterion = nn.BCEWithLogitsLoss()

            optimizer_name = trial.suggest_categorical('optimizer', params['optimizer'])
            learning_rate = trial.suggest_float('learning_rate', **params['learning_rate'])
            num_rounds = trial.suggest_int('num_rounds', **params['num_rounds'])
            diff_privacy = trial.suggest_int('diff_privacy', **params['diff_privacy']) == 1

            # get_optimizer raises ValueError on an unknown name instead of
            # leaving `optimizer` unbound.
            optimizer = self.get_optimizer(optimizer_name, model.parameters(), learning_rate)

            # NOTE(review): the same `model` instance is passed to every trial;
            # confirm whether its weights should be reset between trials.
            global_model = Model(model, optimizer, criterion)

            # Get the initial params of the model
            init_params = global_model.get_parameters()

            fl_strategy = trial.suggest_categorical('fl_strategy', params['fl_strategy'])

            learning_strategy = Strategy(
                fl_strategy,
                fraction_fit=1.0,
                fraction_evaluate=1.0,
                min_fit_clients=2,
                min_evaluate_clients=2,
                min_available_clients=2,
                initial_parameters=init_params,
            )
            learning_strategy.create_strategy()

            # Create the server
            server = FlowerServer(
                global_model,
                strategy=learning_strategy,
                num_rounds=num_rounds,
                num_clients=len(fl_dataset.trainloaders),
                fed_dataset=fl_dataset,
                diff_privacy=diff_privacy,
                # You can change the resources allocated for each client based on your machine
                client_resources={'num_cpus': 1.0, 'num_gpus': 0.0},
            )

            ppl_1 = FLpipeline(name="the first fl_pipeline", description="this is our first FL pipeline",
                               server=server)

            # Run the training of the model
            ppl_1.server.run()

            return server.auc[-1]

        return self._run_study(objective, direction, params['n_trials'])
426
+
427
+
428
+
429
+
430
+
431
+
432
+
433
+
434
+
435
+
436
+
437
+
438
+
439
+
440
+
441
+
442
+