MEDfl 2.0.4.dev0__py3-none-any.whl → 2.0.4.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. MEDfl/rw/client.py +98 -29
  2. MEDfl/rw/model.py +46 -74
  3. MEDfl/rw/server.py +71 -18
  4. MEDfl/rw/strategy.py +73 -78
  5. {medfl-2.0.4.dev0.dist-info → MEDfl-2.0.4.dev2.dist-info}/METADATA +2 -14
  6. MEDfl-2.0.4.dev2.dist-info/RECORD +36 -0
  7. {medfl-2.0.4.dev0.dist-info → MEDfl-2.0.4.dev2.dist-info}/WHEEL +1 -1
  8. MEDfl/rw/rwConfig.py +0 -21
  9. MEDfl/rw/verbose_server.py +0 -21
  10. Medfl/LearningManager/__init__.py +0 -13
  11. Medfl/LearningManager/client.py +0 -150
  12. Medfl/LearningManager/dynamicModal.py +0 -287
  13. Medfl/LearningManager/federated_dataset.py +0 -60
  14. Medfl/LearningManager/flpipeline.py +0 -192
  15. Medfl/LearningManager/model.py +0 -223
  16. Medfl/LearningManager/params.yaml +0 -14
  17. Medfl/LearningManager/params_optimiser.py +0 -442
  18. Medfl/LearningManager/plot.py +0 -229
  19. Medfl/LearningManager/server.py +0 -181
  20. Medfl/LearningManager/strategy.py +0 -82
  21. Medfl/LearningManager/utils.py +0 -331
  22. Medfl/NetManager/__init__.py +0 -10
  23. Medfl/NetManager/database_connector.py +0 -43
  24. Medfl/NetManager/dataset.py +0 -92
  25. Medfl/NetManager/flsetup.py +0 -320
  26. Medfl/NetManager/net_helper.py +0 -254
  27. Medfl/NetManager/net_manager_queries.py +0 -142
  28. Medfl/NetManager/network.py +0 -194
  29. Medfl/NetManager/node.py +0 -184
  30. Medfl/__init__.py +0 -3
  31. Medfl/scripts/__init__.py +0 -2
  32. Medfl/scripts/base.py +0 -30
  33. Medfl/scripts/create_db.py +0 -126
  34. medfl-2.0.4.dev0.dist-info/RECORD +0 -62
  35. {medfl-2.0.4.dev0.dist-info/licenses → MEDfl-2.0.4.dev2.dist-info}/LICENSE +0 -0
  36. {medfl-2.0.4.dev0.dist-info → MEDfl-2.0.4.dev2.dist-info}/top_level.txt +0 -0
@@ -1,442 +0,0 @@
1
- import numpy as np
2
- import pandas as pd
3
- import matplotlib.pyplot as plt
4
- import seaborn as sns
5
- import torch
6
- import torch.nn as nn
7
- import torch.optim as optim
8
- import torch.nn.functional as F
9
- from torch.utils.data import TensorDataset, DataLoader
10
- from sklearn.model_selection import GridSearchCV, train_test_split
11
- from sklearn.base import BaseEstimator
12
- from sklearn.metrics import make_scorer, precision_score, recall_score, accuracy_score, f1_score,roc_auc_score, balanced_accuracy_score
13
- import optuna
14
-
15
- from MEDfl.LearningManager.model import Model
16
- from MEDfl.LearningManager.strategy import Strategy
17
- from MEDfl.LearningManager.server import FlowerServer
18
- from MEDfl.LearningManager.flpipeline import FLpipeline
19
-
20
class BinaryClassifier(nn.Module):
    """Fully connected network that emits one raw logit per input row.

    The stack is made of an ``input_size -> layer_size`` linear layer,
    ``num_layers - 1`` square ``layer_size -> layer_size`` linear layers and a
    closing ``layer_size -> 1`` linear layer.  ReLU follows every layer except
    the last one, so callers must apply a sigmoid (or use a logits-based loss)
    themselves.

    Args:
        input_size: Number of features in each input row.
        num_layers: Number of ``layer_size``-wide layers; values below 1
            behave like 1 (an input layer followed by the output layer).
        layer_size: Width of every hidden representation.
    """

    def __init__(self, input_size, num_layers, layer_size):
        super().__init__()

        # Consecutive pairs of this list are the (in, out) sizes of each layer.
        widths = [input_size] + [layer_size] * max(num_layers, 1) + [1]

        # ModuleList registers the dynamically sized stack with the module.
        self.layers = nn.ModuleList(
            [nn.Linear(fan_in, fan_out) for fan_in, fan_out in zip(widths, widths[1:])]
        )

    def forward(self, x):
        """Run ``x`` through the stack and return the un-activated output."""
        *hidden, head = self.layers
        for linear in hidden:
            x = F.relu(linear(x))
        return head(x)
42
-
43
class CustomPyTorchClassifier(BaseEstimator):
    """Scikit-learn style wrapper around a one-hidden-layer binary classifier.

    The estimator exposes ``fit`` / ``predict`` / ``score`` so it can be used
    with scikit-learn model-selection utilities.

    Args:
        hidden_dim: Width of the single hidden layer.
        lr: Learning rate handed to the Adam optimiser.
        pos_weight: Weight of the positive class in the loss.
        th: Probability threshold above which a sample is predicted positive.
        max_epochs: Number of passes over the training data.
        batch_size: Mini-batch size used while training.
    """

    def __init__(self, hidden_dim=10, lr=0.001, pos_weight=1, th=0.5, max_epochs=10, batch_size=32):
        self.hidden_dim = hidden_dim
        self.lr = lr
        self.pos_weight = pos_weight
        self.max_epochs = max_epochs
        self.batch_size = batch_size
        self.th = th
        self.model = None  # built by fit()

    def fit(self, X, y):
        """Train a fresh network on ``X`` / ``y`` and return ``self``.

        Args:
            X: 2-D feature matrix (NumPy array or torch tensor).
            y: 1-D vector of 0/1 labels (NumPy array or torch tensor).
        """
        if isinstance(X, torch.Tensor):
            X = X.numpy()
        if isinstance(y, torch.Tensor):
            y = y.numpy()

        input_dim = X.shape[1]

        # The network outputs raw logits.  BCEWithLogitsLoss applies the
        # sigmoid internally, so a trailing nn.Sigmoid() here would squash the
        # output twice and distort the training signal.
        self.model = nn.Sequential(
            nn.Linear(input_dim, self.hidden_dim),
            nn.ReLU(),
            nn.Linear(self.hidden_dim, 1),
        )

        # Cast to float so the weight has the same dtype as the logits.
        criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor(float(self.pos_weight)))
        optimizer = optim.Adam(self.model.parameters(), lr=self.lr)

        train_data = TensorDataset(torch.from_numpy(X).float(), torch.from_numpy(y).float())
        train_loader = DataLoader(train_data, batch_size=self.batch_size, shuffle=True)

        self.model.train()
        for _ in range(self.max_epochs):
            for inputs, labels in train_loader:
                optimizer.zero_grad()
                # squeeze(-1) drops only the output dimension; a bare
                # squeeze() would also drop the batch dimension of a
                # one-sample batch and make the loss reject the shapes.
                logits = self.model(inputs).squeeze(-1)
                loss = criterion(logits, labels)
                loss.backward()
                optimizer.step()
        return self

    def predict(self, X):
        """Return a float array of 0/1 predictions, one per row of ``X``."""
        if isinstance(X, torch.Tensor):
            X = X.numpy()

        self.model.eval()
        with torch.no_grad():
            logits = self.model(torch.from_numpy(X).float())
            # Convert logits to probabilities before comparing with ``th``.
            probabilities = torch.sigmoid(logits).squeeze(-1)
            predictions = (probabilities > self.th).float().numpy()
        return predictions

    def score(self, X, y):
        """Return the accuracy of ``predict(X)`` against ``y``."""
        predictions = self.predict(X)
        return accuracy_score(y, predictions)
96
-
97
-
98
class ParamsOptimiser:
    """Hyper-parameter search helpers for binary classifiers.

    Offers a scikit-learn grid search over ``CustomPyTorchClassifier``, Optuna
    studies over ``BinaryClassifier`` (architecture, optimiser, threshold) and
    an Optuna study over a federated-learning run.

    Args:
        X_train: Training features (DataFrame or array); DataFrames are
            converted to NumPy arrays.
        y_train: Training labels (Series or array); Series are converted.
        X_test: Test features, same handling as ``X_train``.
        y_test: Test labels, same handling as ``y_train``.
    """

    def __init__(self, X_train = None, y_train=None, X_test=None, y_test=None):
        if isinstance(X_train, pd.DataFrame):
            X_train = X_train.to_numpy()
        if isinstance(y_train, pd.Series):
            y_train = y_train.to_numpy()
        if isinstance(X_test, pd.DataFrame):
            X_test = X_test.to_numpy()
        if isinstance(y_test, pd.Series):
            y_test = y_test.to_numpy()

        self.X_train = X_train
        self.y_train = y_train
        self.X_test = X_test
        self.y_test = y_test

    # ------------------------------------------------------------------
    # Private helpers shared by the Optuna objectives
    # ------------------------------------------------------------------
    def _build_datasets(self):
        """Return ``(train, test)`` TensorDatasets built from the stored arrays."""
        def as_dataset(features, labels):
            return TensorDataset(
                torch.as_tensor(np.asarray(features), dtype=torch.float32),
                torch.as_tensor(np.asarray(labels), dtype=torch.float32),
            )

        return (
            as_dataset(self.X_train, self.y_train),
            as_dataset(self.X_test, self.y_test),
        )

    @staticmethod
    def _train_one_epoch(model, loader, optimizer, criterion):
        """Run one optimisation pass of ``model`` over ``loader``."""
        model.train()
        for batch_X, batch_y in loader:
            optimizer.zero_grad()
            # squeeze(-1) keeps the batch dimension, so a trailing batch of a
            # single sample still matches the shape of its label tensor.
            logits = model(batch_X).squeeze(-1)
            loss = criterion(logits, batch_y)
            loss.backward()
            optimizer.step()

    @staticmethod
    def _predict_scores(model, loader):
        """Return 1-D arrays ``(probabilities, labels)`` for every row in ``loader``."""
        model.eval()
        scores, labels = [], []
        with torch.no_grad():
            for batch_X, batch_y in loader:
                scores.append(torch.sigmoid(model(batch_X)).reshape(-1))
                labels.append(batch_y.reshape(-1))
        if not scores:
            return np.empty(0, dtype=np.float32), np.empty(0, dtype=np.float32)
        return torch.cat(scores).numpy(), torch.cat(labels).numpy()

    # ------------------------------------------------------------------
    # Grid search
    # ------------------------------------------------------------------
    def perform_grid_search(self, param_grid, scoring_metric='recall', cv=3, verbose=1):
        """Grid-search ``CustomPyTorchClassifier`` on the training data.

        Args:
            param_grid: Parameter grid forwarded to ``GridSearchCV``.
            scoring_metric: One of ``'precision'``, ``'accuracy'``,
                ``'recall'`` or ``'f1'``; any other value falls back to recall.
            cv: Cross-validation setting forwarded to ``GridSearchCV``.
            verbose: Verbosity forwarded to ``GridSearchCV``.

        Returns:
            The fitted ``GridSearchCV`` object (also stored on
            ``self.grid_search_results``).
        """
        metric_functions = {
            'precision': precision_score,
            'accuracy': accuracy_score,
            'recall': recall_score,
            'f1': f1_score,
        }
        scorer = make_scorer(metric_functions.get(scoring_metric, recall_score))

        # A single scorer is used, so refit only needs to be truthy.
        grid_search = GridSearchCV(CustomPyTorchClassifier(), param_grid, scoring=scorer,
                                   cv=cv, refit=True, verbose=verbose)
        grid_search.fit(self.X_train, self.y_train)

        self.grid_search_results = grid_search  # Save the grid search results

        return grid_search

    def plot_results(self, params_to_plot=None):
        """Plot the mean test score of the last grid search.

        Args:
            params_to_plot: ``None`` (or empty) to draw one bar per tested
                configuration, or a list of one or two parameter names to draw
                a heatmap of the mean test score over those parameters.
        """
        results = pd.DataFrame(self.grid_search_results.cv_results_)
        score_col = 'mean_test_score'

        if not params_to_plot:
            # Create a column for configuration details
            results['config'] = results['params'].apply(str)

            # Visualize mean test scores along with configurations
            plt.figure(figsize=(15, 8))
            bar_plot = plt.bar(results.index, results[score_col], color='blue', alpha=0.7)
            plt.xticks(results.index, results['config'], rotation='vertical', fontsize=8)
            plt.ylabel('Mean Test Score')
            plt.title('Mean Test Scores for Each Configuration')
            plt.tight_layout()

            # Add values on top of bars
            for bar, score in zip(bar_plot, results[score_col]):
                plt.text(bar.get_x() + bar.get_width() / 2 - 0.15, bar.get_height() + 0.01,
                         f'{score:.3f}', fontsize=8)

            plt.show()
            return

        if len(params_to_plot) > 2:
            print("Invalid number of parameters to plot. You can provide either one or two parameters.")
            return

        # cv_results_ stores each searched parameter under a 'param_<name>' column.
        param_cols = [f'param_{param}' for param in params_to_plot]

        try:
            if len(param_cols) == 1:
                # Restrict the pivot to the score column; without ``values``
                # every numeric column of cv_results_ would be averaged and drawn.
                scores = results.pivot_table(index=param_cols[0], values=score_col, aggfunc="mean")
            else:
                scores = results.pivot_table(index=param_cols[0], columns=param_cols[1],
                                             values=score_col, aggfunc="mean")
        except KeyError as e:
            print(f"Error: {e}. Make sure the specified parameters exist in the results DataFrame.")
            return

        plt.figure(figsize=(8, 6))
        sns.heatmap(scores, annot=True, cmap='YlGnBu', fmt=".3f", cbar_kws={'label': score_col})
        plt.title(score_col.capitalize())
        plt.show()

    # ------------------------------------------------------------------
    # Centralised Optuna search
    # ------------------------------------------------------------------
    def optuna_optimisation(self, direction, params):
        """Search ``BinaryClassifier`` hyper-parameters with Optuna.

        Args:
            direction: Optuna study direction (``'maximize'`` / ``'minimize'``).
            params: Search space.  ``'batch_size'``, ``'num_layers'``,
                ``'hidden_size'``, ``'learning_rate'`` and ``'num_epochs'`` are
                keyword-argument dicts for the matching ``trial.suggest_*``
                call, ``'optimizer'`` is a list of optimiser names and
                ``'n_trials'`` is the number of trials.

        Returns:
            The finished study (also stored on ``self.study``).  The objective
            is the test-set ROC AUC after the last epoch.
        """
        train_data, test_data = self._build_datasets()
        input_size = self.X_train.shape[1]

        def objective(trial):
            batch_size = trial.suggest_int('batch_size', **params['batch_size'])

            train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
            test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

            # Create the model with the suggested hyperparameters
            model = BinaryClassifier(input_size=input_size,
                                     num_layers=trial.suggest_int('num_layers', **params['num_layers']),
                                     layer_size=trial.suggest_int('hidden_size', **params['hidden_size']))

            # Define the loss function and optimizer
            criterion = nn.BCEWithLogitsLoss()
            optimizer_name = trial.suggest_categorical('optimizer', params['optimizer'])
            learning_rate = trial.suggest_float('learning_rate', **params['learning_rate'])
            # get_optimizer raises a clear ValueError for unsupported names.
            optimizer = self.get_optimizer(optimizer_name, model.parameters(), learning_rate)

            num_epochs = trial.suggest_int('num_epochs', **params['num_epochs'])
            if num_epochs < 1:
                raise ValueError("num_epochs must be at least 1 to produce a score")

            for epoch in range(num_epochs):
                self._train_one_epoch(model, train_loader, optimizer, criterion)

                scores, labels = self._predict_scores(model, test_loader)
                auc = roc_auc_score(labels, scores)

                # Report the intermediate value so the pruner can stop early.
                trial.report(auc, epoch)
                if trial.should_prune():
                    raise optuna.TrialPruned()

            return auc

        # Create an Optuna study
        study = optuna.create_study(direction=direction)
        study.optimize(objective, n_trials=params['n_trials'])

        self.study = study

        # Get the best hyperparameters
        best_params = study.best_params
        print(f"Best Hyperparameters: {best_params}")

        return study

    def train_optimized_model(self ,trial ,th_min , th_max):
        """Optuna objective: train with ``self.study``'s best parameters and
        score a sampled decision threshold.

        Args:
            trial: Optuna trial used to sample the threshold.
            th_min: Lower bound of the (log-scaled) threshold range.
            th_max: Upper bound of the threshold range.

        Returns:
            Balanced accuracy on the test set at the sampled threshold.
        """
        best_params = self.study.best_params

        threshold = trial.suggest_float('threashhold', th_min, th_max, log=True)

        train_data, test_data = self._build_datasets()
        train_loader = DataLoader(train_data, batch_size=best_params['batch_size'], shuffle=True)
        test_loader = DataLoader(test_data, batch_size=best_params['batch_size'], shuffle=False)

        # Use the best hyperparameters to train the final model
        final_model = BinaryClassifier(input_size=self.X_train.shape[1],
                                       layer_size=best_params['hidden_size'],
                                       num_layers=best_params['num_layers'])
        final_optimizer = self.get_optimizer(best_params['optimizer'], final_model.parameters(),
                                             best_params['learning_rate'])
        final_criterion = nn.BCEWithLogitsLoss()

        for _ in range(best_params['num_epochs']):
            self._train_one_epoch(final_model, train_loader, final_optimizer, final_criterion)

        # Evaluate the final model on the test set
        scores, labels = self._predict_scores(final_model, test_loader)
        final_balanced_acc = balanced_accuracy_score(labels, (scores > threshold).astype(int))
        print(f"Model balanced accuracy: {final_balanced_acc}")

        return final_balanced_acc

    def get_optimizer(self, optimizer_name, parameters, learning_rate):
        """Return the torch optimiser called ``optimizer_name``.

        Raises:
            ValueError: If the name is not ``'Adam'``, ``'SGD'`` or ``'RMSprop'``.
        """
        if optimizer_name == 'Adam':
            return optim.Adam(parameters, lr=learning_rate)
        elif optimizer_name == 'SGD':
            return optim.SGD(parameters, lr=learning_rate)
        elif optimizer_name == 'RMSprop':
            return optim.RMSprop(parameters, lr=learning_rate)
        else:
            raise ValueError(f"Unknown optimizer: {optimizer_name}")

    # ------------------------------------------------------------------
    # Optuna visualisation shortcuts (all operate on ``self.study``)
    # ------------------------------------------------------------------
    def plot_param_importances(self):
        """Return Optuna's parameter-importance figure for ``self.study``."""
        return optuna.visualization.plot_param_importances(self.study)

    def plot_slice(self , params):
        """Return Optuna's slice figure for the given parameter names."""
        return optuna.visualization.plot_slice(self.study , params=params)

    def plot_parallel_coordinate(self):
        """Return Optuna's parallel-coordinate figure for ``self.study``."""
        return optuna.visualization.plot_parallel_coordinate(self.study)

    def plot_rank(self , params=None):
        """Return Optuna's rank figure, optionally limited to ``params``."""
        return optuna.visualization.plot_rank(self.study , params=params)

    def plot_optimization_history(self):
        """Return Optuna's optimisation-history figure for ``self.study``."""
        return optuna.visualization.plot_optimization_history(self.study)

    def optimize_model_threashhold(self , n_trials , th_min , th_max):
        """Search the decision threshold that maximises balanced accuracy.

        Runs ``train_optimized_model`` for ``n_trials`` trials and returns the
        Optuna rank figure of the ``'threashhold'`` parameter.
        """
        th_study = optuna.create_study(direction='maximize')
        th_study.optimize(
            lambda trial: self.train_optimized_model(trial, th_min=th_min, th_max=th_max),
            n_trials=n_trials,
        )

        # Get the best hyperparameters
        best_params = th_study.best_params
        print(f"Best Hyperparameters: {best_params}")

        return optuna.visualization.plot_rank(th_study , params=['threashhold'])

    # ------------------------------------------------------------------
    # Federated Optuna search
    # ------------------------------------------------------------------
    def federated_params_iptim(self , params , direction, model, fl_dataset):
        """Search federated-learning hyper-parameters with Optuna.

        Args:
            params: Search space.  ``'learning_rate'``, ``'num_rounds'`` and
                ``'diff_privacy'`` are keyword-argument dicts for the matching
                ``trial.suggest_*`` call, ``'optimizer'`` and ``'fl_strategy'``
                are lists of choices and ``'n_trials'`` is the trial count.
            direction: Optuna study direction.
            model: Torch model wrapped for every trial.
                NOTE(review): the same instance is reused across trials, so
                weights may carry over between them - confirm this is intended.
            fl_dataset: Federated dataset; its ``trainloaders`` length sets the
                number of clients.

        Returns:
            The finished study (also stored on ``self.study``).  The objective
            is the last entry of ``server.auc`` after the run.
        """
        def objective(trial):
            criterion = nn.BCEWithLogitsLoss()

            optimizer_name = trial.suggest_categorical('optimizer', params['optimizer'])
            learning_rate = trial.suggest_float('learning_rate', **params['learning_rate'])
            num_rounds = trial.suggest_int('num_rounds', **params['num_rounds'])
            # Differential privacy is sampled as an integer flag (1 = enabled).
            diff_privacy = trial.suggest_int('diff_privacy', **params['diff_privacy']) == 1

            optimizer = self.get_optimizer(optimizer_name, model.parameters(), learning_rate)

            # Creating a new Model instance using the specific model created by DynamicModel
            global_model = Model(model, optimizer, criterion)

            # Get the initial params of the model
            init_params = global_model.get_parameters()

            fl_strategy = trial.suggest_categorical('fl_strategy', params['fl_strategy'])

            learning_strategy = Strategy(fl_strategy,
                                         fraction_fit=1.0,
                                         fraction_evaluate=1.0,
                                         min_fit_clients=2,
                                         min_evaluate_clients=2,
                                         min_available_clients=2,
                                         initial_parameters=init_params)

            learning_strategy.create_strategy()

            # Create The server
            server = FlowerServer(global_model, strategy=learning_strategy, num_rounds=num_rounds,
                                  num_clients=len(fl_dataset.trainloaders),
                                  fed_dataset=fl_dataset, diff_privacy=diff_privacy,
                                  # You can change the resources allocated for each client based on your machine
                                  client_resources={'num_cpus': 1.0, 'num_gpus': 0.0}
                                  )

            ppl_1 = FLpipeline(name="the first fl_pipeline", description="this is our first FL pipeline",
                               server=server)

            # Run the training of the model
            ppl_1.server.run()

            return server.auc[-1]

        study = optuna.create_study(direction=direction)
        study.optimize(objective, n_trials=params['n_trials'])

        self.study = study

        # Get the best hyperparameters
        best_params = study.best_params
        print(f"Best Hyperparameters: {best_params}")

        return study
426
-
427
-
428
-
429
-
430
-
431
-
432
-
433
-
434
-
435
-
436
-
437
-
438
-
439
-
440
-
441
-
442
-
@@ -1,229 +0,0 @@
1
- import matplotlib.pyplot as plt
2
- import numpy as np
3
- import seaborn as sns
4
-
5
- from .utils import *
6
-
7
# Placeholder experiment results: swap these in for values collected from real
# runs.  Each key is a (configuration label, metric name) pair and each value
# holds one entry per iteration; the trailing ``...`` (Ellipsis) only marks
# where further values would go.
results_dict = {
    (configuration, metric): series
    for configuration, metric, series in (
        ("LR: 0.001, Optimizer: Adam", "accuracy", [0.85, 0.89, 0.92, 0.94, ...]),
        ("LR: 0.001, Optimizer: Adam", "loss", [0.2, 0.15, 0.1, 0.08, ...]),
        ("LR: 0.01, Optimizer: SGD", "accuracy", [0.88, 0.91, 0.93, 0.95, ...]),
        ("LR: 0.01, Optimizer: SGD", "loss", [0.18, 0.13, 0.09, 0.07, ...]),
        ("LR: 0.1, Optimizer: Adam", "accuracy", [0.82, 0.87, 0.91, 0.93, ...]),
        ("LR: 0.1, Optimizer: Adam", "loss", [0.25, 0.2, 0.15, 0.12, ...]),
    )
}

# Design notes on where the plotted series are expected to come from:
#
#   The server should have (length = number of rounds):
#       self.accuracies
#       self.losses
#
#   A client should have (length = number of epochs):
#       self.accuracies
#       self.losses
#       self.epsilons
#       self.deltas
#
#   Common settings: LR, SGD, aggregation
32
-
33
-
34
class AccuracyLossPlotter:
    """
    A utility class for plotting accuracy and loss metrics based on experiment results.

    Args:
        results_dict (dict): Dictionary containing experiment results, keyed by
            ``(parameter label, metric name)`` with one sequence of values per key.

    Attributes:
        results_dict (dict): The dictionary passed to the constructor.
        parameters (list): Unique parameter labels, in first-seen order.
        metrics (list): Unique metric names, in first-seen order.
        iterations (range): ``1..n`` where ``n`` is the length of the first
            series (empty when ``results_dict`` is empty).
    """

    def __init__(self, results_dict):
        """
        Initialize the AccuracyLossPlotter with experiment results.

        Args:
            results_dict (dict): Dictionary containing experiment results organized by parameters and metrics.
        """
        self.results_dict = results_dict
        # dict.fromkeys removes duplicates while keeping first-seen order, so
        # the legend order is the same on every run (a set would not be).
        self.parameters = list(dict.fromkeys(key[0] for key in results_dict))
        self.metrics = list(dict.fromkeys(key[1] for key in results_dict))
        # Fall back to an empty series so an empty dictionary is accepted.
        first_series = next(iter(results_dict.values()), [])
        self.iterations = range(1, len(first_series) + 1)

    def plot_accuracy_loss(self):
        """
        Plot accuracy and loss metrics for different parameters.

        Every recorded (parameter, metric) series is drawn against its own
        ``1..len(series)`` axis; combinations that were not recorded are skipped.
        """
        plt.figure(figsize=(8, 6))

        for param in self.parameters:
            for metric in self.metrics:
                values = self.results_dict.get((param, metric))
                if values is None:
                    continue  # this combination was not recorded
                plt.plot(
                    range(1, len(values) + 1),
                    values,
                    label=f"{param} ({metric})",
                    marker="o",
                    linestyle="-",
                )

        plt.xlabel("Rounds")
        plt.ylabel("Accuracy / Loss")
        plt.title("Accuracy and Loss by Parameters")
        plt.legend()
        plt.grid(True)
        plt.show()

    @staticmethod
    def _show_confusion_matrix(confusion_matrix, title):
        """Draw the TP/FP/FN/TN counts in ``confusion_matrix`` as a 2x2 heatmap."""
        # Rows are the actual class, columns the predicted class.
        matrix = [[confusion_matrix['TN'], confusion_matrix['FP']],
                  [confusion_matrix['FN'], confusion_matrix['TP']]]

        plt.figure(figsize=(6, 4))
        sns.heatmap(matrix, annot=True, fmt='d', cmap='Blues',
                    xticklabels=['Predicted Negative', 'Predicted Positive'],
                    yticklabels=['Actual Negative', 'Actual Positive'])
        plt.title(title)
        plt.xlabel('Predicted label')
        plt.ylabel('True label')
        plt.tight_layout()

        # Display the confusion matrix heatmap
        plt.show()

    @staticmethod
    def plot_global_confusion_matrix(pipeline_name: str):
        """
        Plot a global confusion matrix based on pipeline results.

        Args:
            pipeline_name (str): Name of the pipeline.

        Returns:
            None
        """
        # Get the id of the pipeline by name
        pipeline_id = get_pipeline_from_name(pipeline_name)
        # Get the confusion matrix of the pipeline
        confusion_matrix = get_pipeline_confusion_matrix(pipeline_id)

        AccuracyLossPlotter._show_confusion_matrix(confusion_matrix, 'Global Confusion Matrix')

    @staticmethod
    def plot_confusion_Matrix_by_node(node_name: str, pipeline_name: str):
        """
        Plot a confusion matrix for a specific node in the pipeline.

        Args:
            node_name (str): Name of the node.
            pipeline_name (str): Name of the pipeline.

        Returns:
            None
        """
        # Get the id of the pipeline by name
        pipeline_id = get_pipeline_from_name(pipeline_name)
        # Get the confusion matrix of the node within that pipeline
        confusion_matrix = get_node_confusion_matrix(
            pipeline_id, node_name=node_name)

        AccuracyLossPlotter._show_confusion_matrix(
            confusion_matrix, 'Confusion Matrix of node: ' + node_name)
        return

    @staticmethod
    def plot_classification_report(pipeline_name: str):
        """
        Plot a comparison of classification report metrics between nodes.

        Args:
            pipeline_name (str): Name of the pipeline.

        Returns:
            None
        """
        colors = ['#FF5733', '#6A5ACD', '#3CB371', '#FFD700', '#FFA500',
                  '#8A2BE2', '#00FFFF', '#FF00FF', '#A52A2A', '#00FF00']

        # Get the id of the pipeline by name
        pipeline_id = get_pipeline_from_name(pipeline_name)

        pipeline_results = get_pipeline_result(pipeline_id)

        nodesList = pipeline_results['nodename']

        # One report per node; the right-hand keys are the column names of the
        # pipeline results and must be kept exactly as spelled.
        classificationReports = [
            {
                'Accuracy': pipeline_results['accuracy'][index],
                'Sensitivity/Recall': pipeline_results['sensivity'][index],
                'PPV/Precision': pipeline_results['ppv'][index],
                'NPV': pipeline_results['npv'][index],
                'F1-score': pipeline_results['f1score'][index],
                'False positive rate': pipeline_results['fpr'][index],
                'True positive rate': pipeline_results['tpr'][index],
            }
            for index, _ in enumerate(nodesList)
        ]

        if not classificationReports:
            print(f"No results to plot for pipeline: {pipeline_name}")
            return

        # Every report is built with the same keys, so the first one names the metrics.
        metric_labels = list(classificationReports[0].keys())

        # Set the positions of the bars on the x-axis
        x = np.arange(len(metric_labels))

        # Total width shared by all the bars of one metric
        width = 0.35
        node_count = len(classificationReports)
        bar_width = width / node_count

        plt.figure(figsize=(12, 6))

        for index, report in enumerate(classificationReports):
            plt.bar(x + (index - node_count / 2) * bar_width, list(report.values()), bar_width,
                    label=nodesList[index], color=colors[index % len(colors)])

        # Adding labels, title, and legend
        plt.xlabel('Metrics')
        plt.ylabel('Values')
        plt.title('Comparison of Classification Report Metrics between Nodes')
        plt.xticks(ticks=x, labels=metric_labels, rotation=45)
        plt.legend()

        # Show plot
        plt.tight_layout()
        plt.show()

        return