mediml 0.9.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. MEDiml/MEDscan.py +1696 -0
  2. MEDiml/__init__.py +21 -0
  3. MEDiml/biomarkers/BatchExtractor.py +806 -0
  4. MEDiml/biomarkers/BatchExtractorTexturalFilters.py +840 -0
  5. MEDiml/biomarkers/__init__.py +16 -0
  6. MEDiml/biomarkers/diagnostics.py +125 -0
  7. MEDiml/biomarkers/get_oriented_bound_box.py +158 -0
  8. MEDiml/biomarkers/glcm.py +1602 -0
  9. MEDiml/biomarkers/gldzm.py +523 -0
  10. MEDiml/biomarkers/glrlm.py +1315 -0
  11. MEDiml/biomarkers/glszm.py +555 -0
  12. MEDiml/biomarkers/int_vol_hist.py +527 -0
  13. MEDiml/biomarkers/intensity_histogram.py +615 -0
  14. MEDiml/biomarkers/local_intensity.py +89 -0
  15. MEDiml/biomarkers/morph.py +1756 -0
  16. MEDiml/biomarkers/ngldm.py +780 -0
  17. MEDiml/biomarkers/ngtdm.py +414 -0
  18. MEDiml/biomarkers/stats.py +373 -0
  19. MEDiml/biomarkers/utils.py +389 -0
  20. MEDiml/filters/TexturalFilter.py +299 -0
  21. MEDiml/filters/__init__.py +9 -0
  22. MEDiml/filters/apply_filter.py +134 -0
  23. MEDiml/filters/gabor.py +215 -0
  24. MEDiml/filters/laws.py +283 -0
  25. MEDiml/filters/log.py +147 -0
  26. MEDiml/filters/mean.py +121 -0
  27. MEDiml/filters/textural_filters_kernels.py +1738 -0
  28. MEDiml/filters/utils.py +107 -0
  29. MEDiml/filters/wavelet.py +237 -0
  30. MEDiml/learning/DataCleaner.py +198 -0
  31. MEDiml/learning/DesignExperiment.py +480 -0
  32. MEDiml/learning/FSR.py +667 -0
  33. MEDiml/learning/Normalization.py +112 -0
  34. MEDiml/learning/RadiomicsLearner.py +714 -0
  35. MEDiml/learning/Results.py +2237 -0
  36. MEDiml/learning/Stats.py +694 -0
  37. MEDiml/learning/__init__.py +10 -0
  38. MEDiml/learning/cleaning_utils.py +107 -0
  39. MEDiml/learning/ml_utils.py +1015 -0
  40. MEDiml/processing/__init__.py +6 -0
  41. MEDiml/processing/compute_suv_map.py +121 -0
  42. MEDiml/processing/discretisation.py +149 -0
  43. MEDiml/processing/interpolation.py +275 -0
  44. MEDiml/processing/resegmentation.py +66 -0
  45. MEDiml/processing/segmentation.py +912 -0
  46. MEDiml/utils/__init__.py +25 -0
  47. MEDiml/utils/batch_patients.py +45 -0
  48. MEDiml/utils/create_radiomics_table.py +131 -0
  49. MEDiml/utils/data_frame_export.py +42 -0
  50. MEDiml/utils/find_process_names.py +16 -0
  51. MEDiml/utils/get_file_paths.py +34 -0
  52. MEDiml/utils/get_full_rad_names.py +21 -0
  53. MEDiml/utils/get_institutions_from_ids.py +16 -0
  54. MEDiml/utils/get_patient_id_from_scan_name.py +22 -0
  55. MEDiml/utils/get_patient_names.py +26 -0
  56. MEDiml/utils/get_radiomic_names.py +27 -0
  57. MEDiml/utils/get_scan_name_from_rad_name.py +22 -0
  58. MEDiml/utils/image_reader_SITK.py +37 -0
  59. MEDiml/utils/image_volume_obj.py +22 -0
  60. MEDiml/utils/imref.py +340 -0
  61. MEDiml/utils/initialize_features_names.py +62 -0
  62. MEDiml/utils/inpolygon.py +159 -0
  63. MEDiml/utils/interp3.py +43 -0
  64. MEDiml/utils/json_utils.py +78 -0
  65. MEDiml/utils/mode.py +31 -0
  66. MEDiml/utils/parse_contour_string.py +58 -0
  67. MEDiml/utils/save_MEDscan.py +30 -0
  68. MEDiml/utils/strfind.py +32 -0
  69. MEDiml/utils/textureTools.py +188 -0
  70. MEDiml/utils/texture_features_names.py +115 -0
  71. MEDiml/utils/write_radiomics_csv.py +47 -0
  72. MEDiml/wrangling/DataManager.py +1724 -0
  73. MEDiml/wrangling/ProcessDICOM.py +512 -0
  74. MEDiml/wrangling/__init__.py +3 -0
  75. mediml-0.9.9.dist-info/LICENSE.md +674 -0
  76. mediml-0.9.9.dist-info/METADATA +232 -0
  77. mediml-0.9.9.dist-info/RECORD +78 -0
  78. mediml-0.9.9.dist-info/WHEEL +4 -0
MEDiml/learning/Stats.py
@@ -0,0 +1,694 @@
+ # Description: All the functions related to statistics (p-values, metrics, etc.)
+
+ import os
+ from pathlib import Path
+ from typing import List, Tuple
+ import warnings
+
+ import numpy as np
+ import pandas as pd
+ import scipy.stats
+ from sklearn import metrics
+
+ from MEDiml.utils.json_utils import load_json
+
+
+ class Stats:
+     """
+     A class to perform statistical analysis on experiment results.
+
+     This class provides methods to retrieve patient IDs, predictions, and metrics from experiment data,
+     as well as compute the p-values for model comparison using various methods.
+
+     Args:
+         path_experiment (Path): Path to the folder containing the experiment data.
+         experiment (str): Name of the experiment.
+         levels (List): List of radiomics levels to analyze.
+         modalities (List): List of modalities to analyze.
+
+     Attributes:
+         path_experiment (Path): Path to the folder containing the experiment data.
+         experiment (str): Name of the experiment.
+         levels (List): List of radiomics levels to analyze.
+         modalities (List): List of modalities to analyze.
+     """
+     def __init__(self, path_experiment: Path, experiment: str = "", levels: List = [], modalities: List = []):
+         # Initialization
+         self.path_experiment = path_experiment
+         self.experiment = experiment
+         self.levels = levels
+         self.modalities = modalities
+
+         # Safety assertion
+         self.__safety_assertion()
+
+     def __get_models_dicts(self, split_idx: int) -> Tuple[Path, Path]:
+         """
+         Retrieves the paths to the results dictionaries of the two compared models for a given split.
+
+         Args:
+             split_idx (int): Index of the split.
+
+         Returns:
+             Tuple[Path, Path]: Paths to the two models' results dictionaries.
+         """
+         # Get level and modality
+         if len(self.modalities) == 1:
+             # Compare two levels on the same modality
+             path_json_1 = self.__get_path_json(self.levels[0], self.modalities[0], split_idx)
+             path_json_2 = self.__get_path_json(self.levels[1], self.modalities[0], split_idx)
+         else:
+             # Compare the same level on two modalities
+             path_json_1 = self.__get_path_json(self.levels[0], self.modalities[0], split_idx)
+             path_json_2 = self.__get_path_json(self.levels[0], self.modalities[1], split_idx)
+         return path_json_1, path_json_2
+
+     def __safety_assertion(self):
+         """
+         Asserts that the input parameters are correct.
+         """
+         if len(self.modalities) == 1:
+             assert len(self.levels) == 2, \
+                 "For statistical analysis, the number of levels must be 2 for a single modality, or 1 for two modalities"
+         elif len(self.modalities) == 2:
+             assert len(self.levels) == 1, \
+                 "For statistical analysis, the number of levels must be 1 for two modalities, or 2 for a single modality"
+         else:
+             raise ValueError("The number of modalities must be 1 or 2")
+
+     def __get_path_json(self, level: str, modality: str, split_idx: int) -> Path:
+         """
+         Retrieves the path to the models dictionary for a given split.
+
+         Args:
+             level (str): Radiomics level.
+             modality (str): Modality.
+             split_idx (int): Index of the split.
+
+         Returns:
+             Path: Path to the models dictionary.
+         """
+         return self.path_experiment / f'learn__{self.experiment}_{level}_{modality}' / f'test__{split_idx:03d}' / 'run_results.json'
+
+     def __get_patients_and_predictions(
+         self,
+         split_idx: int
+     ) -> tuple:
+         """
+         Retrieves the patient IDs and the predictions of both models for a given split.
+
+         Args:
+             split_idx (int): Index of the split.
+
+         Returns:
+             tuple: Tuple containing the patient IDs, the predictions of the first model and the predictions of the second model.
+         """
+         # Get models dicts
+         path_json_1, path_json_2 = self.__get_models_dicts(split_idx)
+
+         # Load models dicts
+         model_one = load_json(path_json_1)
+         model_two = load_json(path_json_2)
+
+         # Get model names
+         name_model_one = list(model_one.keys())[0]
+         name_model_two = list(model_two.keys())[0]
+
+         # Get predictions
+         predictions_one = np.array(model_one[name_model_one]['test']['response']).ravel().tolist()
+         predictions_two = np.array(model_two[name_model_two]['test']['response']).ravel().tolist()
+
+         # Get patient IDs
+         patients_ids_one = model_one[name_model_one]['test']['patients']
+         patients_ids_two = model_two[name_model_two]['test']['patients']
+
+         # If one model was tested on more patients than the other, drop the extra
+         # patients from the longer list (IDs and predictions are filtered together
+         # so the two lists stay aligned)
+         if len(patients_ids_one) > len(patients_ids_two):
+             warnings.warn("The number of patients is different for both models. "
+                           "Patients will be deleted to match the number of patients.")
+             keep = [patient_id in patients_ids_two for patient_id in patients_ids_one]
+             predictions_one = [pred for pred, k in zip(predictions_one, keep) if k]
+             patients_ids_one = [pid for pid, k in zip(patients_ids_one, keep) if k]
+         elif len(patients_ids_one) < len(patients_ids_two):
+             warnings.warn("The number of patients is different for both models. "
+                           "Patients will be deleted to match the number of patients.")
+             keep = [patient_id in patients_ids_one for patient_id in patients_ids_two]
+             predictions_two = [pred for pred, k in zip(predictions_two, keep) if k]
+             patients_ids_two = [pid for pid, k in zip(patients_ids_two, keep) if k]
+
+         # Check that the patient IDs are the same
+         if patients_ids_one != patients_ids_two:
+             raise ValueError("The patient IDs must be the same for both models")
+
+         # Check that the number of predictions is the same
+         if len(predictions_one) != len(predictions_two):
+             raise ValueError("The number of predictions must be the same for both models")
+
+         return patients_ids_one, predictions_one, predictions_two
+
+     def __calc_pvalue(self, aucs: np.array, sigma: np.array) -> float:
+         """
+         Computes the p-value for the difference between the two AUCs.
+
+         Args:
+             aucs (np.array): 1D array of the two AUCs.
+             sigma (np.array): DeLong covariance matrix of the AUCs.
+
+         Returns:
+             float: p-value of the AUC difference.
+         """
+         l = np.array([[1, -1]])
+         z = np.abs(np.diff(aucs)) / np.sqrt(np.dot(np.dot(l, sigma), l.T))
+         p_value = 2 * scipy.stats.norm.sf(z, loc=0, scale=1)
+         return p_value
+
+     def __corrected_std(self, differences: np.array, n_train: int, n_test: int) -> float:
+         """
+         Corrects the standard deviation using Nadeau and Bengio's approach.
+
+         Args:
+             differences (np.array): Vector containing the differences in the score metrics of two models.
+             n_train (int): Number of samples in the training set.
+             n_test (int): Number of samples in the testing set.
+
+         Returns:
+             float: Variance-corrected standard deviation of the set of differences.
+
+         Reference:
+             `Statistical comparison of models
+             <https://scikit-learn.org/stable/auto_examples/model_selection/plot_grid_search_stats.html#comparing-two-models-frequentist-approach>`_.
+         """
+         # kr = k times r: r-times repeated k-fold cross-validation,
+         # so kr equals the number of times the model was evaluated
+         kr = len(differences)
+         corrected_var = np.var(differences, ddof=1) * (1 / kr + n_test / n_train)
+         corrected_std = np.sqrt(corrected_var)
+         return corrected_std
+
+     def __compute_midrank(self, x: np.array) -> np.array:
+         """
+         Computes midranks for the DeLong p-value.
+
+         Args:
+             x (np.array): 1D array of probabilities.
+
+         Returns:
+             np.array: Midranks.
+         """
+         J = np.argsort(x)
+         Z = x[J]
+         N = len(x)
+         T = np.zeros(N, dtype=float)
+         i = 0
+         while i < N:
+             j = i
+             while j < N and Z[j] == Z[i]:
+                 j += 1
+             T[i:j] = 0.5 * (i + j - 1)
+             i = j
+         T2 = np.empty(N, dtype=float)
+         # Note(kazeevn): +1 is due to Python using 0-based indexing
+         # instead of 1-based in the AUC formula in the paper
+         T2[J] = T + 1
+         return T2
+
+     def __fast_delong(self, predictions_sorted_transposed: np.array, label_1_count: int) -> Tuple[np.array, np.array]:
+         """
+         Computes the empirical AUCs and their covariance using the fast version of DeLong's method.
+
+         Args:
+             predictions_sorted_transposed (np.array): 2D array [n_classifiers, n_examples],
+                 sorted such that the examples with label "1" come first.
+             label_1_count (int): Number of examples with label "1".
+
+         Returns:
+             Tuple[np.array, np.array]: (AUC values, DeLong covariance matrix)
+
+         Reference:
+             `Python fast DeLong implementation <https://github.com/yandexdataschool/roc_comparison/tree/master>`_.
+             @article{sun2014fast,
+                 title={Fast Implementation of DeLong's Algorithm for
+                        Comparing the Areas Under Correlated Receiver Operating Characteristic Curves},
+                 author={Xu Sun and Weichao Xu},
+                 journal={IEEE Signal Processing Letters},
+                 volume={21},
+                 number={11},
+                 pages={1389--1393},
+                 year={2014},
+                 publisher={IEEE}
+             }
+         """
+         # Short variables are named as they are in the paper
+         m = label_1_count
+         n = predictions_sorted_transposed.shape[1] - m
+         positive_examples = predictions_sorted_transposed[:, :m]
+         negative_examples = predictions_sorted_transposed[:, m:]
+         k = predictions_sorted_transposed.shape[0]
+
+         tx = np.empty([k, m], dtype=float)
+         ty = np.empty([k, n], dtype=float)
+         tz = np.empty([k, m + n], dtype=float)
+         for r in range(k):
+             tx[r, :] = self.__compute_midrank(positive_examples[r, :])
+             ty[r, :] = self.__compute_midrank(negative_examples[r, :])
+             tz[r, :] = self.__compute_midrank(predictions_sorted_transposed[r, :])
+         aucs = tz[:, :m].sum(axis=1) / m / n - float(m + 1.0) / 2.0 / n
+         v01 = (tz[:, :m] - tx[:, :]) / n
+         v10 = 1.0 - (tz[:, m:] - ty[:, :]) / m
+         sx = np.cov(v01)
+         sy = np.cov(v10)
+         delongcov = sx / m + sy / n
+
+         return aucs, delongcov
+
+     def __compute_ground_truth_statistics(self, ground_truth: np.array) -> Tuple[np.array, int]:
+         """
+         Computes the sorting order of the ground truth and the number of positive examples.
+
+         Args:
+             ground_truth (np.array): Array of 0s and 1s.
+
+         Returns:
+             Tuple[np.array, int]: Ordering that puts positive examples first, and the number of positive examples.
+         """
+         assert np.array_equal(np.unique(ground_truth), [0, 1])
+         order = (-ground_truth).argsort()
+         label_1_count = int(ground_truth.sum())
+         return order, label_1_count
+
+     def __get_metrics(self, metric: str, split_idx: int) -> tuple:
+         """
+         Retrieves the value of a given metric for both models on a given split.
+
+         Args:
+             metric (str): Metric to retrieve.
+             split_idx (int): Index of the split.
+
+         Returns:
+             tuple: Tuple containing the metric of the first model and the metric of the second model.
+         """
+         # Get models dicts
+         path_json_1, path_json_2 = self.__get_models_dicts(split_idx)
+
+         # Load models dicts
+         model_one = load_json(path_json_1)
+         model_two = load_json(path_json_2)
+
+         # Get model names
+         name_model_one = list(model_one.keys())[0]
+         name_model_two = list(model_two.keys())[0]
+
+         # Get metrics
+         metric_one = model_one[name_model_one]['test']['metrics'][metric]
+         metric_two = model_two[name_model_two]['test']['metrics'][metric]
+
+         return metric_one, metric_two
+
+     def __delong_roc_test(self, ground_truth: np.array, predictions_one: list, predictions_two: list) -> float:
+         """
+         Computes the p-value for the hypothesis that two ROC AUCs are different.
+
+         Args:
+             ground_truth (np.array): Array of 0s and 1s.
+             predictions_one (list): Probabilities of being class 1 for the first model.
+             predictions_two (list): Probabilities of being class 1 for the second model.
+
+         Returns:
+             float: p-value of the AUC difference.
+         """
+         order, label_1_count = self.__compute_ground_truth_statistics(ground_truth)
+         predictions_sorted_transposed = np.vstack((predictions_one, predictions_two))[:, order]
+         aucs, delongcov = self.__fast_delong(predictions_sorted_transposed, label_1_count)
+         return self.__calc_pvalue(aucs, delongcov)
+
+     @staticmethod
+     def get_aggregated_metric(
+         path_experiment: Path,
+         experiment: str,
+         level: str,
+         modality: str,
+         metric: str
+     ) -> float:
+         """
+         Computes the given metric aggregated over the test predictions of all splits of the experiment.
+
+         Args:
+             path_experiment (Path): Path to the folder containing the experiment.
+             experiment (str): Name of the experiment.
+             level (str): Radiomics level. For example: 'morph'.
+             modality (str): Modality to analyze.
+             metric (str): Metric to analyze.
+
+         Returns:
+             float: Aggregated value of the metric.
+         """
+         # Load outcomes dataframe
+         try:
+             outcomes = pd.read_csv(path_experiment / "outcomes.csv", sep=',')
+         except FileNotFoundError:
+             outcomes = pd.read_csv(path_experiment.parent / "outcomes.csv", sep=',')
+
+         # Initialization
+         predictions_all = list()
+         patients_ids_all = list()
+         nb_split = len([x[0] for x in os.walk(path_experiment / f'learn__{experiment}_{level}_{modality}')]) - 1
+
+         # For each split
+         for i in range(1, nb_split + 1):
+             # Path to ground truths and predictions
+             path_json = path_experiment / f'learn__{experiment}_{level}_{modality}' / f'test__{i:03d}' / 'run_results.json'
+
+             # Load model dict
+             model = load_json(path_json)
+
+             # Get model name
+             name_model = list(model.keys())[0]
+
+             # Get the model's threshold
+             thresh = model[name_model]['threshold']
+
+             # Get predictions
+             predictions = np.array(model[name_model]['test']['response']).ravel().tolist()
+
+             # Shift the predictions so that every model's decision threshold maps to 0.5
+             predictions = [prediction - thresh + 0.5 for prediction in predictions]
+             predictions_all.extend(predictions)
+
+             # Get patient IDs
+             patients_ids = model[name_model]['test']['patients']
+
+             # Add up patient IDs
+             patients_ids_all.extend(patients_ids)
+
+         # Get ground truth for the selected patients
+         ground_truth = []
+         for patient in patients_ids_all:
+             ground_truth.append(outcomes[outcomes['PatientID'] == patient][outcomes.columns[-1]].values[0])
+
+         # To numpy array
+         ground_truth = np.array(ground_truth)
+
+         # Compute the aggregated metric
+         # AUC
+         if metric == 'AUC':
+             return metrics.roc_auc_score(ground_truth, predictions_all)
+
+         # AUPRC
+         elif metric == 'AUPRC':
+             return metrics.average_precision_score(ground_truth, predictions_all)
+
+         # Confusion matrix-based metrics
+         else:
+             TP = ((np.array(predictions_all) >= 0.5) & (ground_truth == 1)).sum()
+             TN = ((np.array(predictions_all) < 0.5) & (ground_truth == 0)).sum()
+             FP = ((np.array(predictions_all) >= 0.5) & (ground_truth == 0)).sum()
+             FN = ((np.array(predictions_all) < 0.5) & (ground_truth == 1)).sum()
+
+             # Asserts
+             assert TP + FN != 0, "TP + FN = 0, division by 0"
+             assert TN + FP != 0, "TN + FP = 0, division by 0"
+
+             # Sensitivity
+             if metric == 'Sensitivity':
+                 return TP / (TP + FN)
+
+             # Specificity
+             elif metric == 'Specificity':
+                 return TN / (TN + FP)
+
+             else:
+                 raise ValueError(f"Metric {metric} not supported. Supported metrics: AUC, AUPRC, Sensitivity, Specificity. "
+                                  "Update file Stats.py to add the new metric.")
+
+     def get_aggregated_delong_p_value(self) -> float:
+         """
+         Calculates the p-value of the DeLong test on the predictions aggregated over all splits.
+
+         Returns:
+             float: p-value of the DeLong test.
+         """
+         # Load outcomes dataframe
+         try:
+             outcomes = pd.read_csv(self.path_experiment / "outcomes.csv", sep=',')
+         except FileNotFoundError:
+             outcomes = pd.read_csv(self.path_experiment.parent / "outcomes.csv", sep=',')
+
+         # Initialization
+         predictions_one_all = list()
+         predictions_two_all = list()
+         patients_ids_all = list()
+         nb_split = len([x[0] for x in os.walk(self.path_experiment / f'learn__{self.experiment}_{self.levels[0]}_{self.modalities[0]}')]) - 1
+
+         # For each split
+         for i in range(1, nb_split + 1):
+             # Get predictions and patient IDs
+             patients_ids, predictions_one, predictions_two = self.__get_patients_and_predictions(i)
+
+             # Add up all information
+             predictions_one_all.extend(predictions_one)
+             predictions_two_all.extend(predictions_two)
+             patients_ids_all.extend(patients_ids)
+
+         # Get ground truth for the selected patients
+         ground_truth = []
+         for patient in patients_ids_all:
+             ground_truth.append(outcomes[outcomes['PatientID'] == patient][outcomes.columns[-1]].values[0])
+
+         # To numpy array
+         ground_truth = np.array(ground_truth)
+
+         # Compute the p-value on the aggregated predictions
+         pvalue = self.__delong_roc_test(ground_truth, predictions_one_all, predictions_two_all).item()
+
+         return pvalue
+
+     def get_bengio_p_value(self) -> float:
+         """
+         Computes Nadeau and Bengio's right-tailed paired t-test with corrected variance.
+
+         Returns:
+             float: p-value of the corrected t-test.
+         """
+         # Initialization
+         metrics_one_all = list()
+         metrics_two_all = list()
+         nb_split = len([x[0] for x in os.walk(self.path_experiment / f'learn__{self.experiment}_{self.levels[0]}_{self.modalities[0]}')]) - 1
+
+         # For each split
+         for i in range(1, nb_split + 1):
+             # Get models dicts
+             path_json_1, path_json_2 = self.__get_models_dicts(i)
+
+             # Load patients train and test lists (set sizes are assumed constant across splits)
+             patients_train = load_json(path_json_1.parent / 'patientsTrain.json')
+             patients_test = load_json(path_json_1.parent / 'patientsTest.json')
+             n_train = len(patients_train)
+             n_test = len(patients_test)
+
+             # Load models dicts
+             model_one = load_json(path_json_1)
+             model_two = load_json(path_json_2)
+
+             # Get model names
+             name_model_one = list(model_one.keys())[0]
+             name_model_two = list(model_two.keys())[0]
+
+             # Get AUCs
+             metric_one = model_one[name_model_one]['test']['metrics']['AUC']
+             metric_two = model_two[name_model_two]['test']['metrics']['AUC']
+
+             # Add up all information
+             metrics_one_all.append(metric_one)
+             metrics_two_all.append(metric_two)
+
+         # Check that the number of metrics is the same
+         if len(metrics_one_all) != len(metrics_two_all):
+             raise ValueError("The number of metrics must be the same for both models")
+
+         # Get differences
+         differences = np.array(metrics_one_all) - np.array(metrics_two_all)
+         df = differences.shape[0] - 1
+
+         # Get corrected std
+         mean = np.mean(differences)
+         std = self.__corrected_std(differences, n_train, n_test)
+
+         # Get p-value
+         t_stat = mean / std
+         p_val = scipy.stats.t.sf(np.abs(t_stat), df)  # right-tailed t-test
+
+         return p_val
+
+     def get_delong_p_value(
+         self,
+         aggregate: bool = False,
+     ) -> float:
+         """
+         Calculates the p-value of the DeLong test for the given experiment.
+
+         Args:
+             aggregate (bool, optional): If True, aggregates the results of all the splits and computes one final p-value.
+                 Otherwise, returns the median p-value across the splits.
+
+         Returns:
+             float: p-value of the DeLong test.
+         """
+         # Check if aggregation is needed
+         if aggregate:
+             return self.get_aggregated_delong_p_value()
+
+         # Load outcomes dataframe
+         try:
+             outcomes = pd.read_csv(self.path_experiment / "outcomes.csv", sep=',')
+         except FileNotFoundError:
+             outcomes = pd.read_csv(self.path_experiment.parent / "outcomes.csv", sep=',')
+
+         # Initialization
+         nb_split = len([x[0] for x in os.walk(self.path_experiment / f'learn__{self.experiment}_{self.levels[0]}_{self.modalities[0]}')]) - 1
+         list_p_values_temp = list()
+
+         # For each split
+         for i in range(1, nb_split + 1):
+             # Get predictions and patient IDs
+             patients_ids, predictions_one, predictions_two = self.__get_patients_and_predictions(i)
+
+             # Get ground truth for the selected patients
+             ground_truth = []
+             for patient in patients_ids:
+                 ground_truth.append(outcomes[outcomes['PatientID'] == patient][outcomes.columns[-1]].values[0])
+
+             # To numpy array
+             ground_truth = np.array(ground_truth)
+
+             # Get p-value
+             pvalue = self.__delong_roc_test(ground_truth, predictions_one, predictions_two).item()
+             list_p_values_temp.append(pvalue)
+
+         # Compute the median p-value of all splits
+         return np.median(list_p_values_temp)
+
+     def get_ttest_p_value(self, metric: str = 'AUC') -> float:
+         """
+         Calculates the p-value using the t-test for two related samples of scores.
+
+         Args:
+             metric (str, optional): Metric to use for comparison. Defaults to 'AUC'.
+
+         Returns:
+             float: p-value of the paired t-test.
+         """
+         # Initialization
+         metric = metric.split('_')[0] if '_' in metric else metric
+         metrics_one_all = list()
+         metrics_two_all = list()
+         nb_split = len([x[0] for x in os.walk(self.path_experiment / f'learn__{self.experiment}_{self.levels[0]}_{self.modalities[0]}')]) - 1
+
+         # For each split
+         for i in range(1, nb_split + 1):
+             # Get metrics of the first and second model
+             metric_one, metric_two = self.__get_metrics(metric, i)
+
+             # Add up all information
+             metrics_one_all.append(metric_one)
+             metrics_two_all.append(metric_two)
+
+         # Check that the number of metrics is the same
+         if len(metrics_one_all) != len(metrics_two_all):
+             raise ValueError("The number of metrics must be the same for both models")
+
+         # Compute p-value by performing a paired t-test
+         _, p_value = scipy.stats.ttest_rel(metrics_one_all, metrics_two_all)
+
+         return p_value
+
+     def get_wilcoxon_p_value(self, metric: str = 'AUC') -> float:
+         """
+         Calculates the p-value using the Wilcoxon signed-rank test for two related samples of scores.
+
+         Args:
+             metric (str, optional): Metric to analyze. Defaults to 'AUC'.
+
+         Returns:
+             float: p-value of the Wilcoxon signed-rank test.
+         """
+         # Initialization
+         metric = metric.split('_')[0] if '_' in metric else metric
+         metrics_one_all = list()
+         metrics_two_all = list()
+         nb_split = len([x[0] for x in os.walk(self.path_experiment / f'learn__{self.experiment}_{self.levels[0]}_{self.modalities[0]}')]) - 1
+
+         # For each split
+         for i in range(1, nb_split + 1):
+             # Get metrics of the first and second model
+             metric_one, metric_two = self.__get_metrics(metric, i)
+
+             # Add up all information
+             metrics_one_all.append(metric_one)
+             metrics_two_all.append(metric_two)
+
+         # Check that the number of metrics is the same
+         if len(metrics_one_all) != len(metrics_two_all):
+             raise ValueError("The number of metrics must be the same for both models")
+
+         # Compute p-value by performing the Wilcoxon signed-rank test
+         _, p_value = scipy.stats.wilcoxon(metrics_one_all, metrics_two_all)
+
+         return p_value
+
+     def get_p_value(
+         self,
+         method: str,
+         metric: str = 'AUC',
+         aggregate: bool = False
+     ) -> float:
+         """
+         Calculates the p-value using the given method.
+
+         Args:
+             method (str): Method to use to calculate the p-value. Available options:
+
+                 - 'delong': DeLong test.
+                 - 'ttest': Paired t-test.
+                 - 'wilcoxon': Wilcoxon signed-rank test.
+                 - 'bengio': Nadeau and Bengio's corrected t-test.
+             metric (str, optional): Metric to analyze. Defaults to 'AUC'.
+             aggregate (bool, optional): If True, aggregates the results of all the splits and computes one final p-value.
+                 Only used by the DeLong test.
+
+         Returns:
+             float: p-value computed with the chosen method.
+         """
+         # Assertions
+         assert method in ['delong', 'ttest', 'wilcoxon', 'bengio'], \
+             f'method must be either "delong", "ttest", "wilcoxon" or "bengio". Given: {method}'
+
+         # Get p-value
+         if method == 'delong':
+             return self.get_delong_p_value(aggregate)
+         elif method == 'ttest':
+             return self.get_ttest_p_value(metric)
+         elif method == 'wilcoxon':
+             return self.get_wilcoxon_p_value(metric)
+         elif method == 'bengio':
+             return self.get_bengio_p_value()
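
For orientation, here is a minimal sketch of how the Stats class above might be driven. The folder layout follows what the code expects (an outcomes.csv next to learn__<experiment>_<level>_<modality>/test__<NNN>/run_results.json folders), but the path, experiment name, levels and modalities below are placeholders, not values shipped with the package:

    from pathlib import Path

    from MEDiml.learning import Stats

    # Hypothetical experiment comparing two radiomics levels on one modality
    stats = Stats(
        path_experiment=Path("experiments/my_experiment"),  # placeholder path
        experiment="exp1",                                  # placeholder name
        levels=["morph", "intensity"],                      # two levels, one modality
        modalities=["CT"],
    )

    # One DeLong p-value on the predictions pooled over all splits
    p_delong = stats.get_p_value(method="delong", aggregate=True)

    # Nadeau-Bengio corrected paired t-test on the per-split AUCs
    p_bengio = stats.get_p_value(method="bengio")

With a single modality and two levels, the class compares the same modality across the two levels; with two modalities and one level, it compares the two modalities at that level.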
MEDiml/learning/__init__.py
@@ -0,0 +1,10 @@
+ from . import *
+ from .cleaning_utils import *
+ from .DataCleaner import DataCleaner
+ from .DesignExperiment import DesignExperiment
+ from .FSR import FSR
+ from .ml_utils import *
+ from .Normalization import Normalization
+ from .RadiomicsLearner import RadiomicsLearner
+ from .Results import Results
+ from .Stats import Stats
+ from .Stats import Stats