mediml 0.9.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. MEDiml/MEDscan.py +1696 -0
  2. MEDiml/__init__.py +21 -0
  3. MEDiml/biomarkers/BatchExtractor.py +806 -0
  4. MEDiml/biomarkers/BatchExtractorTexturalFilters.py +840 -0
  5. MEDiml/biomarkers/__init__.py +16 -0
  6. MEDiml/biomarkers/diagnostics.py +125 -0
  7. MEDiml/biomarkers/get_oriented_bound_box.py +158 -0
  8. MEDiml/biomarkers/glcm.py +1602 -0
  9. MEDiml/biomarkers/gldzm.py +523 -0
  10. MEDiml/biomarkers/glrlm.py +1315 -0
  11. MEDiml/biomarkers/glszm.py +555 -0
  12. MEDiml/biomarkers/int_vol_hist.py +527 -0
  13. MEDiml/biomarkers/intensity_histogram.py +615 -0
  14. MEDiml/biomarkers/local_intensity.py +89 -0
  15. MEDiml/biomarkers/morph.py +1756 -0
  16. MEDiml/biomarkers/ngldm.py +780 -0
  17. MEDiml/biomarkers/ngtdm.py +414 -0
  18. MEDiml/biomarkers/stats.py +373 -0
  19. MEDiml/biomarkers/utils.py +389 -0
  20. MEDiml/filters/TexturalFilter.py +299 -0
  21. MEDiml/filters/__init__.py +9 -0
  22. MEDiml/filters/apply_filter.py +134 -0
  23. MEDiml/filters/gabor.py +215 -0
  24. MEDiml/filters/laws.py +283 -0
  25. MEDiml/filters/log.py +147 -0
  26. MEDiml/filters/mean.py +121 -0
  27. MEDiml/filters/textural_filters_kernels.py +1738 -0
  28. MEDiml/filters/utils.py +107 -0
  29. MEDiml/filters/wavelet.py +237 -0
  30. MEDiml/learning/DataCleaner.py +198 -0
  31. MEDiml/learning/DesignExperiment.py +480 -0
  32. MEDiml/learning/FSR.py +667 -0
  33. MEDiml/learning/Normalization.py +112 -0
  34. MEDiml/learning/RadiomicsLearner.py +714 -0
  35. MEDiml/learning/Results.py +2237 -0
  36. MEDiml/learning/Stats.py +694 -0
  37. MEDiml/learning/__init__.py +10 -0
  38. MEDiml/learning/cleaning_utils.py +107 -0
  39. MEDiml/learning/ml_utils.py +1015 -0
  40. MEDiml/processing/__init__.py +6 -0
  41. MEDiml/processing/compute_suv_map.py +121 -0
  42. MEDiml/processing/discretisation.py +149 -0
  43. MEDiml/processing/interpolation.py +275 -0
  44. MEDiml/processing/resegmentation.py +66 -0
  45. MEDiml/processing/segmentation.py +912 -0
  46. MEDiml/utils/__init__.py +25 -0
  47. MEDiml/utils/batch_patients.py +45 -0
  48. MEDiml/utils/create_radiomics_table.py +131 -0
  49. MEDiml/utils/data_frame_export.py +42 -0
  50. MEDiml/utils/find_process_names.py +16 -0
  51. MEDiml/utils/get_file_paths.py +34 -0
  52. MEDiml/utils/get_full_rad_names.py +21 -0
  53. MEDiml/utils/get_institutions_from_ids.py +16 -0
  54. MEDiml/utils/get_patient_id_from_scan_name.py +22 -0
  55. MEDiml/utils/get_patient_names.py +26 -0
  56. MEDiml/utils/get_radiomic_names.py +27 -0
  57. MEDiml/utils/get_scan_name_from_rad_name.py +22 -0
  58. MEDiml/utils/image_reader_SITK.py +37 -0
  59. MEDiml/utils/image_volume_obj.py +22 -0
  60. MEDiml/utils/imref.py +340 -0
  61. MEDiml/utils/initialize_features_names.py +62 -0
  62. MEDiml/utils/inpolygon.py +159 -0
  63. MEDiml/utils/interp3.py +43 -0
  64. MEDiml/utils/json_utils.py +78 -0
  65. MEDiml/utils/mode.py +31 -0
  66. MEDiml/utils/parse_contour_string.py +58 -0
  67. MEDiml/utils/save_MEDscan.py +30 -0
  68. MEDiml/utils/strfind.py +32 -0
  69. MEDiml/utils/textureTools.py +188 -0
  70. MEDiml/utils/texture_features_names.py +115 -0
  71. MEDiml/utils/write_radiomics_csv.py +47 -0
  72. MEDiml/wrangling/DataManager.py +1724 -0
  73. MEDiml/wrangling/ProcessDICOM.py +512 -0
  74. MEDiml/wrangling/__init__.py +3 -0
  75. mediml-0.9.9.dist-info/LICENSE.md +674 -0
  76. mediml-0.9.9.dist-info/METADATA +232 -0
  77. mediml-0.9.9.dist-info/RECORD +78 -0
  78. mediml-0.9.9.dist-info/WHEEL +4 -0
@@ -0,0 +1,2237 @@
1
+ # Description: Class Results to store and analyze the results of experiments.
2
+
3
+ import os
4
+ from pathlib import Path
5
+ from typing import List
6
+
7
+ import matplotlib.patches as mpatches
8
+ import matplotlib.pyplot as plt
9
+ import networkx as nx
10
+ import numpy as np
11
+ import pandas as pd
12
+ import seaborn as sns
14
+ from matplotlib.colors import to_rgba
15
+ from matplotlib.lines import Line2D
16
+ from networkx.drawing.nx_pydot import graphviz_layout
17
+ from numpyencoder import NumpyEncoder
18
+ from sklearn import metrics
19
+
20
+ from MEDiml.learning.ml_utils import feature_imporance_analysis, list_metrics
21
+ from MEDiml.learning.Stats import Stats
22
+ from MEDiml.utils.json_utils import load_json, save_json
23
+ from MEDiml.utils.texture_features_names import *
24
+
25
+
26
+ class Results:
27
+ """
28
+ A class to analyze the results of a given machine learning experiment, including the assessment of the model's performance.
29
+
30
+ Args:
31
+ model_dict (dict, optional): Dictionary containing the model's parameters. Defaults to {}.
32
+ model_id (str, optional): ID of the model. Defaults to "".
33
+
34
+ Attributes:
35
+ model_dict (dict): Dictionary containing the model's parameters.
36
+ model_id (str): ID of the model.
37
+ results_dict (dict): Dictionary containing the results of the model's performance.
38
+ """
39
+ def __init__(self, model_dict: dict = {}, model_id: str = "") -> None:
40
+ """
41
+ Constructor of the class Results
42
+ """
43
+ self.model_dict = model_dict
44
+ self.model_id = model_id
45
+ self.results_dict = {}
46
+
47
+ def __calculate_performance(
48
+ self,
49
+ response: list,
50
+ labels: pd.DataFrame,
51
+ thresh: float
52
+ ) -> dict:
53
+ """
54
+ Computes performance metrics given a model's response, the outcome labels and a threshold.
55
+
56
+ Args:
57
+ response (list): List of the probabilities of class "1" for all instances (prediction)
58
+ labels (pd.DataFrame): Column vector specifying the outcome status (1 or 0) for all instances.
59
+ thresh (float): Optimal threshold selected from the ROC curve.
60
+
61
+ Returns:
62
+ Dict: Dictionary containing the performance metrics.
63
+ """
64
+ # Recording results
65
+ results_dict = dict()
66
+
67
+ # Removing Nans
68
+ df = labels.copy()
69
+ outcome_name = labels.columns.values[0]
70
+ df['response'] = response
71
+ df.dropna(axis=0, how='any', inplace=True)
72
+
73
+ # Confusion matrix elements:
74
+ results_dict['TP'] = ((df['response'] >= thresh) & (df[outcome_name] == 1)).sum()
75
+ results_dict['TN'] = ((df['response'] < thresh) & (df[outcome_name] == 0)).sum()
76
+ results_dict['FP'] = ((df['response'] >= thresh) & (df[outcome_name] == 0)).sum()
77
+ results_dict['FN'] = ((df['response'] < thresh) & (df[outcome_name] == 1)).sum()
78
+
79
+ # Copying confusion matrix elements
80
+ TP = results_dict['TP']
81
+ TN = results_dict['TN']
82
+ FP = results_dict['FP']
83
+ FN = results_dict['FN']
84
+
85
+ # AUC
86
+ results_dict['AUC'] = metrics.roc_auc_score(df[outcome_name], df['response'])
87
+
88
+ # AUPRC
89
+ results_dict['AUPRC'] = metrics.average_precision_score(df[outcome_name], df['response'])
90
+
91
+ # Sensitivity
92
+ if (TP + FN) > 0:
93
+ results_dict['Sensitivity'] = TP / (TP + FN)
94
+ else:
95
+ print('TP + FN = 0, division by zero, replacing sensitivity by 0.0')
96
+ results_dict['Sensitivity'] = 0.0
97
+
98
+ # Specificity
99
+ if (TN + FP) > 0:
99
+ results_dict['Specificity'] = TN / (TN + FP)
100
+ else:
101
+ print('TN + FP = 0, division by zero, replacing specificity by 0.0')
102
+ results_dict['Specificity'] = 0.0
104
+
105
+ # Balanced accuracy
106
+ results_dict['BAC'] = (results_dict['Sensitivity'] + results_dict['Specificity']) / 2
107
+
108
+ # Precision
109
+ results_dict['Precision'] = TP / (TP + FP)
110
+
111
+ # NPV (Negative Predictive Value)
112
+ results_dict['NPV'] = TN / (TN + FN)
113
+
114
+ # Accuracy
115
+ results_dict['Accuracy'] = (TP + TN) / (TP + TN + FP + FN)
116
+
117
+ # F1 score
118
+ results_dict['F1_score'] = 2 * TP / (2 * TP + FP + FN)
119
+
120
+ # mcc (mathews correlation coefficient)
121
+ results_dict['MCC'] = (TP * TN - FP * FN) / np.sqrt((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN))
122
+
123
+ return results_dict
124
+
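# --- Editorial sketch (illustrative, not part of the packaged file) ---
# The method above derives its metrics from the four confusion-matrix counts.
# A minimal standalone version of the same formulas, with explicit guards
# against zero denominators (assumes at least one instance overall):
import numpy as np

def confusion_matrix_metrics(tp: int, tn: int, fp: int, fn: int) -> dict:
    """Derive the same performance metrics from raw confusion-matrix counts."""
    sensitivity = tp / (tp + fn) if (tp + fn) else 0.0   # true positive rate
    specificity = tn / (tn + fp) if (tn + fp) else 0.0   # true negative rate
    mcc_denom = np.sqrt(float((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)))
    return {
        'Sensitivity': sensitivity,
        'Specificity': specificity,
        'BAC': (sensitivity + specificity) / 2,          # balanced accuracy
        'Precision': tp / (tp + fp) if (tp + fp) else 0.0,
        'NPV': tn / (tn + fn) if (tn + fn) else 0.0,
        'Accuracy': (tp + tn) / (tp + tn + fp + fn),
        'F1_score': 2 * tp / (2 * tp + fp + fn) if (2 * tp + fp + fn) else 0.0,
        'MCC': (tp * tn - fp * fn) / mcc_denom if mcc_denom else 0.0,
    }
# --- End of editorial sketch ---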
125
+ def __get_metrics_failure_dict(
126
+ self,
127
+ metrics: list = list_metrics
128
+ ) -> dict:
129
+ """
130
+ This function fills the metrics with NaNs in case of failure.
131
+
132
+ Args:
133
+ metrics (list, optional): List of metrics to be filled with NaNs.
134
+ Defaults to ['AUC', 'Sensitivity', 'Specificity', 'BAC',
135
+ 'AUPRC', 'Precision', 'NPV', 'Accuracy', 'F1_score', 'MCC'
136
+ 'TP', 'TN', 'FP', 'FN'].
137
+
138
+ Returns:
139
+ Dict: Dictionary with the metrics filled with NaNs.
140
+ """
141
+ failure_struct = dict()
142
+ failure_struct = {metric: np.nan for metric in metrics}
143
+
144
+ return failure_struct
145
+
146
+ def __count_percentage_levels(self, features_dict: dict, fda: bool = False) -> list:
147
+ """
148
+ Counts the percentage of each radiomics level in a given features dictionary.
149
+
150
+ Args:
151
+ features_dict (dict): Dictionary of features.
152
+ fda (bool, optional): If True, the features come from the FDA logging dict and will be
153
+ treated differently. Defaults to False.
154
+
155
+ Returns:
156
+ list: List of percentages of features in each complexity level.
157
+ """
158
+ # Initialization
159
+ perc_levels = [0] * 7 # 3 original levels + intensity/texture variants for the linear and textural filters
160
+
161
+ # List all features in dict
162
+ if fda:
163
+ list_features = [feature.split('/')[-1] for feature in features_dict['final']]
164
+ else:
165
+ list_features = list(features_dict.keys())
166
+
167
+ # Count the percentage of levels
168
+ for feature in list_features:
169
+ level_name = feature.split('__')[1].lower()
170
+ feature_name = feature.split('__')[2].lower()
171
+ # Morph
172
+ if level_name.startswith('morph'):
173
+ perc_levels[0] += 1
174
+ # Intensity
175
+ elif level_name.startswith('intensity'):
176
+ perc_levels[1] += 1
177
+ # Texture
178
+ elif level_name.startswith('texture'):
179
+ perc_levels[2] += 1
180
+ # Linear filters
181
+ elif level_name.startswith('mean') \
182
+ or level_name.startswith('log') \
183
+ or level_name.startswith('laws') \
184
+ or level_name.startswith('gabor') \
185
+ or level_name.startswith('wavelet') \
186
+ or level_name.startswith('coif'):
187
+ # separate intensity and texture
188
+ if feature_name.startswith('_int'):
189
+ perc_levels[3] += 1
190
+ elif feature_name.startswith(tuple(['_glcm', '_gldzm', '_glrlm', '_glszm', '_ngtdm', '_ngldm'])):
191
+ perc_levels[4] += 1
192
+ # Textural filters
193
+ elif level_name.startswith('glcm'):
194
+ # separate intensity and texture
195
+ if feature_name.startswith('_int'):
196
+ perc_levels[5] += 1
197
+ elif feature_name.startswith(tuple(['_glcm', '_gldzm', '_glrlm', '_glszm', '_ngtdm', '_ngldm'])):
198
+ perc_levels[6] += 1
199
+
200
+ return perc_levels / np.sum(perc_levels, axis=0) * 100
201
+
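# --- Editorial note (naming convention inferred from the split('__') indexing above,
#     not from package documentation; the example name is hypothetical) ---
example_feature = 'radiomics__texture___glcm_joint_max'
level_name = example_feature.split('__')[1].lower()      # -> 'texture'
feature_name = example_feature.split('__')[2].lower()    # -> '_glcm_joint_max'
# --- End of editorial note ---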
202
+ def __count_percentage_radiomics(self, results_dict: dict) -> list:
203
+ """
204
+ Counts the percentage of radiomics levels for all features used for the experiment.
205
+
206
+ Args:
207
+ results_dict (dict): Dictionary of final run results.
208
+
209
+ Returns:
210
+ list: List of percentages of features used for the model sorted by complexity levels.
211
+ """
212
+ # Initialization
213
+ perc_levels = [0] * 5 # 5 levels: morph, intensity, texture, linear filters, textural filters
214
+ model_name = list(results_dict.keys())[0]
215
+ radiomics_tables_dict = results_dict[model_name]['var_info']['normalization']
216
+
217
+ # Count the percentage of levels
218
+ for key in list(radiomics_tables_dict.keys()):
219
+ if key.lower().startswith('radtab'):
220
+ table_path = radiomics_tables_dict[key]['original_data']['path_radiomics_csv']
221
+ table_name = table_path.split('/')[-1]
222
+ table = pd.read_csv(table_path, index_col=0)
223
+ # Morph
224
+ if 'morph' in table_name.lower():
225
+ perc_levels[0] += table.columns.shape[0]
226
+ # Intensity
227
+ elif 'intensity' in table_name.lower():
228
+ perc_levels[1] += table.columns.shape[0]
229
+ # Texture
230
+ elif 'texture' in table_name.lower():
231
+ perc_levels[2] += table.columns.shape[0]
232
+ # Linear filters
233
+ elif 'mean' in table_name.lower() \
234
+ or 'log' in table_name.lower() \
235
+ or 'laws' in table_name.lower() \
236
+ or 'gabor' in table_name.lower() \
237
+ or 'wavelet' in table_name.lower() \
238
+ or 'coif' in table_name.lower():
239
+ perc_levels[3] += table.columns.shape[0]
240
+ # Textural filters
241
+ elif 'glcm' in table_name.lower():
242
+ perc_levels[4] += table.columns.shape[0]
243
+
244
+ return perc_levels / np.sum(perc_levels, axis=0) * 100
245
+
246
+ def __count_stable_fda(self, features_dict: dict) -> list:
247
+ """
248
+ Counts the number of features per radiomics complexity level in the FDA features dictionary.
249
+
250
+ Args:
251
+ features_dict (dict): Dictionary of features.
252
+
253
+ Returns:
254
+ list: List of feature counts per complexity level.
255
+ """
256
+ # Initialization
257
+ count_levels = [0] * 5 # 5 levels: morph, intensity, texture, linear filters, textural filters
258
+
259
+ # List all features in dict
260
+ features_dict = features_dict["one_space"]["unstable"]
261
+ list_features = list(features_dict.keys())
262
+
263
+ # Count the percentage of levels
264
+ for feature_name in list_features:
265
+ # Morph
266
+ if feature_name.lower().startswith('morph'):
267
+ count_levels[0] += features_dict[feature_name]
268
+ # Intensity
269
+ elif feature_name.lower().startswith('intensity'):
270
+ count_levels[1] += features_dict[feature_name]
271
+ # Texture
272
+ elif feature_name.lower().startswith('texture'):
273
+ count_levels[2] += features_dict[feature_name]
274
+ # Linear filters
275
+ elif feature_name.lower().startswith('mean') \
276
+ or feature_name.lower().startswith('log') \
277
+ or feature_name.lower().startswith('laws') \
278
+ or feature_name.lower().startswith('gabor') \
279
+ or feature_name.lower().startswith('wavelet') \
280
+ or feature_name.lower().startswith('coif'):
281
+ count_levels[3] += features_dict[feature_name]
282
+ # Textural filters
283
+ elif feature_name.lower().startswith('glcm'):
284
+ count_levels[4] += features_dict[feature_name]
285
+
286
+ return count_levels
287
+
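# --- Editorial note (structure inferred from the indexing in __count_stable_fda();
#     keys and values below are assumptions, not documented package output) ---
fda_dict_example = {
    'one_space': {
        'unstable': {
            'morph_volume': 3,       # hypothetical feature name -> count
            'glcm_joint_max': 1,
        }
    }
}
# --- End of editorial note ---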
288
+ def __count_patients(self, path_results: Path) -> dict:
289
+ """
290
+ Counts the number of patients used in learning, testing and holdout.
291
+
292
+ Args:
293
+ path_results(Path): path to the folder containing the results of the experiment.
294
+
295
+ Returns:
296
+ Dict: Dictionary with the number of patients used in learning, testing and holdout.
297
+ """
298
+ # Get all tests paths
299
+ list_path_tests = [path for path in path_results.iterdir() if path.is_dir()]
300
+
301
+ # Initialize dictionaries
302
+ patients_count = {
303
+ 'train': {},
304
+ 'test': {},
305
+ 'holdout': {}
306
+ }
307
+
308
+ # Process metrics
309
+ for dataset in ['train', 'test', 'holdout']:
310
+ for path_test in list_path_tests:
311
+ results_dict = load_json(path_test / 'run_results.json')
312
+ if dataset in results_dict[list(results_dict.keys())[0]].keys():
313
+ if 'patients' in results_dict[list(results_dict.keys())[0]][dataset].keys():
314
+ if results_dict[list(results_dict.keys())[0]][dataset]['patients']:
315
+ patients_count[dataset] = len(results_dict[list(results_dict.keys())[0]][dataset]['patients'])
316
+ else:
317
+ continue
318
+ else:
319
+ continue
320
+ break # The number of patients is the same for all the runs
321
+
322
+ return patients_count
323
+
324
+ def average_results(self, path_results: Path, save: bool = False) -> None:
325
+ """
326
+ Averages the results (AUC, BAC, Sensitivity and Specificity) of all the runs of the same experiment,
327
+ for training, testing and holdout sets.
328
+
329
+ Args:
330
+ path_results(Path): path to the folder containing the results of the experiment.
331
+ save (bool, optional): If True, saves the results in the same folder as the model.
332
+
333
+ Returns:
334
+ Path or dict: Path to the saved ``results_avg.json`` file if ``save`` is True, otherwise the dictionary of averaged results.
335
+ """
336
+ # Get all tests paths
337
+ list_path_tests = [path for path in path_results.iterdir() if path.is_dir()]
338
+
339
+ # Initialize dictionaries
340
+ results_avg = {
341
+ 'train': {},
342
+ 'test': {},
343
+ 'holdout': {}
344
+ }
345
+
346
+ # Retrieve metrics
347
+ for dataset in ['train', 'test', 'holdout']:
348
+ dataset_dict = results_avg[dataset]
349
+ for metric in list_metrics:
350
+ metric_values = []
351
+ for path_test in list_path_tests:
352
+ results_dict = load_json(path_test / 'run_results.json')
353
+ if dataset in results_dict[list(results_dict.keys())[0]].keys():
354
+ if 'metrics' in results_dict[list(results_dict.keys())[0]][dataset].keys():
355
+ metric_values.append(results_dict[list(results_dict.keys())[0]][dataset]['metrics'][metric])
356
+ else:
357
+ continue
358
+ else:
359
+ continue
360
+
361
+ # Fill the dictionary
362
+ if metric_values:
363
+ dataset_dict[f'{metric}_mean'] = np.nanmean(metric_values)
364
+ dataset_dict[f'{metric}_std'] = np.nanstd(metric_values)
365
+ dataset_dict[f'{metric}_max'] = np.nanmax(metric_values)
366
+ dataset_dict[f'{metric}_min'] = np.nanmin(metric_values)
367
+ dataset_dict[f'{metric}_2.5%'] = np.nanpercentile(metric_values, 2.5)
368
+ dataset_dict[f'{metric}_97.5%'] = np.nanpercentile(metric_values, 97.5)
369
+
370
+ # Save the results
371
+ if save:
372
+ save_json(path_results / 'results_avg.json', results_avg, cls=NumpyEncoder)
373
+ return path_results / 'results_avg.json'
374
+
375
+ return results_avg
376
+
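# --- Editorial note (layout inferred from the lookups in average_results() and
#     __count_patients(); model key and values are hypothetical) ---
# Each per-split 'run_results.json' is expected to hold a single top-level model
# key with per-dataset 'metrics' (and 'patients') entries, roughly:
run_results_example = {
    'model_name': {
        'train':   {'metrics': {'AUC': 0.81, 'BAC': 0.74}, 'patients': ['P1', 'P2']},
        'test':    {'metrics': {'AUC': 0.77, 'BAC': 0.70}, 'patients': ['P3']},
        'holdout': {'metrics': {'AUC': 0.75, 'BAC': 0.69}, 'patients': ['P4']},
    }
}
# --- End of editorial note ---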
377
+ def get_model_performance(
378
+ self,
379
+ response: list,
380
+ outcome_table: pd.DataFrame
381
+ ) -> dict:
382
+ """
383
+ Calculates the performance of the model.
384
+ Args:
385
+ response (list): List of machine learning model predictions.
386
+ outcome_table (pd.DataFrame): Outcome table with binary labels.
387
+
388
+ Returns:
389
+ dict: Dictionary containing the performance metrics (filled with NaNs on failure).
390
+ """
391
+ # Calculating performance metrics for the training set
392
+ try:
393
+ # Convert list of model response to a table to facilitate the process
394
+ results_dict = dict()
395
+ patient_ids = list(outcome_table.index)
396
+ response_table = pd.DataFrame(response)
397
+ response_table.index = patient_ids
398
+ response_table._metadata += ['Properties']
399
+ response_table.Properties = dict()
400
+ response_table.Properties['RowNames'] = patient_ids
401
+
402
+ # Make sure the outcome table and the response table have the same patients
403
+ outcome_binary = outcome_table.loc[patient_ids, :]
404
+ outcome_binary = outcome_binary.iloc[:, 0]
405
+ response = response_table.loc[patient_ids, :]
406
+ response = response.iloc[:, 0]
407
+
408
+ # Calculating performance
409
+ results_dict = self.__calculate_performance(response, outcome_binary.to_frame(), self.model_dict['threshold'])
410
+
411
+ return results_dict
412
+
413
+ except Exception as e:
414
+ print(f"Error: ", e, "filling metrics with nan...")
415
+ return self.__get_metrics_failure_dict()
416
+
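# --- Usage sketch (editorial; patient IDs, probabilities and threshold are hypothetical) ---
import pandas as pd
from MEDiml.learning.Results import Results

outcome_table = pd.DataFrame({'outcome': [1, 0, 1, 0]}, index=['P1', 'P2', 'P3', 'P4'])
probabilities = [0.9, 0.2, 0.6, 0.4]           # predicted probability of class "1"
res = Results(model_dict={'threshold': 0.5})   # threshold read by __calculate_performance
metrics = res.get_model_performance(probabilities, outcome_table)
print(metrics['AUC'], metrics['Sensitivity'])
# --- End of usage sketch ---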
417
+ def get_optimal_level(
418
+ self,
419
+ path_experiments: Path,
420
+ experiments_labels: List[str],
421
+ metric: str = 'AUC_mean',
422
+ p_value_test: str = 'wilcoxon',
423
+ aggregate: bool = False,
424
+ ) -> List[str]:
425
+ """
426
+ Finds the optimal radiomics complexity level for each modality of the given experiment, based on the chosen metric and statistical test.
427
+
428
+ Args:
429
+ path_experiments (Path): Path to the folder containing the experiments.
430
+ experiments_labels (List): List of experiment labels to use. Including variants is possible. For
431
+ example: ['experiment1_morph_CT', ['experiment1_intensity5_CT', 'experiment1_intensity10_CT'], 'experiment1_texture_CT'].
432
+ metric (str, optional): Metric to use for the comparison. Defaults to 'AUC_mean'.
433
+ p_value_test (str, optional): Method to use to calculate the p-value. Defaults to 'wilcoxon'.
434
+ Available options:
435
+
436
+ - 'delong': Delong test.
437
+ - 'ttest': T-test.
438
+ - 'wilcoxon': Wilcoxon signed rank test.
439
+ - 'bengio': Bengio and Nadeau corrected t-test.
440
+ aggregate (bool, optional): If True, aggregates the results of all the splits and computes one final p-value.
441
+ Only valid for the Delong test when cross-validation is used. Defaults to False.
442
+
443
+ Returns:
444
+ List[str]: List of optimal level labels, one per modality.
445
+ """
446
+ assert metric.split('_')[0] in list_metrics, f'Given metric {metric} is not in the list of metrics. Please choose from {list_metrics}'
447
+
448
+ # Extract modalities and initialize the dictionary
449
+ if type(experiments_labels[0]) == str:
450
+ experiment = '_'.join(experiments_labels[0].split('_')[:-2])
451
+ elif type(experiments_labels[0]) == list:
452
+ experiment = '_'.join(experiments_labels[0][0].split('_')[:-2])
453
+
454
+ modalities = set()
455
+ for exp_label in experiments_labels:
456
+ if isinstance(exp_label, str):
457
+ modalities.add(exp_label.split("_")[-1])
458
+ elif isinstance(exp_label, list):
459
+ for sub_exp_label in exp_label:
460
+ modalities.add(sub_exp_label.split("_")[-1])
461
+ else:
462
+ raise ValueError(f'experiments_labels must be a list of strings or a list of list of strings, given: {type(exp_label)}')
463
+
464
+ levels_dict = {modality: [] for modality in modalities}
465
+ optimal_lvls = [""] * len(modalities)
466
+
467
+ # Populate the dictionary
468
+ variants = []
469
+ for label in experiments_labels:
470
+ if isinstance(label, str):
471
+ modality = label.split("_")[-1]
472
+ levels_dict[modality].append(label.split("_")[-2])
473
+ elif isinstance(label, list):
474
+ modality = label[0].split("_")[-1]
475
+ variants = []
476
+ for sub_label in label:
477
+ variants.append(sub_label.split("_")[-2])
478
+ levels_dict[modality] += [variants]
479
+
480
+ # Prepare the data for the heatmap
481
+ for idx_m, modality in enumerate(modalities):
482
+ best_levels = []
483
+ results_dict_best = dict()
484
+ results_dicts = []
485
+ best_exp = ""
486
+ levels = levels_dict[modality]
487
+
488
+ # Loop over the levels and find the best variant for each level
489
+ for level in levels:
490
+ metric_compare = -1.0
491
+ if type(level) != list:
492
+ level = [level]
493
+ for variant in level:
494
+ exp_full_name = 'learn__' + experiment + '_' + variant + '_' + modality
495
+ if 'results_avg.json' in os.listdir(path_experiments / exp_full_name):
496
+ results_dict = load_json(path_experiments / exp_full_name / 'results_avg.json')
497
+ else:
498
+ results_dict = self.average_results(path_experiments / exp_full_name)
499
+ if metric_compare < results_dict['test'][metric]:
500
+ metric_compare = results_dict['test'][metric]
501
+ results_dict_best = results_dict
502
+ best_exp = variant
503
+ best_levels.append(best_exp)
504
+ results_dicts.append(results_dict_best)
505
+
506
+ # Create the heatmap data using the metric of interest
507
+ heatmap_data = np.zeros((2, len(best_levels)))
508
+
509
+ # Fill the heatmap data
510
+ for j in range(len(best_levels)):
511
+ # Get metrics and p-values
512
+ results_dict = results_dicts[j]
513
+ if aggregate and 'delong' in p_value_test:
514
+ metric_stat = round(Stats.get_aggregated_metric(
515
+ path_experiments,
516
+ experiment,
517
+ best_levels[j],
518
+ modality,
519
+ metric.split('_')[0] if '_' in metric else metric
520
+ ), 2)
521
+ else:
522
+ metric_stat = round(results_dict['test'][metric], 2)
523
+ heatmap_data[0, j] = metric_stat
524
+
525
+ # Statistical analysis
526
+ # Initializations
527
+ optimal_lvls[idx_m] = experiment + "_" + best_levels[0] + "_" + modality
528
+ init_metric = heatmap_data[0][0]
529
+ idx_d = 0
530
+ start_level = 0
531
+
532
+ # Get p-values for all the levels
533
+ while idx_d < len(best_levels) - 1:
534
+ metric_val = heatmap_data[0][idx_d+1]
535
+ # Get p-value only if the metric is improving
536
+ if metric_val > init_metric:
537
+ # Instantiate the Stats class
538
+ stats = Stats(
539
+ path_experiments,
540
+ experiment,
541
+ [best_levels[start_level], best_levels[idx_d+1]],
542
+ [modality]
543
+ )
544
+
545
+ # Get p-value
546
+ p_value = stats.get_p_value(
547
+ p_value_test,
548
+ metric=metric if '_' not in metric else metric.split('_')[0],
549
+ aggregate=aggregate
550
+ )
551
+
552
+ # If p-value is less than 0.05, change starting level
553
+ if p_value <= 0.05:
554
+ optimal_lvls[idx_m] = experiment + "_" + best_levels[idx_d+1] + "_" + modality
555
+ init_metric = metric_val
556
+ start_level = idx_d + 1
557
+
558
+ # Go to next column
559
+ idx_d += 1
560
+
561
+ return optimal_lvls
562
+
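# --- Usage sketch (editorial; the experiments folder is hypothetical, but the
#     '<experiment>_<level>_<modality>' label convention and the nested lists for
#     variants follow the docstring above) ---
from pathlib import Path
from MEDiml.learning.Results import Results

res = Results()
optimal_levels = res.get_optimal_level(
    path_experiments=Path('/path/to/experiments'),
    experiments_labels=[
        'experiment1_morph_CT',
        ['experiment1_intensity5_CT', 'experiment1_intensity10_CT'],
        'experiment1_texture_CT',
    ],
    metric='AUC_mean',
    p_value_test='wilcoxon',
)
# -> one optimal level label per modality, e.g. ['experiment1_texture_CT']
# --- End of usage sketch ---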
563
+ def plot_features_importance_histogram(
564
+ self,
565
+ path_experiments: Path,
566
+ experiment: str,
567
+ level: str,
568
+ modalities: List,
569
+ sort_option: str = 'importance',
570
+ title: str = None,
571
+ save: bool = True,
572
+ figsize: tuple = (12, 12)
573
+ ) -> None:
574
+ """
575
+ Plots a histogram of the features importance for the given experiment.
576
+
577
+ Args:
578
+ path_experiments (Path): Path to the folder containing the experiments.
579
+ experiment (str): Name of the experiment to plot. Will be used to find the results.
580
+ level (str): Radiomics level to plot. For example: 'morph'.
581
+ modalities (List): List of imaging modalities to use for the plot. A plot for each modality.
582
+ sort_option (str, optional): Option used to sort the features. Available options:
583
+ - 'importance': Sorts the features by importance.
584
+ - 'times_selected': Sorts the features by the number of times they were selected across the different splits.
585
+ - 'both': Sorts the features by importance and then by the number of times they were selected.
586
+ title (str, optional): Title of the plot. Defaults to None.
587
+ save (bool, optional): Whether to save the plot. Defaults to True.
588
+ figsize (tuple, optional): Size of the figure. Defaults to (12, 12).
589
+
590
+ Returns:
591
+ None. Plots the figure or saves it.
592
+ """
593
+
594
+ # checks
595
+ assert sort_option in ['importance', 'times_selected', 'both'], \
596
+ f'sort_option must be either "importance", "times_selected" or "both". Given: {sort_option}'
597
+
598
+ # For each modality, load features importance dict
599
+ for modality in modalities:
600
+ exp_full_name = 'learn__' + experiment + '_' + level + '_' + modality
601
+
602
+ # Load features importance dict
603
+ if 'feature_importance_analysis.json' in os.listdir(path_experiments / exp_full_name):
604
+ feat_imp_dict = load_json(path_experiments / exp_full_name / 'feature_importance_analysis.json')
605
+ else:
606
+ raise FileNotFoundError(f'feature_importance_analysis.json not found in {path_experiments / exp_full_name}')
607
+
608
+ # Organize the data in a dataframe
609
+ keys = list(feat_imp_dict.keys())
610
+ mean_importances = []
611
+ times_selected = []
612
+ for key in keys:
614
+ mean_importances.append(feat_imp_dict[key]['importance_mean'])
615
+ times_selected.append(feat_imp_dict[key]['times_selected'])
616
+ df = pd.DataFrame({'feature': keys, 'importance': mean_importances, 'times_selected': times_selected})
617
+ value_col = 'importance' if sort_option == 'both' else sort_option
+ df = df.sort_values(by=['importance', 'times_selected'] if sort_option == 'both' else [sort_option], ascending=True)
618
+
619
+ # Plot the histogram
620
+ plt.rcParams["font.weight"] = "bold"
621
+ plt.rcParams["axes.labelweight"] = "bold"
622
+ if sort_option == 'importance':
623
+ color = 'deepskyblue'
624
+ else:
625
+ color = 'darkorange'
626
+ plt.figure(figsize=figsize)
627
+ plt.xlabel(value_col)
628
+ plt.ylabel('Features')
629
+ plt.barh(df['feature'], df[value_col], color=color)
630
+
631
+ # Add title
632
+ if title:
633
+ plt.title(title, weight='bold')
634
+ else:
635
+ plt.title(f'Features importance histogram \n {experiment} - {level} - {modality}', weight='bold')
636
+ plt.tight_layout()
637
+
638
+ # Save the plot
639
+ if save:
640
+ plt.savefig(path_experiments / f'features_importance_histogram_{level}_{modality}_{sort_option}.png')
641
+ else:
642
+ plt.show()
643
+
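# --- Usage sketch (editorial; paths and names are hypothetical) ---
# Expects 'learn__<experiment>_<level>_<modality>/feature_importance_analysis.json'
# to exist under path_experiments, as checked above.
from pathlib import Path
from MEDiml.learning.Results import Results

res = Results()
res.plot_features_importance_histogram(
    path_experiments=Path('/path/to/experiments'),
    experiment='experiment1',
    level='morph',
    modalities=['CT'],
    sort_option='importance',
    save=True,
)
# --- End of usage sketch ---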
644
+ def plot_heatmap(
645
+ self,
646
+ path_experiments: Path,
647
+ experiments_labels: List[str],
648
+ metric: str = 'AUC_mean',
649
+ stat_extra: list = [],
650
+ plot_p_values: bool = True,
651
+ p_value_test: str = 'wilcoxon',
652
+ aggregate: bool = False,
653
+ title: str = None,
654
+ save: bool = False,
655
+ figsize: tuple = (8, 8)
656
+ ) -> None:
657
+ """
658
+ This function plots a heatmap of the metrics values for the performance of the models in the given experiment.
659
+
660
+ Args:
661
+ path_experiments (Path): Path to the folder containing the experiments.
662
+ experiments_labels (List): List of experiment labels to use for the plot. Including variants is possible. For
663
+ example: ['experiment1_morph_CT', ['experiment1_intensity5_CT', 'experiment1_intensity10_CT'], 'experiment1_texture_CT'].
664
+ metric (str, optional): Metric to plot. Defaults to 'AUC_mean'.
665
+ stat_extra (list, optional): List of extra statistics to include in the plot. Defaults to [].
666
+ plot_p_values (bool, optional): If True, plots the p-value of the chosen test. Defaults to True.
667
+ p_value_test (str, optional): Method to use to calculate the p-value. Defaults to 'wilcoxon'. Available options:
668
+
669
+ - 'delong': Delong test.
670
+ - 'ttest': T-test.
671
+ - 'wilcoxon': Wilcoxon signed rank test.
672
+ - 'bengio': Bengio and Nadeau corrected t-test.
673
+ aggregate (bool, optional): If True, aggregates the results of all the splits and computes one final p-value.
674
+ Only valid for the Delong test when cross-validation is used. Defaults to False.
676
+ title (str, optional): Title of the plot. Defaults to None.
677
+ save (bool, optional): Whether to save the plot. Defaults to False.
678
+ figsize (tuple, optional): Size of the figure. Defaults to (8, 8).
679
+
680
+ Returns:
681
+ None.
682
+ """
683
+ assert metric.split('_')[0] in list_metrics, f'Given metric {metric} is not in the list of metrics. Please choose from {list_metrics}'
684
+
685
+ # Extract modalities and initialize the dictionary
686
+ if type(experiments_labels[0]) == str:
687
+ experiment = '_'.join(experiments_labels[0].split('_')[:-2])
688
+ elif type(experiments_labels[0]) == list:
689
+ experiment = '_'.join(experiments_labels[0][0].split('_')[:-2])
690
+
691
+ modalities = set()
692
+ for exp_label in experiments_labels:
693
+ if isinstance(exp_label, str):
694
+ modalities.add(exp_label.split("_")[-1])
695
+ elif isinstance(exp_label, list):
696
+ for sub_exp_label in exp_label:
697
+ modalities.add(sub_exp_label.split("_")[-1])
698
+ else:
699
+ raise ValueError(f'experiments_labels must be a list of strings or a list of list of strings, given: {type(exp_label)}')
700
+
701
+ levels_dict = {modality: [] for modality in modalities}
702
+
703
+ # Populate the dictionary
704
+ variants = []
705
+ for label in experiments_labels:
706
+ if isinstance(label, str):
707
+ modality = label.split("_")[-1]
708
+ levels_dict[modality].append(label.split("_")[-2])
709
+ elif isinstance(label, list):
710
+ modality = label[0].split("_")[-1]
711
+ variants = []
712
+ for sub_label in label:
713
+ variants.append(sub_label.split("_")[-2])
714
+ levels_dict[modality] += [variants]
715
+
716
+ # Prepare the data for the heatmap
717
+ fig, axs = plt.subplots(len(modalities), figsize=figsize)
718
+
719
+ # Heatmap construction
720
+ for idx_m, modality in enumerate(modalities):
721
+ # Initializations
722
+ best_levels = []
723
+ results_dict_best = dict()
724
+ results_dicts = []
725
+ best_exp = ""
726
+ patients_count = dict.fromkeys([modality])
727
+ levels = levels_dict[modality]
728
+
729
+ # Loop over the levels and find the best variant for each level
730
+ for level in levels:
731
+ metric_compare = -1.0
732
+ if type(level) != list:
733
+ level = [level]
734
+ for idx, variant in enumerate(level):
735
+ exp_full_name = 'learn__' + experiment + '_' + variant + '_' + modality
736
+ if 'results_avg.json' in os.listdir(path_experiments / exp_full_name):
737
+ results_dict = load_json(path_experiments / exp_full_name / 'results_avg.json')
738
+ else:
739
+ results_dict = self.average_results(path_experiments / exp_full_name)
740
+ if metric_compare < results_dict['test'][metric]:
741
+ metric_compare = results_dict['test'][metric]
742
+ results_dict_best = results_dict
743
+ best_exp = variant
744
+ best_levels.append(best_exp)
745
+ results_dicts.append(results_dict_best)
746
+
747
+ # Patient count
748
+ patients_count[modality] = self.__count_patients(path_experiments / exp_full_name)
749
+
750
+ # Create the heatmap data using the metric of interest
751
+ if plot_p_values:
752
+ heatmap_data = np.zeros((2, len(best_levels)))
753
+ else:
754
+ heatmap_data = np.zeros((1, len(best_levels)))
755
+
756
+ # Fill the heatmap data
757
+ labels = heatmap_data.tolist()
758
+ labels_draw = heatmap_data.tolist()
759
+ heatmap_data_draw = heatmap_data.tolist()
760
+ for j in range(len(best_levels)):
761
+ # Get metrics and p-values
762
+ results_dict = results_dicts[j]
763
+ if aggregate and 'delong' in p_value_test:
764
+ metric_stat = round(Stats.get_aggregated_metric(
765
+ path_experiments,
766
+ experiment,
767
+ best_levels[j],
768
+ modality,
769
+ metric.split('_')[0] if '_' in metric else metric
770
+ ), 2)
771
+ else:
772
+ metric_stat = round(results_dict['test'][metric], 2)
773
+ heatmap_data[0, j] = metric_stat
777
+
778
+ # Extra statistics
779
+ if stat_extra:
780
+ if plot_p_values:
781
+ labels[0][j] = f'{metric_stat}'
782
+ if j < len(best_levels) - 1:
783
+ labels[1][j+1] = f'{round(heatmap_data[1, j+1], 5)}'
784
+ labels[1][0] = '-'
785
+ for extra_stat in stat_extra:
786
+ if aggregate and ('sensitivity' in extra_stat.lower() or 'specificity' in extra_stat.lower()):
787
+ extra_metric_stat = round(Stats.get_aggregated_metric(
788
+ path_experiments,
789
+ experiment,
790
+ best_levels[j],
791
+ modality,
792
+ extra_stat.split('_')[0]
793
+ ), 2)
794
+ extra_stat = extra_stat.split('_')[0] + '_agg' if '_' in extra_stat else extra_stat
795
+ labels[0][j] += f'\n{extra_stat}: {extra_metric_stat}'
796
+ else:
797
+ extra_metric_stat = round(results_dict['test'][extra_stat], 2)
798
+ labels[0][j] += f'\n{extra_stat}: {extra_metric_stat}'
799
+ else:
800
+ labels[0][j] = f'{metric_stat}'
801
+ for extra_stat in stat_extra:
802
+ extra_metric_stat = round(results_dict['test'][extra_stat], 2)
803
+ labels[0][j] += f'\n{extra_stat}: {extra_metric_stat}'
804
+ else:
805
+ labels = np.array(heatmap_data).round(4).tolist()
806
+
807
+ # Update modality name to include the number of patients for training and testing
808
+ modalities_label = [modality + f' ({patients_count[modality]["train"]} train, {patients_count[modality]["test"]} test)']
809
+
810
+ # Data to draw
811
+ heatmap_data_draw = heatmap_data.copy()
812
+ labels_draw = labels.copy()
813
+ if plot_p_values:
+ labels_draw[1] = [''] * len(labels[1])
+ heatmap_data_draw[1] = np.array([-1] * heatmap_data[1].shape[0]) if 'MCC' in metric else np.array([0] * heatmap_data[1].shape[0])
815
+
816
+ # Set up the rows (modalities and p-values)
817
+ if plot_p_values:
818
+ modalities_temp = modalities_label.copy()
819
+ modalities_label = ['p-values'] * len(modalities_temp) * 2
820
+ for idx in range(len(modalities_label)):
821
+ if idx % 2 == 0:
822
+ modalities_label[idx] = modalities_temp[idx // 2]
823
+
824
+ # Convert the numpy array to a DataFrame for Seaborn
825
+ df = pd.DataFrame(heatmap_data_draw, columns=best_levels, index=modalities_label)
826
+
827
+ # To avoid bugs, convert axs to list if only one modality is used
828
+ if len(modalities) == 1:
829
+ axs = [axs]
830
+
831
+ # Create the heatmap using seaborn
832
+ sns.heatmap(
833
+ df,
834
+ annot=labels_draw,
835
+ ax=axs[idx_m],
836
+ fmt="",
837
+ cmap="Blues",
838
+ cbar=True,
839
+ linewidths=0.5,
840
+ vmin=-1 if 'MCC' in metric else 0,
841
+ vmax=1,
842
+ annot_kws={"weight": "bold", "fontsize": 8}
843
+ )
844
+
845
+ # Plot p-values
846
+ if plot_p_values:
847
+ # Initializations
848
+ extent_x = axs[idx_m].get_xlim()
849
+ step_x = 1
850
+ start_x = extent_x[0] + 0.5
851
+ end_x = start_x + step_x
852
+ step_y = 1 / extent_x[1]
853
+ start_y = 1
854
+ endpoints_x = []
855
+ endpoints_y = []
856
+ init_metric = heatmap_data[0][0]
857
+ idx_d = 0
858
+ start_level = 0
859
+
860
+ # p-values for all levels
861
+ while idx_d < len(best_levels) - 1:
862
+ # Retrieve the metric value
863
+ metric_val = heatmap_data[0][idx_d+1]
864
+
865
+ # Instantiate the Stats class
866
+ stats = Stats(
867
+ path_experiments,
868
+ experiment,
869
+ [best_levels[start_level], best_levels[idx_d+1]],
870
+ [modality]
871
+ )
872
+
873
+ # Get p-value only if the metric is improving
874
+ if metric_val > init_metric:
875
+ p_value = stats.get_p_value(
876
+ p_value_test,
877
+ metric=metric if '_' not in metric else metric.split('_')[0],
878
+ aggregate=aggregate
879
+ )
880
+
881
+ # round the pvalue
882
+ p_value = round(p_value, 3)
883
+
884
+ # Set color, red if p-value > 0.05, green otherwise
885
+ color = 'r' if p_value > 0.05 else 'g'
886
+
887
+ # Plot the p-value (line and value)
888
+ axs[idx_m].axhline(start_y + step_y, xmin=start_x/extent_x[1], xmax=end_x/extent_x[1], color=color)
889
+ axs[idx_m].text(start_x + step_x/2, start_y + step_y, p_value, va='center', color=color, ha='center', backgroundcolor='w')
890
+
891
+ # Plot endpoints
892
+ endpoints_x = [start_x, end_x]
893
+ endpoints_y = [start_y + step_y, start_y + step_y]
894
+ axs[idx_m].scatter(endpoints_x, endpoints_y, color=color)
895
+
896
+ # Move to next line
897
+ step_y += 1 / extent_x[1]
898
+
899
+ # If p-value is less than 0.05, change starting level
900
+ if p_value <= 0.05:
901
+ init_metric = metric_val
902
+ start_x = end_x
903
+ start_level = idx_d + 1
904
+
905
+ # Go to next column
906
+ end_x += step_x
907
+ idx_d += 1
908
+
909
+ # Rotate xticks
910
+ axs[idx_m].set_xticks(axs[idx_m].get_xticks(), best_levels, rotation=45)
911
+
912
+ # Set title
913
+ if title:
914
+ fig.suptitle(title)
915
+ else:
916
+ fig.suptitle(f'{metric} heatmap')
917
+
918
+ # Tight layout
919
+ fig.tight_layout()
920
+
921
+ # Save the heatmap
922
+ if save:
923
+ if title:
924
+ fig.savefig(path_experiments / f'{title}.png')
925
+ else:
926
+ fig.savefig(path_experiments / f'{metric}_heatmap.png')
927
+ else:
928
+ fig.show()
929
+
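# --- Usage sketch (editorial; paths and labels are hypothetical; 'Sensitivity_mean'
#     follows the '<metric>_mean' keys produced by average_results()) ---
from pathlib import Path
from MEDiml.learning.Results import Results

res = Results()
res.plot_heatmap(
    path_experiments=Path('/path/to/experiments'),
    experiments_labels=['experiment1_morph_CT', 'experiment1_texture_CT'],
    metric='AUC_mean',
    stat_extra=['Sensitivity_mean'],   # extra per-cell annotation
    plot_p_values=True,
    p_value_test='wilcoxon',
    save=True,
)
# --- End of usage sketch ---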
930
+ def plot_radiomics_starting_percentage(
931
+ self,
932
+ path_experiments: Path,
933
+ experiment: str,
934
+ levels: List,
935
+ modalities: List,
936
+ title: str = None,
937
+ figsize: tuple = (15, 10),
938
+ save: bool = False
939
+ ) -> None:
940
+ """
941
+ This function plots a pie chart of the percentage of features used in the experiment per radiomics level.
942
+
943
+ Args:
944
+ path_experiments (Path): Path to the folder containing the experiments.
945
+ experiment (str): Name of the experiment to plot. Will be used to find the results.
946
+ levels (List): List of radiomics levels to include in the plot.
947
+ modalities (List): List of imaging modalities to include in the plot.
948
+ title(str, optional): Title and name used to save the plot. Defaults to None.
949
+ figsize(tuple, optional): Size of the figure. Defaults to (15, 10).
950
+ save (bool, optional): Whether to save the plot. Defaults to False.
951
+
952
+ Returns:
953
+ None.
954
+ """
955
+ # Levels names
956
+ levels_names = [
957
+ 'Morphology',
958
+ 'Intensity',
959
+ 'Texture',
960
+ 'Linear filters',
961
+ 'Textural filters'
962
+ ]
963
+
964
+ # Initialization
965
+ colors_sns = sns.color_palette("pastel", n_colors=5)
966
+
967
+ # Create multiple plots for the pie charts
968
+ fig, axes = plt.subplots(len(modalities), len(levels), figsize=figsize)
969
+
970
+ # Load the model results
971
+ for i, modality in enumerate(modalities):
972
+ for j, level in enumerate(levels):
973
+ exp_full_name = 'learn__' + experiment + '_' + level + '_' + modality
974
+ # Use the first test folder to get the results dict
975
+ if 'test__001' in os.listdir(path_experiments / exp_full_name):
976
+ run_results_dict = load_json(path_experiments / exp_full_name / 'test__001' / 'run_results.json')
977
+ else:
978
+ raise FileNotFoundError(f'no test file named test__001 in {path_experiments / exp_full_name}')
979
+
980
+ # Extract percentage of features per level
981
+ perc_levels = np.round(self.__count_percentage_radiomics(run_results_dict), 2)
982
+
983
+ # Plot the pie chart of the percentages
984
+ if len(modalities) > 1:
985
+ axes[i, j].pie(
986
+ perc_levels,
987
+ autopct= lambda p: '{:.1f}%'.format(p) if p > 0 else '',
988
+ pctdistance=0.8,
989
+ startangle=120,
990
+ rotatelabels=True,
991
+ textprops={'fontsize': 14, 'weight': 'bold'},
992
+ colors=colors_sns)
993
+ axes[i, j].set_title(f'{level} - {modality}', fontsize=15)
994
+ else:
995
+ axes[j].pie(
996
+ perc_levels,
997
+ autopct= lambda p: '{:.1f}%'.format(p) if p > 0 else '',
998
+ pctdistance=0.8,
999
+ startangle=120,
1000
+ rotatelabels=True,
1001
+ textprops={'fontsize': 14, 'weight': 'bold'},
1002
+ colors=colors_sns)
1003
+ axes[j].set_title(f'{level} - {modality}', fontsize=15)
1004
+
1005
+ # Add legend
1006
+ plt.legend(levels_names, loc='center left', bbox_to_anchor=(1, 0.5), prop={'size': 15})
1007
+ fig.tight_layout()
1008
+
1009
+ if title:
1010
+ fig.suptitle(title, fontsize=20)
1011
+ else:
1012
+ fig.suptitle(f'{experiment}: % of starting features per level', fontsize=20)
1013
+
1014
+ # Save the figure
1015
+ if save:
1016
+ if title:
1017
+ plt.savefig(path_experiments / f'{title}.png')
1018
+ else:
1019
+ plt.savefig(path_experiments / f'{experiment}_percentage_starting_features.png')
1020
+ else:
1021
+ plt.show()
1022
+
1023
+ def plot_fda_analysis_heatmap(
1024
+ self,
1025
+ path_experiments: Path,
1026
+ experiment: str,
1027
+ levels: List,
1028
+ modalities: List,
1029
+ title: str = None,
1030
+ save: bool = False
1031
+ ) -> None:
1032
+ """
1033
+ This function plots pie charts of the stable features and of the final features selected by FDA for a given experiment.
1034
+
1035
+ Args:
1036
+ path_experiments (Path): Path to the folder containing the experiments.
1037
+ experiment (str): Name of the experiment to plot. Will be used to find the results.
1038
+ levels (List): List of radiomics levels to include in the plot. For example: ['morph', 'intensity'].
1039
+ modalities (List): List of imaging modalities to include in the plot.
1040
+ title(str, optional): Title and name used to save the plot. Defaults to None.
1041
+ save (bool, optional): Whether to save the plot. Defaults to False.
1042
+
1043
+ Returns:
1044
+ None.
1045
+ """
1046
+ # Initialization - Levels names
1047
+ levels_names = [
1048
+ 'Morphology',
1049
+ 'Intensity',
1050
+ 'Texture',
1051
+ 'LF - Intensity',
1052
+ 'LF - Texture',
1053
+ 'TF - Intensity',
1054
+ 'TF - Texture'
1055
+ ]
1056
+ level_names_stable = [
1057
+ 'Morphology',
1058
+ 'Intensity',
1059
+ 'Texture',
1060
+ 'LF',
1061
+ 'TF'
1062
+ ]
1063
+
1064
+ # Initialization - Colors
1065
+ colors_sns = sns.color_palette("pastel", n_colors=5)
1066
+ colors_sns_stable = sns.color_palette("pastel", n_colors=5)
1067
+ colors_sns.insert(3, colors_sns[3])
1068
+ colors_sns.insert(5, colors_sns[-1])
1069
+ hatch = ['', '', '', '..', '//', '..', '//']
1070
+
1071
+ # Set hatches color
1072
+ plt.rcParams['hatch.color'] = 'white'
1073
+
1074
+ # Create multiple plots for the pie charts
1075
+ fig, axes = plt.subplots(len(modalities) * 2, len(levels), figsize=(18, 10))
1076
+
1077
+ # Load the model results
1078
+ for i, modality in enumerate(modalities):
1079
+ for j, level in enumerate(levels):
1080
+ perc_levels_stable = []
1081
+ perc_levels_final = []
1082
+ exp_full_name = 'learn__' + experiment + '_' + level + '_' + modality
1083
+ for folder in os.listdir(path_experiments / exp_full_name):
1084
+ if folder.lower().startswith('test__'):
1085
+ if 'fda_logging_dict.json' in os.listdir(path_experiments / exp_full_name / folder):
1086
+ fda_dict = load_json(path_experiments / exp_full_name / folder / 'fda_logging_dict.json')
1087
+ perc_levels_stable.append(self.__count_stable_fda(fda_dict))
1088
+ perc_levels_final.append(self.__count_percentage_levels(fda_dict, fda=True))
1089
+ else:
1090
+ raise FileNotFoundError(f'no fda_logging_dict.json file in {path_experiments / exp_full_name / folder}')
1091
+
1092
+ # Average the results
1093
+ perc_levels_stable = np.mean(perc_levels_stable, axis=0).astype(int)
1094
+ perc_levels_final = np.mean(perc_levels_final, axis=0).astype(int)
1095
+
1096
+ # Plot pie chart of stable features
1097
+ axes[i*2, j].pie(
1098
+ perc_levels_stable,
1099
+ pctdistance=0.6,
1100
+ startangle=120,
1101
+ radius=1.1,
1102
+ rotatelabels=True,
1103
+ textprops={'fontsize': 14, 'weight': 'bold'},
1104
+ colors=colors_sns_stable
1105
+ )
1106
+
1107
+ # Title
1108
+ axes[i*2, j].set_title(f'{level} - {modality} - Stable', fontsize=15)
1109
+
1110
+ # Legends
1111
+ legends = [f'{level} - {perc_levels_stable[idx]}' for idx, level in enumerate(level_names_stable)]
1112
+ axes[i*2, j].legend(legends, loc='center left', bbox_to_anchor=(1, 0.5), prop={'size': 13})
1113
+
1114
+ # Plot pie chart of the final features selected
1115
+ axes[i*2+1, j].pie(
1116
+ perc_levels_final,
1117
+ autopct= lambda p: '{:.1f}%'.format(p) if p > 0 else '',
1118
+ pctdistance=0.6,
1119
+ startangle=120,
1120
+ radius=1.1,
1121
+ rotatelabels=True,
1122
+ textprops={'fontsize': 14, 'weight': 'bold'},
1123
+ colors=colors_sns,
1124
+ hatch=hatch)
1125
+
1126
+ # Title
1127
+ axes[i*2+1, j].set_title(f'{level} - {modality} - Final 10', fontsize=15)
1128
+
1129
+ # Legend
1130
+ axes[i*2+1, j].legend(levels_names, loc='center left', bbox_to_anchor=(1, 0.5), prop={'size': 13})
1131
+
1132
+ # Add legend
1133
+ plt.tight_layout()
1134
+ plt.subplots_adjust(top=0.9)
1135
+
1136
+ if title:
1137
+ fig.suptitle(title, fontsize=20)
1138
+ else:
1139
+ fig.suptitle(f'{experiment}: FDA breakdown per level', fontsize=20)
1140
+
1141
+ # Save the heatmap
1142
+ if save:
1143
+ if title:
1144
+ plt.savefig(path_experiments / f'{title}.png')
1145
+ else:
1146
+ plt.savefig(path_experiments / f'{experiment}_fda_features.png')
1147
+ else:
1148
+ plt.show()
1149
+
1150
+ def plot_feature_analysis(
1151
+ self,
1152
+ path_experiments: Path,
1153
+ experiment: str,
1154
+ levels: List,
1155
+ modalities: List = [],
1156
+ title: str = None,
1157
+ save: bool = False
1158
+ ) -> None:
1159
+ """
1160
+ This function plots a pie chart of the percentage of the final features used to train the model per radiomics level.
1161
+
1162
+ Args:
1163
+ path_experiments (Path): Path to the folder containing the experiments.
1164
+ experiment (str): Name of the experiment to plot. Will be used to find the results.
1165
+ levels (List): List of radiomics levels to include in the plot. For example: ['morph', 'intensity'].
1166
+ modalities (List, optional): List of imaging modalities to include in the plot. Defaults to [].
1167
+ title(str, optional): Title and name used to save the plot. Defaults to None.
1168
+ save (bool, optional): Whether to save the plot. Defaults to False.
1169
+
1170
+ Returns:
1171
+ None.
1172
+ """
1173
+ # Levels names
1174
+ levels_names = [
1175
+ 'Morphology',
1176
+ 'Intensity',
1177
+ 'Texture',
1178
+ 'Linear filters - Intensity',
1179
+ 'Linear filters - Texture',
1180
+ 'Textural filters - Intensity',
1181
+ 'Textural filters - Texture'
1182
+ ]
1183
+
1184
+ # Initialization
1185
+ colors_sns = sns.color_palette("pastel", n_colors=5)
1186
+ colors_sns.insert(3, colors_sns[3])
1187
+ colors_sns.insert(5, colors_sns[-1])
1188
+ hatch = ['', '', '', '..', '//', '..', '//']
1189
+
1190
+ # Set hatches color
1191
+ plt.rcParams['hatch.color'] = 'white'
1192
+
1193
+ # Create multiple plots for the pie charts
1194
+ fig, axes = plt.subplots(len(modalities), len(levels), figsize=(15, 10))
1195
+
1196
+ # Load the model results
1197
+ for i, modality in enumerate(modalities):
1198
+ for j, level in enumerate(levels):
1199
+ exp_full_name = 'learn__' + experiment + '_' + level + '_' + modality
1200
+ if 'feature_importance_analysis.json' in os.listdir(path_experiments / exp_full_name):
1201
+ fa_dict = load_json(path_experiments / exp_full_name / 'feature_importance_analysis.json')
1202
+ else:
1203
+ fa_dict = feature_imporance_analysis(path_experiments / exp_full_name)
1204
+
1205
+ # Extract percentage of features per level
1206
+ perc_levels = np.round(self.__count_percentage_levels(fa_dict), 2)
1207
+
1208
+ # Plot the pie chart of percentages for the final features
1209
+ if len(modalities) > 1:
1210
+ axes[i, j].pie(
1211
+ perc_levels,
1212
+ autopct= lambda p: '{:.1f}%'.format(p) if p > 0 else '',
1213
+ pctdistance=0.8,
1214
+ startangle=120,
1215
+ radius=1.3,
1216
+ rotatelabels=True,
1217
+ textprops={'fontsize': 14, 'weight': 'bold'},
1218
+ colors=colors_sns,
1219
+ hatch=hatch)
1220
+ axes[i, j].set_title(f'{level} - {modality}', fontsize=15)
1221
+ else:
1222
+ axes[j].pie(
1223
+ perc_levels,
1224
+ autopct= lambda p: '{:.1f}%'.format(p) if p > 0 else '',
1225
+ pctdistance=0.8,
1226
+ startangle=120,
1227
+ radius=1.3,
1228
+ rotatelabels=True,
1229
+ textprops={'fontsize': 14, 'weight': 'bold'},
1230
+ colors=colors_sns,
1231
+ hatch=hatch)
1232
+ axes[j].set_title(f'{level} - {modality}', fontsize=15)
1233
+
1234
+ # Add legend
1235
+ plt.legend(levels_names, loc='center left', bbox_to_anchor=(1, 0.5), prop={'size': 15})
1236
+ plt.tight_layout()
1237
+
1238
+ # Add title
1239
+ if title:
1240
+ fig.suptitle(title, fontsize=20)
1241
+ else:
1242
+ fig.suptitle(f'{experiment}: % of selected features per level', fontsize=20)
1243
+
1244
+ # Save the figure
1245
+ if save:
1246
+ if title:
1247
+ plt.savefig(path_experiments / f'{title}.png')
1248
+ else:
1249
+ plt.savefig(path_experiments / f'{experiment}_percentage_features.png')
1250
+ else:
1251
+ plt.show()
1252
+
1253
+ def plot_original_level_tree(
1254
+ self,
1255
+ path_experiments: Path,
1256
+ experiment: str,
1257
+ level: str,
1258
+ modalities: list,
1259
+ initial_width: float = 4,
1260
+ lines_weight: float = 1,
1261
+ title: str = None,
1262
+ figsize: tuple = (12,10),
1263
+ ) -> None:
1264
+ """
1265
+ Plots a tree explaining the impact of features in the original radiomics complexity level.
1266
+
1267
+ Args:
1268
+ path_experiments (Path): Path to the folder containing the experiments.
1269
+ experiment (str): Name of the experiment to plot. Will be used to find the results.
1270
+ level (str): Radiomics complexity level to use for the plot.
1271
+ modalities (list): List of imaging modalities to include in the plot.
1272
+ initial_width (float, optional): Initial width of the lines. Defaults to 4. For aesthetic purposes.
1273
+ lines_weight (float, optional): Weight applied to the lines of the tree. Defaults to 1. For aesthetic purposes.
1274
+ title(str, optional): Title and name used to save the plot. Defaults to None.
1275
+ figsize(tuple, optional): Size of the figure. Defaults to (12, 10).
1276
+
1277
+ Returns:
1278
+ None.
1279
+ """
1280
+ # Fill tree data for each modality
1281
+ for modality in modalities:
1282
+ # Initialization
1283
+ selected_feat_color = 'limegreen'
1284
+ optimal_lvl_color = 'darkorange'
1285
+
1286
+ # Initialization - outcome - levels
1287
+ styles_outcome_levels = ["dashed"] * 3
1288
+ colors_outcome_levels = ["black"] * 3
1289
+ width_outcome_levels = [initial_width] * 3
1290
+
1291
+ # Initialization - original - sublevels
1292
+ styles_original_levels = ["dashed"] * 3
1293
+ colors_original_levels = ["black"] * 3
1294
+ width_original_levels = [initial_width] * 3
1295
+
1296
+ # Initialization - texture-families
1297
+ styles_texture_families = ["dashed"] * 6
1298
+ colors_texture_families = ["black"] * 6
1299
+ width_texture_families = [initial_width] * 6
1300
+ families_names = ["glcm", "ngtdm", "ngldm", "glrlm", "gldzm", "glszm"]
1301
+
1302
+ # Get feature importance dict
1303
+ exp_full_name = 'learn__' + experiment + '_' + level + '_' + modality
1304
+ if 'feature_importance_analysis.json' in os.listdir(path_experiments / exp_full_name):
1305
+ fa_dict = load_json(path_experiments / exp_full_name / 'feature_importance_analysis.json')
1306
+ else:
1307
+ fa_dict = feature_imporance_analysis(path_experiments / exp_full_name)
1308
+
1309
+ # Organize data
1310
+ feature_data = {
1311
+ 'features': list(fa_dict.keys()),
1312
+ 'mean_importance': [fa_dict[feature]['importance_mean'] for feature in fa_dict.keys()],
1313
+ }
1314
+
1315
+ # Convert sample to df
1316
+ df = pd.DataFrame(feature_data)
1317
+
1318
+ # Apply weight to the lines
1319
+ df['final_coefficient'] = df['mean_importance']
1320
+
1321
+ # Normalize the final coefficients between 0 and 1
1322
+ df['final_coefficient'] = (df['final_coefficient'] - df['final_coefficient'].min()) \
1323
+ / (df['final_coefficient'].max() - df['final_coefficient'].min())
1324
+
1325
+ # Applying the lines weight
1326
+ df['final_coefficient'] *= lines_weight
1327
+
1328
+ # Assign complexity level to each feature
1329
+ for i, row in df['features'].items():
1330
+ level_name = row.split('__')[1].lower()
1331
+ family_name = row.split('__')[2].lower()
1332
+
1333
+ # Morph
1334
+ if level_name.startswith('morph'):
1335
+ # Update outcome-original connection
1336
+ styles_outcome_levels[0] = "solid"
1337
+ colors_outcome_levels[0] = selected_feat_color
1338
+ width_outcome_levels[0] += df['final_coefficient'][i]
1339
+
1340
+ # Update original-morph connection
1341
+ styles_original_levels[0] = "solid"
1342
+ colors_original_levels[0] = selected_feat_color
1343
+ width_original_levels[0] += df['final_coefficient'][i]
1344
+
1345
+ # Intensity
1346
+ elif level_name.startswith('intensity'):
1347
+ # Update outcome-original connection
1348
+ styles_outcome_levels[0] = "solid"
1349
+ colors_outcome_levels[0] = selected_feat_color
1350
+ width_outcome_levels[0] += df['final_coefficient'][i]
1351
+
1352
+ # Update original-int connection
1353
+ styles_original_levels[1] = "solid"
1354
+ colors_original_levels[1] = selected_feat_color
1355
+ width_original_levels[1] += df['final_coefficient'][i]
1356
+
1357
+ # Texture
1358
+ elif level_name.startswith('texture'):
1359
+ # Update outcome-original connection
1360
+ styles_outcome_levels[0] = "solid"
1361
+ colors_outcome_levels[0] = selected_feat_color
1362
+ width_outcome_levels[0] += df['final_coefficient'][i]
1363
+
1364
+ # Update original-texture connection
1365
+ styles_original_levels[2] = "solid"
1366
+ colors_original_levels[2] = selected_feat_color
1367
+ width_original_levels[2] += df['final_coefficient'][i]
1368
+
1369
+ # Determine the most important level
1370
+ index_best_level = np.argmax(width_outcome_levels)
1371
+ colors_outcome_levels[index_best_level] = optimal_lvl_color
1372
+
1373
+ # Update color for the best sub-level
1374
+ colors_original_levels[np.argmax(width_original_levels)] = optimal_lvl_color
1375
+
1376
+ # If texture features are the optimal
1377
+ if np.argmax(width_original_levels) == 2:
1378
+ for i, row in df['features'].items():
1379
+ level_name = row.split('__')[1].lower()
1380
+ family_name = row.split('__')[2].lower()
1381
+
1382
+ # Update texture-families connection
1383
+ if level_name.startswith('texture'):
1384
+ if family_name.startswith('_glcm'):
1385
+ styles_texture_families[0] = "solid"
1386
+ colors_texture_families[0] = selected_feat_color
1387
+ width_texture_families[0] += df['final_coefficient'][i]
1388
+ elif family_name.startswith('_ngtdm'):
1389
+ styles_texture_families[1] = "solid"
1390
+ colors_texture_families[1] = selected_feat_color
1391
+ width_texture_families[1] += df['final_coefficient'][i]
1392
+ elif family_name.startswith('_ngldm'):
1393
+ styles_texture_families[2] = "solid"
1394
+ colors_texture_families[2] = selected_feat_color
1395
+ width_texture_families[2] += df['final_coefficient'][i]
1396
+ elif family_name.startswith('_glrlm'):
1397
+ styles_texture_families[3] = "solid"
1398
+ colors_texture_families[3] = selected_feat_color
1399
+ width_texture_families[3] += df['final_coefficient'][i]
1400
+ elif family_name.startswith('_gldzm'):
1401
+ styles_texture_families[4] = "solid"
1402
+ colors_texture_families[4] = selected_feat_color
1403
+ width_texture_families[4] += df['final_coefficient'][i]
1404
+ elif family_name.startswith('_glszm'):
1405
+ styles_texture_families[5] = "solid"
1406
+ colors_texture_families[5] = selected_feat_color
1407
+ width_texture_families[5] += df['final_coefficient'][i]
1408
+ else:
1409
+ raise ValueError(f'Family of the feature {family_name} not recognized')
1410
+
1411
+ # Update color
1412
+ colors_texture_families[np.argmax(width_texture_families)] = optimal_lvl_color
1413
+
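The family dispatch above keeps six parallel style/color/width lists in sync through a long if/elif chain; an equivalent, table-driven formulation (shown only as a sketch, with indices in the same order as families_names) is more compact and harder to desynchronize.

    # Sketch of a table-driven version of the family dispatch above.
    FAMILY_INDEX = {'_glcm': 0, '_ngtdm': 1, '_ngldm': 2, '_glrlm': 3, '_gldzm': 4, '_glszm': 5}

    def family_index(family_name: str) -> int:
        """Return the texture-family index for a family token, or raise if unknown."""
        for prefix, idx in FAMILY_INDEX.items():
            if family_name.startswith(prefix):
                return idx
        raise ValueError(f'Family of the feature {family_name} not recognized')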
1414
+ # Find best texture family to continue path
1415
+ best_family_name = ""
1416
+ index_best_family = np.argmax(width_texture_families)
1417
+ best_family_name = families_names[index_best_family]
1418
+ features_names = texture_features_all[index_best_family]
1419
+
1420
+ # Update texture-families-features connection
1421
+ width_texture_families_feature = [initial_width] * len(features_names)
1422
+ colors_texture_families_feature = ["black"] * len(features_names)
1423
+ styles_texture_families_feature = ["dashed"] * len(features_names)
1424
+ for i, row in df['features'].items():
1425
+ level_name = row.split('__')[1].lower()
1426
+ family_name = row.split('__')[2].lower()
1427
+ feature_name = row.split('__')
1428
+ if level_name.startswith('texture') and family_name.startswith('_' + best_family_name):
1429
+ for feature in features_names:
1430
+ if feature in feature_name:
1431
+ colors_texture_families_feature[features_names.index(feature)] = selected_feat_color
1432
+ styles_texture_families_feature[features_names.index(feature)] = "solid"
1433
+ width_texture_families_feature[features_names.index(feature)] += df['final_coefficient'][i]
1434
+ break
1435
+
1436
+ # Update color for the best texture family
1437
+ colors_texture_families_feature[np.argmax(width_texture_families_feature)] = optimal_lvl_color
1438
+
1439
+ # For aesthetic purposes
1440
+ experiment_sep = experiment.replace('_', '\n')
1441
+
1442
+ # Design the graph
1443
+ G = nx.Graph()
1444
+
1445
+ # Original level
1446
+ G.add_edge(experiment_sep, 'Original', color=optimal_lvl_color, width=np.sum(width_original_levels), style="solid")
1447
+ if styles_original_levels[0] == "solid":
1448
+ G.add_edge('Original', 'Morph', color=colors_original_levels[0], width=width_original_levels[0], style=styles_original_levels[0])
1449
+ if styles_original_levels[1] == "solid":
1450
+ G.add_edge('Original', 'Int', color=colors_original_levels[1], width=width_original_levels[1], style=styles_original_levels[1])
1451
+ if styles_original_levels[2] == "solid":
1452
+ G.add_edge('Original', 'Text', color=colors_original_levels[2], width=width_original_levels[2], style=styles_original_levels[2])
1453
+
1454
+ # Continue path to the textural features if they are the optimal level
1455
+ if np.argmax(width_original_levels) == 2:
1456
+ # Put best level index in the middle
1457
+ nodes_order = [0, 1, 2, 3, 4, 5]
1458
+ nodes_order.insert(3, nodes_order.pop(nodes_order.index(np.argmax(width_texture_families))))
1459
+
1460
+ # Reorder nodes names
1461
+ nodes_names = ['GLCM', 'NGTDM', 'NGLDM', 'GLRLM', 'GLDZM', 'GLSZM']
1462
+ nodes_names = [nodes_names[i] for i in nodes_order]
1463
+ colors_texture_families = [colors_texture_families[i] for i in nodes_order]
1464
+ width_texture_families = [width_texture_families[i] for i in nodes_order]
1465
+ styles_texture_families = [styles_texture_families[i] for i in nodes_order]
1466
+
1467
+ # Add texture features families nodes
1468
+ for idx, node_name in enumerate(nodes_names):
1469
+ G.add_edge(
1470
+ 'Text',
1471
+ node_name,
1472
+ color=colors_texture_families[idx],
1473
+ width=width_texture_families[idx],
1474
+ style=styles_texture_families[idx]
1475
+ )
1476
+
1477
+ # Continue path to the textural features
1478
+ best_node_name = best_family_name.upper()
1479
+ for idx, feature in enumerate(features_names):
1480
+ G.add_edge(
1481
+ best_node_name,
1482
+ feature.replace('_', '\n'),
1483
+ color=colors_texture_families_feature[idx],
1484
+ width=width_texture_families_feature[idx],
1485
+ style=styles_texture_families_feature[idx]
1486
+ )
1487
+
1488
+ # Graph layout
1489
+ pos = graphviz_layout(G, root=experiment_sep, prog="dot")
1490
+
1491
+ # Create the plot: figure and axis
1492
+ fig = plt.figure(figsize=figsize, dpi=300)
1493
+ ax = fig.add_subplot(1, 1, 1)
1494
+
1495
+ # Get the attributes of the edges
1496
+ colors = nx.get_edge_attributes(G,'color').values()
1497
+ widths = nx.get_edge_attributes(G,'width').values()
1498
+ style = nx.get_edge_attributes(G,'style').values()
1499
+
1500
+ # Draw the graph
1501
+ cmap = [to_rgba('b')] * len(pos)
1502
+ nx.draw(
1503
+ G,
1504
+ pos=pos,
1505
+ ax=ax,
1506
+ edge_color=colors,
1507
+ width=list(widths),
1508
+ with_labels=True,
1509
+ node_color=cmap,
1510
+ node_size=1700,
1511
+ font_size=8,
1512
+ font_color='white',
1513
+ font_weight='bold',
1514
+ node_shape='o',
1515
+ style=style
1516
+ )
1517
+
1518
+ # Create custom legend
1519
+ custom_legends = [
1520
+ Line2D([0], [0], color=selected_feat_color, lw=4, linestyle='solid', label=f'Selected (thickness reflects impact)'),
1521
+ Line2D([0], [0], color='black', lw=4, linestyle='dashed', label='Not selected'),
1522
+ Line2D([0], [0], color=optimal_lvl_color, lw=4, linestyle='solid', label='Path with highest impact')
1523
+ ]
1524
+
1525
+ # Update keys according to the optimal level
1526
+ figure_keys = []
1527
+ if styles_original_levels[0] == "solid":
1528
+ figure_keys.append(mpatches.Patch(color='none', label='Morph: Morphological'))
1529
+ if styles_original_levels[1] == "solid":
1530
+ figure_keys.append(mpatches.Patch(color='none', label='Int: Intensity'))
1531
+ if styles_original_levels[2] == "solid":
1532
+ figure_keys.append(mpatches.Patch(color='none', label='Text: Textural'))
1533
+
1534
+ # Set title
1535
+ if title:
1536
+ ax.set_title(title, fontsize=20)
1537
+ else:
1538
+ ax.set_title(
1539
+ f'Radiomics explanation tree - Original level:'\
1540
+ + f'\nExperiment: {experiment}'\
1541
+ + f'\nLevel: {level}'\
1542
+ + f'\nModality: {modality}', fontsize=20
1543
+ )
1544
+
1545
+ # Apply the custom legend
1546
+ legend = plt.legend(handles=custom_legends, loc='upper right', fontsize=15, frameon=True, title = "Legend")
1547
+ legend.get_frame().set_edgecolor('black')
1548
+ legend.get_frame().set_linewidth(2.0)
1549
+
1550
+ # Abbreviations legend
1551
+ legend_keys = plt.legend(handles=figure_keys, loc='center right', fontsize=15, frameon=True, title = "Abbreviations", handlelength=0)
1552
+ legend_keys.get_frame().set_edgecolor('black')
1553
+ legend_keys.get_frame().set_linewidth(2.0)
1554
+
1555
+ # Add both legends as separate artists on the axes
1556
+ plt.gca().add_artist(legend_keys)
1557
+ plt.gca().add_artist(legend)
1558
+
1559
+ # Tight layout
1560
+ fig.tight_layout()
1561
+
1562
+ # Save the plot (mandatory, since the figure does not display well in matplotlib's interactive viewer)
1563
+ fig.savefig(path_experiments / f'Original_level_{experiment}_{level}_{modality}_explanation_tree.png', dpi=300)
1564
+
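A hedged usage sketch of the tree-plotting methods defined here: the class name inside Results.py, the constructor signature, the experiment name and the 'LF'/'TF' level strings are all assumptions for illustration; path_experiments is assumed to contain 'learn__<experiment>_<level>_<modality>' folders as read by the code below.

    from pathlib import Path
    from MEDiml.learning.Results import Results   # class name inside Results.py assumed

    results = Results()                            # constructor arguments, if any, omitted here
    path_experiments = Path('/path/to/experiments')  # hypothetical experiments folder
    results.plot_lf_level_tree(path_experiments, experiment='experiment1', level='LF', modalities=['CT'])
    results.plot_tf_level_tree(path_experiments, experiment='experiment1', level='TF', modalities=['CT'])

Each call writes one explanation-tree PNG per modality next to the experiment folders.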
1565
+ def plot_lf_level_tree(
1566
+ self,
1567
+ path_experiments: Path,
1568
+ experiment: str,
1569
+ level: str,
1570
+ modalities: list,
1571
+ initial_width: float = 4,
1572
+ lines_weight: float = 1,
1573
+ title: str = None,
1574
+ figsize: tuple = (12,10),
1575
+ ) -> None:
1576
+ """
1577
+ Plots a tree explaining the impact of features in the linear filters radiomics complexity level.
1578
+
1579
+ Args:
1580
+ path_experiments (Path): Path to the folder containing the experiments.
1581
+ experiment (str): Name of the experiment to plot. Will be used to find the results.
1582
+ level (str): Radiomics complexity level to use for the plot.
+ modalities (list): List of imaging modalities to include in the plot.
+ initial_width (float, optional): Initial width of the lines. Defaults to 4. For aesthetic purposes.
+ lines_weight (float, optional): Weight applied to the lines of the tree. Defaults to 1. For aesthetic purposes.
+ title (str, optional): Title and name used to save the plot. Defaults to None.
+ figsize (tuple, optional): Size of the figure. Defaults to (12, 10).
1588
+
1589
+ Returns:
1590
+ None.
1591
+ """
1592
+ # Fill tree data
1593
+ for modality in modalities:
1594
+ # Initialization
1595
+ selected_feat_color = 'limegreen'
1596
+ optimal_lvl_color = 'darkorange'
1597
+
1598
+ # Initialization - outcome - levels
1599
+ styles_outcome_levels = ["dashed"] * 3
1600
+ colors_outcome_levels = ["black"] * 3
1601
+ width_outcome_levels = [initial_width] * 3
1602
+
1603
+ # Initialization - lf - sublevels
1604
+ filters_names = ['mean', 'log', 'laws', 'gabor', 'coif']
1605
+ styles_lf_levels = ["dashed"] * 2
1606
+ colors_lf_levels = ["black"] * 2
1607
+ width_lf_levels = [initial_width] * 2
1608
+
1609
+ # Initialization - texture-families
1610
+ styles_texture_families = ["dashed"] * 6
1611
+ colors_texture_families = ["black"] * 6
1612
+ width_texture_families = [initial_width] * 6
1613
+ families_names = ["glcm", "ngtdm", "ngldm", "glrlm", "gldzm", "glszm"]
1614
+
1615
+ # Get feature importance dict
1616
+ exp_full_name = 'learn__' + experiment + '_' + level + '_' + modality
1617
+ if 'feature_importance_analysis.json' in os.listdir(path_experiments / exp_full_name):
1618
+ fa_dict = load_json(path_experiments / exp_full_name / 'feature_importance_analysis.json')
1619
+ else:
1620
+ fa_dict = feature_imporance_analysis(path_experiments / exp_full_name)
1621
+
1622
+ # Organize data
1623
+ feature_data = {
1624
+ 'features': list(fa_dict.keys()),
1625
+ 'mean_importance': [fa_dict[feature]['importance_mean'] for feature in fa_dict.keys()],
1626
+ }
1627
+
1628
+ # Convert sample to df
1629
+ df = pd.DataFrame(feature_data)
1630
+
1631
+ # Apply weight to the lines
1632
+ df['final_coefficient'] = df['mean_importance']
1633
+
1634
+ # Normalize the final coefficients between 0 and 1
1635
+ df['final_coefficient'] = (df['final_coefficient'] - df['final_coefficient'].min()) \
1636
+ / (df['final_coefficient'].max() - df['final_coefficient'].min())
1637
+
1638
+ # Applying the lines weight
1639
+ df['final_coefficient'] *= lines_weight
1640
+
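Note that the min-max scaling above divides by zero (producing NaN widths) when every mean importance is identical; a guarded variant of the same scaling, shown only as a sketch, would be:

    import pandas as pd

    def minmax_scale(s: pd.Series, weight: float = 1.0) -> pd.Series:
        """Scale a series to [0, 1] and apply a line weight; return zeros
        instead of dividing by zero when all values are equal."""
        span = s.max() - s.min()
        if span == 0:
            return pd.Series(0.0, index=s.index)
        return (s - s.min()) / span * weight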
1641
+ # Finding linear filters features and updating the connections
1642
+ for i, row in df['features'].items():
1643
+ level_name = row.split('__')[1].lower()
1644
+ family_name = row.split('__')[2].lower()
1645
+
1646
+ # Linear filters
1647
+ if level_name.startswith('mean') \
1648
+ or level_name.startswith('log') \
1649
+ or level_name.startswith('laws') \
1650
+ or level_name.startswith('gabor') \
1651
+ or level_name.startswith('wavelet') \
1652
+ or level_name.startswith('coif'):
1653
+
1654
+ # Update outcome-original connection
1655
+ styles_outcome_levels[1] = "solid"
1656
+ colors_outcome_levels[1] = selected_feat_color
1657
+ width_outcome_levels[1] += df['final_coefficient'][i]
1658
+
1659
+ # Find the best performing filter
1660
+ width_lf_filters = [initial_width] * 5
1661
+ for i, row in df['features'].items():
1662
+ level_name = row.split('__')[1].lower()
1663
+ family_name = row.split('__')[2].lower()
1664
+ if level_name.startswith('mean'):
1665
+ width_lf_filters[0] += df['final_coefficient'][i]
1666
+ elif level_name.startswith('log'):
1667
+ width_lf_filters[1] += df['final_coefficient'][i]
1668
+ elif level_name.startswith('laws'):
1669
+ width_lf_filters[2] += df['final_coefficient'][i]
1670
+ elif level_name.startswith('gabor'):
1671
+ width_lf_filters[3] += df['final_coefficient'][i]
1672
+ elif level_name.startswith('wavelet'):
1673
+ width_lf_filters[4] += df['final_coefficient'][i]
1674
+ elif level_name.startswith('coif'):
1675
+ width_lf_filters[4] += df['final_coefficient'][i]
1676
+
1677
+ # Get best filter
1678
+ index_best_filter = np.argmax(width_lf_filters)
1679
+ best_filter = filters_names[index_best_filter]
1680
+
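Both the 'wavelet' and 'coif' prefixes above are pooled into the last bucket, which is why filters_names uses 'coif' as the stand-in for the wavelet family; the sketch below expresses the same accumulation with an explicit prefix-to-bucket table (an illustration, not the package's implementation).

    import numpy as np

    # Prefix -> bucket index; 'wavelet' and 'coif' share the last bucket,
    # mirroring the if/elif chain above.
    FILTER_BUCKET = {'mean': 0, 'log': 1, 'laws': 2, 'gabor': 3, 'wavelet': 4, 'coif': 4}

    def best_filter_index(level_names, coefficients, initial_width=4.0):
        widths = np.full(5, initial_width)
        for name, coef in zip(level_names, coefficients):
            for prefix, idx in FILTER_BUCKET.items():
                if name.startswith(prefix):
                    widths[idx] += coef
                    break
        return int(np.argmax(widths))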
1681
+ # Separate intensity and texture features, then update the connections
1682
+ for i, row in df['features'].items():
1683
+ level_name = row.split('__')[1].lower()
1684
+ family_name = row.split('__')[2].lower()
1685
+ if level_name.startswith(best_filter):
1686
+ if family_name.startswith('_int'):
1687
+ width_lf_levels[0] += df['final_coefficient'][i]
1688
+ elif family_name.startswith(tuple(['_glcm', '_gldzm', '_glrlm', '_glszm', '_ngtdm', '_ngldm'])):
1689
+ width_lf_levels[1] += df['final_coefficient'][i]
1690
+
1691
+ # If texture features are more impactful, update the connections
1692
+ if width_lf_levels[1] > width_lf_levels[0]:
1693
+ colors_lf_levels[1] = optimal_lvl_color
1694
+ styles_lf_levels[1] = "solid"
1695
+
1696
+ # Update lf-texture-families connection
1697
+ for i, row in df['features'].items():
1698
+ level_name = row.split('__')[1].lower()
1699
+ family_name = row.split('__')[2].lower()
1700
+ if not family_name.startswith('_int') and level_name.startswith(best_filter):
1701
+ if family_name.startswith('_glcm'):
1702
+ styles_texture_families[0] = "solid"
1703
+ colors_texture_families[0] = selected_feat_color
1704
+ width_texture_families[0] += df['final_coefficient'][i]
1705
+ elif family_name.startswith('_ngtdm'):
1706
+ styles_texture_families[1] = "solid"
1707
+ colors_texture_families[1] = selected_feat_color
1708
+ width_texture_families[1] += df['final_coefficient'][i]
1709
+ elif family_name.startswith('_ngldm'):
1710
+ styles_texture_families[2] = "solid"
1711
+ colors_texture_families[2] = selected_feat_color
1712
+ width_texture_families[2] += df['final_coefficient'][i]
1713
+ elif family_name.startswith('_glrlm'):
1714
+ styles_texture_families[3] = "solid"
1715
+ colors_texture_families[3] = selected_feat_color
1716
+ width_texture_families[3] += df['final_coefficient'][i]
1717
+ elif family_name.startswith('_gldzm'):
1718
+ styles_texture_families[4] = "solid"
1719
+ colors_texture_families[4] = selected_feat_color
1720
+ width_texture_families[4] += df['final_coefficient'][i]
1721
+ elif family_name.startswith('_glszm'):
1722
+ styles_texture_families[5] = "solid"
1723
+ colors_texture_families[5] = selected_feat_color
1724
+ width_texture_families[5] += df['final_coefficient'][i]
1725
+ else:
1726
+ raise ValueError(f'Family of the feature {family_name} not recognized')
1727
+
1728
+ # Update color
1729
+ colors_texture_families[np.argmax(width_texture_families)] = optimal_lvl_color
1730
+
1731
+ else:
1732
+ colors_lf_levels[0] = optimal_lvl_color
1733
+ styles_lf_levels[0] = "solid"
1734
+
1735
+ # If texture features are the optimal level, continue path
1736
+ if width_lf_levels[1] > width_lf_levels[0]:
1737
+
1738
+ # Get best texture family
1739
+ best_family_name = ""
1740
+ index_best_family = np.argmax(width_texture_families)
1741
+ best_family_name = families_names[index_best_family]
1742
+ features_names = texture_features_all[index_best_family]
1743
+
1744
+ # Update texture-families-features connection
1745
+ width_texture_families_feature = [initial_width] * len(features_names)
1746
+ colors_texture_families_feature = ["black"] * len(features_names)
1747
+ styles_texture_families_feature = ["dashed"] * len(features_names)
1748
+ for i, row in df['features'].items():
1749
+ level_name = row.split('__')[1].lower()
1750
+ family_name = row.split('__')[2].lower()
1751
+ feature_name = row.split('__')
1752
+ if family_name.startswith('_' + best_family_name) and level_name.startswith(best_filter):
1753
+ for feature in features_names:
1754
+ if feature in feature_name:
1755
+ colors_texture_families_feature[features_names.index(feature)] = selected_feat_color
1756
+ styles_texture_families_feature[features_names.index(feature)] = "solid"
1757
+ width_texture_families_feature[features_names.index(feature)] += df['final_coefficient'][i]
1758
+ break
1759
+
1760
+ # Update color for the best texture family
1761
+ colors_texture_families_feature[np.argmax(width_texture_families_feature)] = optimal_lvl_color
1762
+
1763
+ # For aesthetic purposes
1764
+ experiment_sep = experiment.replace('_', '\n')
1765
+
1766
+ # Design the graph
1767
+ G = nx.Graph()
1768
+
1769
+ # Linear filters level
1770
+ G.add_edge(experiment_sep, 'LF', color=optimal_lvl_color, width=np.sum(width_lf_filters), style=styles_outcome_levels[1])
1771
+
1772
+ # Add best filter
1773
+ best_filter = best_filter.replace('_', '\n')
1774
+ G.add_edge('LF', best_filter.upper(), color=optimal_lvl_color, width=width_lf_filters[index_best_filter], style="solid")
1775
+
1776
+ # Int or Text
1777
+ if width_lf_levels[1] <= width_lf_levels[0]:
1778
+ G.add_edge(best_filter.upper(), 'LF\nInt', color=colors_lf_levels[0], width=width_lf_levels[0], style=styles_lf_levels[0])
1779
+ else:
1780
+ G.add_edge(best_filter.upper(), 'LF\nText', color=colors_lf_levels[1], width=width_lf_levels[1], style=styles_lf_levels[1])
1781
+
1782
+ # Put best level index in the middle
1783
+ nodes_order = [0, 1, 2, 3, 4, 5]
1784
+ nodes_order.insert(3, nodes_order.pop(nodes_order.index(np.argmax(width_texture_families))))
1785
+
1786
+ # Reorder nodes names
1787
+ nodes_names = ['LF\nGLCM', 'LF\nNGTDM', 'LF\nNGLDM', 'LF\nGLRLM', 'LF\nGLDZM', 'LF\nGLSZM']
1788
+ nodes_names = [nodes_names[i] for i in nodes_order]
1789
+ colors_texture_families = [colors_texture_families[i] for i in nodes_order]
1790
+ width_texture_families = [width_texture_families[i] for i in nodes_order]
1791
+ styles_texture_families = [styles_texture_families[i] for i in nodes_order]
1792
+
1793
+ # Add texture features families nodes
1794
+ for idx, node_name in enumerate(nodes_names):
1795
+ G.add_edge(
1796
+ 'LF\nText',
1797
+ node_name,
1798
+ color=colors_texture_families[idx],
1799
+ width=width_texture_families[idx],
1800
+ style=styles_texture_families[idx]
1801
+ )
1802
+
1803
+ # Continue path to the textural features
1804
+ best_node_name = f'LF\n{best_family_name.upper()}'
1805
+ for idx, feature in enumerate(features_names):
1806
+ G.add_edge(
1807
+ best_node_name,
1808
+ feature.replace('_', '\n'),
1809
+ color=colors_texture_families_feature[idx],
1810
+ width=width_texture_families_feature[idx],
1811
+ style=styles_texture_families_feature[idx]
1812
+ )
1813
+
1814
+ # Graph layout
1815
+ pos = graphviz_layout(G, root=experiment_sep, prog="dot")
1816
+
1817
+ # Create the plot: figure and axis
1818
+ fig = plt.figure(figsize=figsize, dpi=300)
1819
+ ax = fig.add_subplot(1, 1, 1)
1820
+
1821
+ # Get the attributes of the edges
1822
+ colors = nx.get_edge_attributes(G,'color').values()
1823
+ widths = nx.get_edge_attributes(G,'width').values()
1824
+ style = nx.get_edge_attributes(G,'style').values()
1825
+
1826
+ # Draw the graph
1827
+ cmap = [to_rgba('b')] * len(pos)
1828
+ nx.draw(
1829
+ G,
1830
+ pos=pos,
1831
+ ax=ax,
1832
+ edge_color=colors,
1833
+ width=list(widths),
1834
+ with_labels=True,
1835
+ node_color=cmap,
1836
+ node_size=1700,
1837
+ font_size=8,
1838
+ font_color='white',
1839
+ font_weight='bold',
1840
+ node_shape='o',
1841
+ style=style
1842
+ )
1843
+
1844
+ # Create custom legend
1845
+ custom_legends = [
1846
+ Line2D([0], [0], color=selected_feat_color, lw=4, linestyle='solid', label=f'Selected (thickness reflects impact)'),
1847
+ Line2D([0], [0], color='black', lw=4, linestyle='dashed', label='Not selected'),
1848
+ Line2D([0], [0], color=optimal_lvl_color, lw=4, linestyle='solid', label='Path with highest impact')
1849
+ ]
1850
+
1851
+ # Update keys according to the optimal level
1852
+ figure_keys = []
1853
+ figure_keys.append(mpatches.Patch(color='none', label='LF: Linear Filters'))
1854
+ if width_lf_levels[1] > width_lf_levels[0]:
1855
+ figure_keys.append(mpatches.Patch(color='none', label='Text: Textural'))
1856
+ else:
1857
+ figure_keys.append(mpatches.Patch(color='none', label='Int: Intensity'))
1858
+
1859
+ # Set title
1860
+ if title:
1861
+ ax.set_title(title, fontsize=20)
1862
+ else:
1863
+ ax.set_title(
1864
+ f'Radiomics explanation tree:'\
1865
+ + f'\nExperiment: {experiment}'\
1866
+ + f'\nLevel: {level}'\
1867
+ + f'\nModality: {modality}', fontsize=20
1868
+ )
1869
+
1870
+ # Apply the custom legend
1871
+ legend = plt.legend(handles=custom_legends, loc='upper right', fontsize=15, frameon=True, title = "Legend")
1872
+ legend.get_frame().set_edgecolor('black')
1873
+ legend.get_frame().set_linewidth(2.0)
1874
+
1875
+ # Abbreviations legend
1876
+ legend_keys = plt.legend(handles=figure_keys, loc='center right', fontsize=15, frameon=True, title = "Abbreviations", handlelength=0)
1877
+ legend_keys.get_frame().set_edgecolor('black')
1878
+ legend_keys.get_frame().set_linewidth(2.0)
1879
+
1880
+ # Add both legends as separate artists on the axes
1881
+ plt.gca().add_artist(legend_keys)
1882
+ plt.gca().add_artist(legend)
1883
+
1884
+ # Tight layout
1885
+ fig.tight_layout()
1886
+
1887
+ # Save the plot (mandatory, since the figure does not display well in matplotlib's interactive viewer)
1888
+ fig.savefig(path_experiments / f'LF_level_{experiment}_{level}_{modality}_explanation_tree.png', dpi=300)
1889
+
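graphviz_layout with prog="dot" requires a Graphviz installation plus Python bindings (pygraphviz or pydot, depending on which networkx helper the module imports); the sketch below shows one possible fallback when those bindings are unavailable, purely as an assumption-laden convenience wrapper.

    import networkx as nx

    def tree_layout(G: nx.Graph, root):
        """Try the Graphviz 'dot' layout used above; fall back to a pure-Python
        layout if the Graphviz bindings are not installed (sketch only)."""
        try:
            from networkx.drawing.nx_agraph import graphviz_layout  # needs pygraphviz
            return graphviz_layout(G, root=root, prog="dot")
        except ImportError:
            return nx.spring_layout(G, seed=0)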
1890
+ def plot_tf_level_tree(
1891
+ self,
1892
+ path_experiments: Path,
1893
+ experiment: str,
1894
+ level: str,
1895
+ modalities: list,
1896
+ initial_width: float = 4,
1897
+ lines_weight: float = 1,
1898
+ title: str = None,
1899
+ figsize: tuple = (12,10),
1900
+ ) -> None:
1901
+ """
1902
+ Plots a tree explaining the impact of features in the textural filters radiomics complexity level.
1903
+
1904
+ Args:
1905
+ path_experiments (Path): Path to the folder containing the experiments.
1906
+ experiment (str): Name of the experiment to plot. Will be used to find the results.
1907
+ level (str): Radiomics complexity level to use for the plot.
+ modalities (list): List of imaging modalities to include in the plot.
+ initial_width (float, optional): Initial width of the lines. Defaults to 4. For aesthetic purposes.
+ lines_weight (float, optional): Weight applied to the lines of the tree. Defaults to 1. For aesthetic purposes.
+ title (str, optional): Title and name used to save the plot. Defaults to None.
+ figsize (tuple, optional): Size of the figure. Defaults to (12, 10).
1913
+
1914
+ Returns:
1915
+ None.
1916
+ """
1917
+ # Fill tree data
1918
+ for modality in modalities:
1919
+ # Initialization
1920
+ selected_feat_color = 'limegreen'
1921
+ optimal_lvl_color = 'darkorange'
1922
+
1923
+ # Initialization - outcome - levels
1924
+ styles_outcome_levels = ["dashed"] * 3
1925
+ colors_outcome_levels = ["black"] * 3
1926
+ width_outcome_levels = [initial_width] * 3
1927
+
1928
+ # Initialization - tf - sublevels
1929
+ styles_tf_levels = ["dashed"] * 2
1930
+ colors_tf_levels = ["black"] * 2
1931
+ width_tf_levels = [initial_width] * 2
1932
+
1933
+ # Initialization - tf - best filter
1934
+ width_tf_filters = [initial_width] * len(glcm_features_names)
1935
+
1936
+ # Initialization - texture-families
1937
+ styles_texture_families = ["dashed"] * 6
1938
+ colors_texture_families = ["black"] * 6
1939
+ width_texture_families = [initial_width] * 6
1940
+ families_names = ["glcm", "ngtdm", "ngldm", "glrlm", "gldzm", "glszm"]
1941
+
1942
+ # Get feature importance dict
1943
+ exp_full_name = 'learn__' + experiment + '_' + level + '_' + modality
1944
+ if 'feature_importance_analysis.json' in os.listdir(path_experiments / exp_full_name):
1945
+ fa_dict = load_json(path_experiments / exp_full_name / 'feature_importance_analysis.json')
1946
+ else:
1947
+ fa_dict = feature_imporance_analysis(path_experiments / exp_full_name)
1948
+
1949
+ # Organize data
1950
+ feature_data = {
1951
+ 'features': list(fa_dict.keys()),
1952
+ 'mean_importance': [fa_dict[feature]['importance_mean'] for feature in fa_dict.keys()],
1953
+ }
1954
+
1955
+ # Convert sample to df
1956
+ df = pd.DataFrame(feature_data)
1957
+
1958
+ # Apply weight to the lines
1959
+ df['final_coefficient'] = df['mean_importance']
1960
+
1961
+ # Normalize the final coefficients between 0 and 1
1962
+ df['final_coefficient'] = (df['final_coefficient'] - df['final_coefficient'].min()) \
1963
+ / (df['final_coefficient'].max() - df['final_coefficient'].min())
1964
+
1965
+ # Applying the lines weight
1966
+ df['final_coefficient'] *= lines_weight
1967
+
1968
+ # Filling the lines data for textural filters features and updating the connections
1969
+ for i, row in df['features'].items():
1970
+ level_name = row.split('__')[1].lower()
1971
+ family_name = row.split('__')[2].lower()
1972
+
1973
+ # Textural filters
1974
+ if level_name.startswith('glcm'):
1975
+ # Update outcome-original connection
1976
+ styles_outcome_levels[2] = "solid"
1977
+ colors_outcome_levels[2] = optimal_lvl_color
1978
+ width_outcome_levels[2] += df['final_coefficient'][i]
1979
+
1980
+ # Update tf-best filter connection
1981
+ for feature in glcm_features_names:
1982
+ if feature + '__' in row:
1983
+ width_tf_filters[glcm_features_names.index(feature)] += df['final_coefficient'][i]
1984
+ break
1985
+
1986
+ # Get best filter
1987
+ index_best_filter = np.argmax(width_tf_filters)
1988
+ best_filter = glcm_features_names[index_best_filter]
1989
+
1990
+ # Separate intensity and texture features, then update the connections
1991
+ for i, row in df['features'].items():
1992
+ level_name = row.split('__')[1].lower()
1993
+ family_name = row.split('__')[2].lower()
1994
+ if level_name.startswith('glcm') and best_filter + '__' in row:
1995
+ if family_name.startswith('_int'):
1996
+ width_tf_levels[0] += df['final_coefficient'][i]
1997
+ elif family_name.startswith(tuple(['_glcm', '_gldzm', '_glrlm', '_glszm', '_ngtdm', '_ngldm'])):
1998
+ width_tf_levels[1] += df['final_coefficient'][i]
1999
+
2000
+ # If texture features are more impactful, update the connections
2001
+ if width_tf_levels[1] > width_tf_levels[0]:
2002
+ colors_tf_levels[1] = optimal_lvl_color
2003
+ styles_tf_levels[1] = "solid"
2004
+
2005
+ # Update tf-texture-families connection
2006
+ for i, row in df['features'].items():
2007
+ level_name = row.split('__')[1].lower()
2008
+ family_name = row.split('__')[2].lower()
2009
+ if level_name.startswith('glcm') and best_filter + '__' in row:
2010
+ if family_name.startswith('_glcm'):
2011
+ styles_texture_families[0] = "solid"
2012
+ colors_texture_families[0] = selected_feat_color
2013
+ width_texture_families[0] += df['final_coefficient'][i]
2014
+ elif family_name.startswith('_ngtdm'):
2015
+ styles_texture_families[1] = "solid"
2016
+ colors_texture_families[1] = selected_feat_color
2017
+ width_texture_families[1] += df['final_coefficient'][i]
2018
+ elif family_name.startswith('_ngldm'):
2019
+ styles_texture_families[2] = "solid"
2020
+ colors_texture_families[2] = selected_feat_color
2021
+ width_texture_families[2] += df['final_coefficient'][i]
2022
+ elif family_name.startswith('_glrlm'):
2023
+ styles_texture_families[3] = "solid"
2024
+ colors_texture_families[3] = selected_feat_color
2025
+ width_texture_families[3] += df['final_coefficient'][i]
2026
+ elif family_name.startswith('_gldzm'):
2027
+ styles_texture_families[4] = "solid"
2028
+ colors_texture_families[4] = selected_feat_color
2029
+ width_texture_families[4] += df['final_coefficient'][i]
2030
+ elif family_name.startswith('_glszm'):
2031
+ styles_texture_families[5] = "solid"
2032
+ colors_texture_families[5] = selected_feat_color
2033
+ width_texture_families[5] += df['final_coefficient'][i]
2034
+
2035
+ # Get best texture family
2036
+ best_family_name = ""
2037
+ index_best_family = np.argmax(width_texture_families)
2038
+ best_family_name = families_names[index_best_family]
2039
+ features_names = texture_features_all[index_best_family]
2040
+
2041
+ # Update texture-families-features connection
2042
+ width_texture_families_feature = [initial_width] * len(features_names)
2043
+ colors_texture_families_feature = ["black"] * len(features_names)
2044
+ styles_texture_families_feature = ["dashed"] * len(features_names)
2045
+ for i, row in df['features'].items():
2046
+ level_name = row.split('__')[1].lower()
2047
+ family_name = row.split('__')[2].lower()
2048
+ feature_name = row.split('__')
2049
+ if level_name.startswith('glcm') and family_name.startswith('_' + best_family_name) and best_filter + '__' in row:
2050
+ for feature in features_names:
2051
+ if feature in feature_name:
2052
+ colors_texture_families_feature[features_names.index(feature)] = selected_feat_color
2053
+ styles_texture_families_feature[features_names.index(feature)] = "solid"
2054
+ width_texture_families_feature[features_names.index(feature)] += df['final_coefficient'][i]
2055
+ break
2056
+
2057
+ # Update color for the best texture family
2058
+ colors_texture_families_feature[np.argmax(width_texture_families_feature)] = optimal_lvl_color
2059
+
2060
+ # Update color
2061
+ colors_texture_families[np.argmax(width_texture_families)] = optimal_lvl_color
2062
+ else:
2063
+ colors_tf_levels[0] = optimal_lvl_color
2064
+ styles_tf_levels[0] = "solid"
2065
+
2066
+ # For aesthetic purposes
2067
+ experiment_sep = experiment.replace('_', '\n')
2068
+
2069
+ # Design the graph
2070
+ G = nx.Graph()
2071
+ G.add_edge(experiment_sep, 'TF', color=colors_outcome_levels[2], width=width_outcome_levels[2], style=styles_outcome_levels[2])
2072
+
2073
+ # Add best filter
2074
+ best_filter = best_filter.replace('_', '\n')
2075
+ G.add_edge('TF', best_filter.upper(), color=optimal_lvl_color, width=width_tf_filters[index_best_filter], style="solid")
2076
+
2077
+ # Check which level is the best (intensity or texture)
2078
+ if width_tf_levels[1] <= width_tf_levels[0]:
2079
+ G.add_edge(best_filter.upper(), 'TF\nInt', color=colors_tf_levels[0], width=width_tf_levels[0], style=styles_tf_levels[0])
2080
+ else:
2081
+ G.add_edge(best_filter.upper(), 'TF\nText', color=colors_tf_levels[1], width=width_tf_levels[1], style=styles_tf_levels[1])
2082
+
2083
+ # Put best level index in the middle
2084
+ nodes_order = [0, 1, 2, 3, 4, 5]
2085
+ nodes_order.insert(3, nodes_order.pop(nodes_order.index(np.argmax(width_texture_families))))
2086
+
2087
+ # Reorder nodes names
2088
+ nodes_names = ['TF\nGLCM', 'TF\nNGTDM', 'TF\nNGLDM', 'TF\nGLRLM', 'TF\nGLDZM', 'TF\nGLSZM']
2089
+ nodes_names = [nodes_names[i] for i in nodes_order]
2090
+ colors_texture_families = [colors_texture_families[i] for i in nodes_order]
2091
+ width_texture_families = [width_texture_families[i] for i in nodes_order]
2092
+ styles_texture_families = [styles_texture_families[i] for i in nodes_order]
2093
+
2094
+ # Add texture features families nodes
2095
+ for idx, node_name in enumerate(nodes_names):
+ G.add_edge(
+ 'TF\nText',
+ node_name,
2099
+ color=colors_texture_families[idx],
2100
+ width=width_texture_families[idx],
2101
+ style=styles_texture_families[idx]
2102
+ )
2103
+
2104
+ # Continue path to the textural features
2105
+ best_node_name = f'TF\n{best_family_name.upper()}'
2106
+ for idx, feature in enumerate(features_names):
2107
+ G.add_edge(
2108
+ best_node_name,
2109
+ feature.replace('_', '\n'),
2110
+ color=colors_texture_families_feature[idx],
2111
+ width=width_texture_families_feature[idx],
2112
+ style=styles_texture_families_feature[idx]
2113
+ )
2114
+
2115
+ # Graph layout
2116
+ pos = graphviz_layout(G, root=experiment_sep, prog="dot")
2117
+
2118
+ # Create the plot: figure and axis
2119
+ fig = plt.figure(figsize=figsize, dpi=300)
2120
+ ax = fig.add_subplot(1, 1, 1)
2121
+
2122
+ # Get the attributes of the edges
2123
+ colors = nx.get_edge_attributes(G,'color').values()
2124
+ widths = nx.get_edge_attributes(G,'width').values()
2125
+ style = nx.get_edge_attributes(G,'style').values()
2126
+
2127
+ # Draw the graph
2128
+ cmap = [to_rgba('b')] * len(pos)
2129
+ nx.draw(
2130
+ G,
2131
+ pos=pos,
2132
+ ax=ax,
2133
+ edge_color=colors,
2134
+ width=list(widths),
2135
+ with_labels=True,
2136
+ node_color=cmap,
2137
+ node_size=1700,
2138
+ font_size=8,
2139
+ font_color='white',
2140
+ font_weight='bold',
2141
+ node_shape='o',
2142
+ style=style
2143
+ )
2144
+
2145
+ # Create custom legend
2146
+ custom_legends = [
2147
+ Line2D([0], [0], color=selected_feat_color, lw=4, linestyle='solid', label=f'Selected (thickness reflects impact)'),
2148
+ Line2D([0], [0], color='black', lw=4, linestyle='dashed', label='Not selected')
2149
+ ]
2150
+
+ # Update keys according to the optimal level
+ figure_keys = [mpatches.Patch(color='none', label='TF: Textural Filters')]
2154
+ if width_tf_levels[1] > width_tf_levels[0]:
2155
+ figure_keys.append(mpatches.Patch(color='none', label='Text: Textural'))
2156
+ else:
2157
+ figure_keys.append(mpatches.Patch(color='none', label='Int: Intensity'))
2158
+
2159
+ custom_legends.append(
2160
+ Line2D([0], [0], color=optimal_lvl_color, lw=4, linestyle='solid', label='Path with highest impact')
2161
+ )
2162
+
2163
+ # Set title
2164
+ if title:
2165
+ ax.set_title(title, fontsize=20)
2166
+ else:
2167
+ ax.set_title(
2168
+ f'Radiomics explanation tree:'\
2169
+ + f'\nExperiment: {experiment}'\
2170
+ + f'\nLevel: {level}'\
2171
+ + f'\nModality: {modality}', fontsize=20
2172
+ )
2173
+
2174
+ # Apply the custom legend
2175
+ legend = plt.legend(handles=custom_legends, loc='upper right', fontsize=15, frameon=True, title = "Legend")
2176
+ legend.get_frame().set_edgecolor('black')
2177
+ legend.get_frame().set_linewidth(2.0)
2178
+
2179
+ # Abbreviations legend
2180
+ legend_keys = plt.legend(handles=figure_keys, loc='center right', fontsize=15, frameon=True, title = "Abbreviations", handlelength=0)
2181
+ legend_keys.get_frame().set_edgecolor('black')
2182
+ legend_keys.get_frame().set_linewidth(2.0)
2183
+
2184
+ # Add both legends as separate artists on the axes
2185
+ plt.gca().add_artist(legend_keys)
2186
+ plt.gca().add_artist(legend)
2187
+
2188
+ # Tight layout
2189
+ fig.tight_layout()
2190
+
2191
+ # Save the plot (mandatory, since the figure does not display well in matplotlib's interactive viewer)
2192
+ fig.savefig(path_experiments / f'TF_{experiment}_{level}_{modality}_explanation_tree.png', dpi=300)
2193
+
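The globals texture_features_all and glcm_features_names used above are assumed here to be module-level lists shipped with the package's texture feature-name utilities, with texture_features_all indexed in the same order as families_names; the sketch below only illustrates the assumed shapes with placeholder feature names, not the real contents.

    # Hypothetical shapes only; the real lists are longer and defined elsewhere.
    glcm_features_names = ['joint_max', 'joint_entropy', 'contrast']   # per-filter GLCM responses
    texture_features_all = [                                           # same order as families_names
        ['joint_max', 'joint_entropy', 'contrast'],                    # glcm
        ['coarseness', 'contrast', 'busyness'],                        # ngtdm
        ['lde', 'hde', 'lgce'],                                        # ngldm
        ['sre', 'lre', 'glnu'],                                        # glrlm
        ['sde', 'lde', 'zd_var'],                                      # gldzm
        ['sze', 'lze', 'zs_var'],                                      # glszm
    ]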
2194
+ def to_json(
2195
+ self,
2196
+ response_train: list = None,
2197
+ response_test: list = None,
2198
+ response_holdout: list = None,
2199
+ patients_train: list = None,
2200
+ patients_test: list = None,
2201
+ patients_holdout: list = None
2202
+ ) -> dict:
2203
+ """
2204
+ Creates a dictionary with the results of the model using the class attributes.
2205
+
2206
+ Args:
2207
+ response_train (list): List of machine learning model predictions for the training set.
+ response_test (list): List of machine learning model predictions for the test set.
+ response_holdout (list): List of machine learning model predictions for the holdout set.
+ patients_train (list): List of patients in the training set.
+ patients_test (list): List of patients in the test set.
+ patients_holdout (list): List of patients in the holdout set.
+
+ Returns:
+ Dict: Dictionary with the responses of the model and the patients used for training, testing and holdout.
2215
+ """
2216
+ run_results = dict()
2217
+ run_results[self.model_id] = self.model_dict
2218
+
2219
+ # Training results info
2220
+ run_results[self.model_id]['train'] = dict()
2221
+ run_results[self.model_id]['train']['patients'] = patients_train
2222
+ run_results[self.model_id]['train']['response'] = response_train.tolist() if response_train is not None else []
2223
+
2224
+ # Testing results info
2225
+ run_results[self.model_id]['test'] = dict()
2226
+ run_results[self.model_id]['test']['patients'] = patients_test
2227
+ run_results[self.model_id]['test']['response'] = response_test.tolist() if response_test is not None else []
2228
+
2229
+ # Holdout results info
2230
+ run_results[self.model_id]['holdout'] = dict()
2231
+ run_results[self.model_id]['holdout']['patients'] = patients_holdout
2232
+ run_results[self.model_id]['holdout']['response'] = response_holdout.tolist() if response_holdout is not None else []
2233
+
2234
+ # keep a copy of the results
2235
+ self.results_dict = run_results
2236
+
2237
+ return run_results
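A hedged usage sketch of to_json, continuing the hypothetical results instance from the earlier sketch: it assumes a training run has already populated model_id and model_dict on the instance, and that responses arrive as numpy arrays (since .tolist() is called on them); all values shown are placeholders.

    import numpy as np

    run_results = results.to_json(
        response_train=np.array([0.12, 0.87]),
        response_test=np.array([0.40]),
        response_holdout=None,                      # None responses are stored as empty lists
        patients_train=['Patient1__CT', 'Patient2__CT'],
        patients_test=['Patient3__CT'],
        patients_holdout=None,
    )
    # run_results[results.model_id] holds the model dict plus
    # 'train' / 'test' / 'holdout' sub-dicts with 'patients' and 'response' keys.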