mediml-0.9.9-py3-none-any.whl

Files changed (78)
  1. MEDiml/MEDscan.py +1696 -0
  2. MEDiml/__init__.py +21 -0
  3. MEDiml/biomarkers/BatchExtractor.py +806 -0
  4. MEDiml/biomarkers/BatchExtractorTexturalFilters.py +840 -0
  5. MEDiml/biomarkers/__init__.py +16 -0
  6. MEDiml/biomarkers/diagnostics.py +125 -0
  7. MEDiml/biomarkers/get_oriented_bound_box.py +158 -0
  8. MEDiml/biomarkers/glcm.py +1602 -0
  9. MEDiml/biomarkers/gldzm.py +523 -0
  10. MEDiml/biomarkers/glrlm.py +1315 -0
  11. MEDiml/biomarkers/glszm.py +555 -0
  12. MEDiml/biomarkers/int_vol_hist.py +527 -0
  13. MEDiml/biomarkers/intensity_histogram.py +615 -0
  14. MEDiml/biomarkers/local_intensity.py +89 -0
  15. MEDiml/biomarkers/morph.py +1756 -0
  16. MEDiml/biomarkers/ngldm.py +780 -0
  17. MEDiml/biomarkers/ngtdm.py +414 -0
  18. MEDiml/biomarkers/stats.py +373 -0
  19. MEDiml/biomarkers/utils.py +389 -0
  20. MEDiml/filters/TexturalFilter.py +299 -0
  21. MEDiml/filters/__init__.py +9 -0
  22. MEDiml/filters/apply_filter.py +134 -0
  23. MEDiml/filters/gabor.py +215 -0
  24. MEDiml/filters/laws.py +283 -0
  25. MEDiml/filters/log.py +147 -0
  26. MEDiml/filters/mean.py +121 -0
  27. MEDiml/filters/textural_filters_kernels.py +1738 -0
  28. MEDiml/filters/utils.py +107 -0
  29. MEDiml/filters/wavelet.py +237 -0
  30. MEDiml/learning/DataCleaner.py +198 -0
  31. MEDiml/learning/DesignExperiment.py +480 -0
  32. MEDiml/learning/FSR.py +667 -0
  33. MEDiml/learning/Normalization.py +112 -0
  34. MEDiml/learning/RadiomicsLearner.py +714 -0
  35. MEDiml/learning/Results.py +2237 -0
  36. MEDiml/learning/Stats.py +694 -0
  37. MEDiml/learning/__init__.py +10 -0
  38. MEDiml/learning/cleaning_utils.py +107 -0
  39. MEDiml/learning/ml_utils.py +1015 -0
  40. MEDiml/processing/__init__.py +6 -0
  41. MEDiml/processing/compute_suv_map.py +121 -0
  42. MEDiml/processing/discretisation.py +149 -0
  43. MEDiml/processing/interpolation.py +275 -0
  44. MEDiml/processing/resegmentation.py +66 -0
  45. MEDiml/processing/segmentation.py +912 -0
  46. MEDiml/utils/__init__.py +25 -0
  47. MEDiml/utils/batch_patients.py +45 -0
  48. MEDiml/utils/create_radiomics_table.py +131 -0
  49. MEDiml/utils/data_frame_export.py +42 -0
  50. MEDiml/utils/find_process_names.py +16 -0
  51. MEDiml/utils/get_file_paths.py +34 -0
  52. MEDiml/utils/get_full_rad_names.py +21 -0
  53. MEDiml/utils/get_institutions_from_ids.py +16 -0
  54. MEDiml/utils/get_patient_id_from_scan_name.py +22 -0
  55. MEDiml/utils/get_patient_names.py +26 -0
  56. MEDiml/utils/get_radiomic_names.py +27 -0
  57. MEDiml/utils/get_scan_name_from_rad_name.py +22 -0
  58. MEDiml/utils/image_reader_SITK.py +37 -0
  59. MEDiml/utils/image_volume_obj.py +22 -0
  60. MEDiml/utils/imref.py +340 -0
  61. MEDiml/utils/initialize_features_names.py +62 -0
  62. MEDiml/utils/inpolygon.py +159 -0
  63. MEDiml/utils/interp3.py +43 -0
  64. MEDiml/utils/json_utils.py +78 -0
  65. MEDiml/utils/mode.py +31 -0
  66. MEDiml/utils/parse_contour_string.py +58 -0
  67. MEDiml/utils/save_MEDscan.py +30 -0
  68. MEDiml/utils/strfind.py +32 -0
  69. MEDiml/utils/textureTools.py +188 -0
  70. MEDiml/utils/texture_features_names.py +115 -0
  71. MEDiml/utils/write_radiomics_csv.py +47 -0
  72. MEDiml/wrangling/DataManager.py +1724 -0
  73. MEDiml/wrangling/ProcessDICOM.py +512 -0
  74. MEDiml/wrangling/__init__.py +3 -0
  75. mediml-0.9.9.dist-info/LICENSE.md +674 -0
  76. mediml-0.9.9.dist-info/METADATA +232 -0
  77. mediml-0.9.9.dist-info/RECORD +78 -0
  78. mediml-0.9.9.dist-info/WHEEL +4 -0
MEDiml/wrangling/DataManager.py
@@ -0,0 +1,1724 @@
+ import json
+ import logging
+ import os
+ import pickle
+ import re
+ from dataclasses import dataclass
+ from pathlib import Path
+ from time import time
+ from typing import List, Union
+
+ import matplotlib.pyplot as plt
+ import nibabel as nib
+ import numpy as np
+ import pandas as pd
+ import pydicom
+ import pydicom.errors
+ import pydicom.misc
+ import ray
+ from nilearn import image
+ from numpyencoder import NumpyEncoder
+ from tqdm import tqdm, trange
+
+ from ..MEDscan import MEDscan
+ from ..processing.compute_suv_map import compute_suv_map
+ from ..processing.segmentation import get_roi_from_indexes
+ from ..utils.get_file_paths import get_file_paths
+ from ..utils.get_patient_names import get_patient_names
+ from ..utils.imref import imref3d
+ from ..utils.json_utils import load_json, save_json
+ from ..utils.save_MEDscan import save_MEDscan
+ from .ProcessDICOM import ProcessDICOM
+
+
+ class DataManager(object):
+     """Reads all the raw data (DICOM, NIfTI) content and organizes it in instances of the MEDscan class."""
+
+
+     @dataclass
+     class DICOM(object):
+         """DICOM data management class that organizes the data during its conversion to the MEDscan class"""
+         stack_series_rs: List
+         stack_path_rs: List
+         stack_frame_rs: List
+         cell_series_id: List
+         cell_path_rs: List
+         cell_path_images: List
+         cell_frame_rs: List
+         cell_frame_id: List
+
+
+     @dataclass
+     class NIfTI(object):
+         """NIfTI data management class that organizes the data during its conversion to the MEDscan class"""
+         stack_path_images: List
+         stack_path_roi: List
+         stack_path_all: List
+
+
+     @dataclass
+     class Paths(object):
+         """Paths management class that organizes the paths used in the processing"""
+         _path_to_dicoms: List
+         _path_to_niftis: List
+         _path_csv: Union[Path, str]
+         _path_save: Union[Path, str]
+         _path_save_checks: Union[Path, str]
+         _path_pre_checks_settings: Union[Path, str]
+
+     def __init__(
+             self,
+             path_to_dicoms: List = [],
+             path_to_niftis: List = [],
+             path_csv: Union[Path, str] = None,
+             path_save: Union[Path, str] = None,
+             path_save_checks: Union[Path, str] = None,
+             path_pre_checks_settings: Union[Path, str] = None,
+             save: bool = True,
+             n_batch: int = 2
+     ) -> None:
+         """Constructor of the class DataManager.
+
+         Args:
+             path_to_dicoms (Union[Path, str], optional): Full path to the starting directory
+                 where the DICOM data is located.
+             path_to_niftis (Union[Path, str], optional): Full path to the starting directory
+                 where the NIfTI data is located.
+             path_csv (Union[Path, str], optional): Full path to the CSV file containing the scans info list.
+             path_save (Union[Path, str], optional): Full path to the directory where to save all the MEDscan classes.
+             path_save_checks (Union[Path, str], optional): Full path to the directory where to save all
+                 the pre-radiomics checks analysis results.
+             path_pre_checks_settings (Union[Path, str], optional): Full path to the JSON file of the pre-checks analysis
+                 parameters.
+             save (bool, optional): True to save the MEDscan classes in `path_save`.
+             n_batch (int, optional): Numerical value specifying the number of batches to use in the
+                 parallel computations (use 0 for serial computation).
+
+         Returns:
+             None
+         """
+         # Convert all paths to pathlib.Path
+         if path_to_dicoms:
+             path_to_dicoms = Path(path_to_dicoms)
+         if path_to_niftis:
+             path_to_niftis = Path(path_to_niftis)
+         if path_csv:
+             path_csv = Path(path_csv)
+         if path_save:
+             path_save = Path(path_save)
+         if path_save_checks:
+             path_save_checks = Path(path_save_checks)
+         if path_pre_checks_settings:
+             path_pre_checks_settings = Path(path_pre_checks_settings)
+
+         self.paths = self.Paths(
+             path_to_dicoms,
+             path_to_niftis,
+             path_csv,
+             path_save,
+             path_save_checks,
+             path_pre_checks_settings,
+         )
+         self.save = save
+         self.n_batch = n_batch
+         self.__dicom = self.DICOM(
+             stack_series_rs=[],
+             stack_path_rs=[],
+             stack_frame_rs=[],
+             cell_series_id=[],
+             cell_path_rs=[],
+             cell_path_images=[],
+             cell_frame_rs=[],
+             cell_frame_id=[]
+         )
+         self.__nifti = self.NIfTI(
+             stack_path_images=[],
+             stack_path_roi=[],
+             stack_path_all=[]
+         )
+         self.path_to_objects = []
+         self.summary = {}
+         self.csv_data = None
+         self.__studies = []
+         self.__institutions = []
+         self.__scans = []
+
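A minimal construction sketch (editor's illustration, not part of the package source; every path below is hypothetical):

    from MEDiml.wrangling.DataManager import DataManager

    dm = DataManager(
        path_to_dicoms="/data/raw_dicoms",    # starting directory of the DICOM tree
        path_save="/data/medscan_objects",    # where the MEDscan .npy files are written
        save=True,
        n_batch=4,                            # batches for the parallel (ray) processing
    )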
+     def __find_uid_cell_index(self, uid: Union[str, List[str]], cell: List[str]) -> List:
+         """Finds the `cell` entries equal to `uid`. If `uid` is not present in `cell`, returns
+         the position a new entry would take in `cell`.
+
+         Args:
+             uid (Union[str, List[str]]): Unique identifier of the Series to find.
+             cell (List[str]): List of Unique identifiers of the Series.
+
+         Returns:
+             List: Indexes of the entries of `cell` equal to `uid`, or `[len(cell)]` if `uid` is absent.
+         """
+         return [len(cell)] if uid not in cell else [i for i, e in enumerate(cell) if e == uid]
+
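The lookup semantics are easiest to see in isolation; a standalone re-implementation with two worked cases (editor's sketch, not package code):

    def find_uid_cell_index(uid, cell):
        # known uid -> every index holding it; unknown uid -> the slot a new entry would take
        return [len(cell)] if uid not in cell else [i for i, e in enumerate(cell) if e == uid]

    assert find_uid_cell_index("1.2.3", ["1.2.3", "9.9", "1.2.3"]) == [0, 2]
    assert find_uid_cell_index("5.5", ["1.2.3"]) == [1]  # would be appended at index 1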
+     def __get_list_of_files(self, dir_name: str) -> List:
+         """Recursively gets all files in the given directory
+
+         Args:
+             dir_name (str): directory name
+
+         Returns:
+             List: List of all files in the directory and its sub-directories
+         """
+         list_of_file = os.listdir(dir_name)
+         all_files = list()
+         for entry in list_of_file:
+             full_path = os.path.join(dir_name, entry)
+             if os.path.isdir(full_path):
+                 all_files = all_files + self.__get_list_of_files(full_path)
+             else:
+                 all_files.append(full_path)
+
+         return all_files
+
+     def __get_MEDscan_name_save(self, medscan: MEDscan) -> str:
+         """Returns the name that will be used to save the MEDscan instance, based on the values of the attributes.
+
+         Args:
+             medscan (MEDscan): A MEDscan class instance.
+
+         Returns:
+             str: String of the saving name.
+         """
+         series_description = medscan.series_description.translate({ord(ch): '-' for ch in '/\\ ()&:*'})
+         name_id = medscan.patientID.translate({ord(ch): '-' for ch in '/\\ ()&:*'})
+         # final saving name
+         name_complete = name_id + '__' + series_description + '.' + medscan.type + '.npy'
+         return name_complete
+
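The naming scheme is 'PatientID__SeriesDescription.type.npy' with filesystem-unfriendly characters mapped to '-'; a standalone sketch with made-up values:

    bad_chars = {ord(ch): '-' for ch in '/\\ ()&:*'}
    patient_id = "Glioma-TCGA-001".translate(bad_chars)
    series = "T1 (post contrast)".translate(bad_chars)
    print(patient_id + '__' + series + '.MRscan.npy')
    # Glioma-TCGA-001__T1--post-contrast-.MRscan.npy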
+     def __associate_rt_struct(self) -> None:
+         """Associates the imaging volumes to their masks using UIDs
+
+         Returns:
+             None
+         """
+         print('--> Associating all RT objects to imaging volumes')
+         n_rs = len(self.__dicom.stack_path_rs)
+         self.__dicom.stack_series_rs = list(dict.fromkeys(self.__dicom.stack_series_rs))
+         if n_rs:
+             for i in trange(0, n_rs):
+                 try:
+                     # PUT ALL THE DICOM PATHS WITH THE SAME UID IN THE SAME PATH LIST
+                     ind_series_id = self.__find_uid_cell_index(
+                         self.__dicom.stack_series_rs[i],
+                         self.__dicom.cell_series_id)
+                     for n in range(len(ind_series_id)):
+                         if ind_series_id[n] < len(self.__dicom.cell_path_rs):
+                             self.__dicom.cell_path_rs[ind_series_id[n]] += [self.__dicom.stack_path_rs[i]]
+                 except Exception:
+                     # Fall back to the frame-of-reference UID when the series UID lookup fails
+                     ind_series_id = self.__find_uid_cell_index(
+                         self.__dicom.stack_frame_rs[i],
+                         self.__dicom.cell_frame_id)
+                     for n in range(len(ind_series_id)):
+                         if ind_series_id[n] < len(self.__dicom.cell_path_rs):
+                             self.__dicom.cell_path_rs[ind_series_id[n]] += [self.__dicom.stack_path_rs[i]]
+         print('DONE')
+
+     def __read_all_dicoms(self) -> None:
+         """Reads all the DICOM files found under the paths of the attribute `_path_to_dicoms`
+
+         Returns:
+             None
+         """
+         # SCANNING ALL FOLDERS IN INITIAL DIRECTORY
+         print('\n--> Scanning all folders in initial directory...', end='')
+         p = Path(self.paths._path_to_dicoms)
+         e_rglob = '*.dcm'
+
+         # EXTRACT ALL FILES IN THE PATH TO DICOMS
+         if self.paths._path_to_dicoms.is_dir():
+             stack_folder_temp = list(p.rglob(e_rglob))
+             stack_folder = [x for x in stack_folder_temp if not x.is_dir()]
+         elif 'json' in str(self.paths._path_to_dicoms):
+             with open(self.paths._path_to_dicoms) as f:
+                 data = json.load(f)
+             for value in data.values():
+                 stack_folder_temp = value
+                 directory_name = str(stack_folder_temp).replace("'", '').replace('[', '').replace(']', '')
+                 stack_folder = self.__get_list_of_files(directory_name)
+         else:
+             raise ValueError("The given DICOM folder path either doesn't exist or is neither a folder nor a JSON file.")
+         # READ ALL DICOM FILES AND UPDATE ATTRIBUTES FOR FURTHER PROCESSING
+         for file in tqdm(stack_folder):
+             if pydicom.misc.is_dicom(file):
+                 try:
+                     info = pydicom.dcmread(str(file))
+                     if info.Modality in ['MR', 'PT', 'CT']:
+                         ind_series_id = self.__find_uid_cell_index(
+                             info.SeriesInstanceUID,
+                             self.__dicom.cell_series_id)[0]
+                         if ind_series_id == len(self.__dicom.cell_series_id):  # New volume
+                             self.__dicom.cell_series_id = self.__dicom.cell_series_id + [info.SeriesInstanceUID]
+                             self.__dicom.cell_frame_id += [info.FrameOfReferenceUID]
+                             self.__dicom.cell_path_images += [[]]
+                             self.__dicom.cell_path_rs = self.__dicom.cell_path_rs + [[]]
+                         self.__dicom.cell_path_images[ind_series_id] += [file]
+                     elif info.Modality == 'RTSTRUCT':
+                         self.__dicom.stack_path_rs += [file]
+                         try:
+                             series_uid = info.ReferencedFrameOfReferenceSequence[
+                                 0].RTReferencedStudySequence[
+                                     0].RTReferencedSeriesSequence[
+                                         0].SeriesInstanceUID
+                         except Exception:
+                             series_uid = 'NotFound'
+                         self.__dicom.stack_series_rs += [series_uid]
+                         try:
+                             frame_uid = info.ReferencedFrameOfReferenceSequence[0].FrameOfReferenceUID
+                         except Exception:
+                             frame_uid = info.FrameOfReferenceUID
+                         self.__dicom.stack_frame_rs += [frame_uid]
+                     else:
+                         print("Modality not supported: ", info.Modality)
+
+                 except Exception as e:
+                     print(f'Error while reading: {file}, error: {e}\n')
+                     continue
+         print('DONE')
+
+         # ASSOCIATE ALL VOLUMES TO THEIR MASK
+         self.__associate_rt_struct()
+
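The scan-and-classify pass boils down to this pydicom pattern; a self-contained sketch over a hypothetical folder (not the package's exact bookkeeping):

    from pathlib import Path
    import pydicom
    import pydicom.misc

    volumes, rtstructs = {}, []
    for f in Path("/data/raw_dicoms").rglob("*.dcm"):
        if not pydicom.misc.is_dicom(str(f)):
            continue
        info = pydicom.dcmread(str(f), stop_before_pixels=True)  # headers are enough here
        if info.Modality in ('MR', 'PT', 'CT'):
            volumes.setdefault(info.SeriesInstanceUID, []).append(f)  # one list per series
        elif info.Modality == 'RTSTRUCT':
            rtstructs.append(f)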
+     def process_all_dicoms(self) -> None:
+         """This function reads the DICOM content of all the sub-folder tree of a starting directory defined by
+         `path_to_dicoms`. It then organizes the data (files throughout the starting directory are associated by
+         'SeriesInstanceUID') in the MEDscan class including the region of interest (ROI) defined by an
+         associated RTstruct. All MEDscan classes hereby created are saved in `path_save` with a name
+         varying with every scan.
+
+         Returns:
+             None.
+         """
+         ray.init(local_mode=True, include_dashboard=True)
+
+         print('--> Reading all DICOM objects to create MEDscan classes')
+         self.__read_all_dicoms()
+
+         print('--> Processing DICOMs and creating MEDscan objects')
+         n_scans = len(self.__dicom.cell_path_images)
+         if self.n_batch is None:
+             n_batch = 1
+         elif n_scans < self.n_batch:
+             n_batch = n_scans
+         else:
+             n_batch = self.n_batch
+
+         # Distribute the first tasks to all workers
+         pds = [ProcessDICOM(
+             self.__dicom.cell_path_images[i],
+             self.__dicom.cell_path_rs[i],
+             self.paths._path_save,
+             self.save)
+             for i in range(n_batch)]
+
+         # (loop variable named so it does not shadow the pandas import)
+         ids = [process_dicom.process_files() for process_dicom in pds]
+
+         # Update the path to the created instances
+         for name_save in ray.get(ids):
+             if self.paths._path_save:
+                 self.path_to_objects.append(str(self.paths._path_save / name_save))
+             # Update processing summary
+             if name_save.split('_')[0].count('-') >= 2:
+                 scan_type = name_save[name_save.find('__')+2 : name_save.find('.')]
+                 if name_save.split('-')[0] not in self.__studies:
+                     self.__studies.append(name_save.split('-')[0])  # add new study
+                 if name_save.split('-')[1] not in self.__institutions:
+                     self.__institutions.append(name_save.split('-')[1])  # add new institution
+                 if name_save.split('-')[0] not in self.summary:
+                     self.summary[name_save.split('-')[0]] = {}
+                 if name_save.split('-')[1] not in self.summary[name_save.split('-')[0]]:
+                     self.summary[name_save.split('-')[0]][name_save.split('-')[1]] = {}  # add new institution
+                 if scan_type not in self.__scans:
+                     self.__scans.append(scan_type)
+                 if scan_type not in self.summary[name_save.split('-')[0]][name_save.split('-')[1]]:
+                     self.summary[name_save.split('-')[0]][name_save.split('-')[1]][scan_type] = []
+                 if name_save not in self.summary[name_save.split('-')[0]][name_save.split('-')[1]][scan_type]:
+                     self.summary[name_save.split('-')[0]][name_save.split('-')[1]][scan_type].append(name_save)
+             else:
+                 if self.save:
+                     logging.warning(f"The patient ID of the following file: {name_save} does not respect the MEDiml "
+                                     "naming convention 'study-institution-id' (Ex: Glioma-TCGA-001)")
+
+         nb_job_left = n_scans - n_batch
+
+         # Distribute the remaining tasks
+         for _ in trange(n_scans):
+             _, ids = ray.wait(ids, num_returns=1)
+             if nb_job_left > 0:
+                 idx = n_scans - nb_job_left
+                 process_dicom = ProcessDICOM(
+                     self.__dicom.cell_path_images[idx],
+                     self.__dicom.cell_path_rs[idx],
+                     self.paths._path_save,
+                     self.save)
+                 ids.extend([process_dicom.process_files()])
+                 nb_job_left -= 1
+
+         # Update the path to the created instances
+         for name_save in ray.get(ids):
+             if self.paths._path_save:
+                 self.path_to_objects.append(str(self.paths._path_save / name_save))
+             # Update processing summary
+             if name_save.split('_')[0].count('-') >= 2:
+                 scan_type = name_save[name_save.find('__')+2 : name_save.find('.')]
+                 if name_save.split('-')[0] not in self.__studies:
+                     self.__studies.append(name_save.split('-')[0])  # add new study
+                 if name_save.split('-')[1] not in self.__institutions:
+                     self.__institutions.append(name_save.split('-')[1])  # add new institution
+                 if name_save.split('-')[0] not in self.summary:
+                     self.summary[name_save.split('-')[0]] = {}
+                 if name_save.split('-')[1] not in self.summary[name_save.split('-')[0]]:
+                     self.summary[name_save.split('-')[0]][name_save.split('-')[1]] = {}  # add new institution
+                 if scan_type not in self.__scans:
+                     self.__scans.append(scan_type)
+                 if scan_type not in self.summary[name_save.split('-')[0]][name_save.split('-')[1]]:
+                     self.summary[name_save.split('-')[0]][name_save.split('-')[1]][scan_type] = []
+                 if name_save not in self.summary[name_save.split('-')[0]][name_save.split('-')[1]][scan_type]:
+                     self.summary[name_save.split('-')[0]][name_save.split('-')[1]][scan_type].append(name_save)
+             else:
+                 if self.save:
+                     logging.warning(f"The patient ID of the following file: {name_save} does not respect the MEDiml "
+                                     "naming convention 'study-institution-id' (Ex: Glioma-TCGA-001)")
+         print('DONE')
+
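End-to-end usage sketch (editor's illustration; paths hypothetical, ray is initialized inside the method itself):

    dm = DataManager(path_to_dicoms="/data/raw_dicoms",
                     path_save="/data/medscan_objects")
    dm.process_all_dicoms()
    print(dm.path_to_objects[:3])   # paths of the saved MEDscan .npy files
    dm.summarize()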
+     def __read_all_niftis(self) -> None:
+         """Reads all files in the initial path and organizes the paths to images and ROIs
+         in the class attributes.
+
+         Returns:
+             None.
+         """
+         print('\n--> Scanning all folders in initial directory')
+         if not self.paths._path_to_niftis:
+             raise ValueError("The path to the NIfTIs is not defined")
+         p = Path(self.paths._path_to_niftis)
+         e_rglob1 = '*.nii'
+         e_rglob2 = '*.nii.gz'
+
+         # EXTRACT ALL FILES IN THE PATH TO NIFTIS
+         if p.is_dir():
+             self.__nifti.stack_path_all = list(p.rglob(e_rglob1))
+             self.__nifti.stack_path_all.extend(list(p.rglob(e_rglob2)))
+         else:
+             raise TypeError(f"{p} must be a path to a directory")
+
+         all_niftis = list(self.__nifti.stack_path_all)
+         for i in trange(0, len(all_niftis)):
+             if 'ROI' in all_niftis[i].name.split("."):
+                 self.__nifti.stack_path_roi.append(all_niftis[i])
+             else:
+                 self.__nifti.stack_path_images.append(all_niftis[i])
+         print('DONE')
+
+     def __associate_roi_to_image(
+             self,
+             image_file: Union[Path, str],
+             medscan: MEDscan,
+             nifti: nib.Nifti1Image,
+             path_roi_data: Path = None
+     ) -> MEDscan:
+         """Extracts all ROI data from the given path for the given patient ID and updates all class attributes with
+         the new extracted data.
+
+         Args:
+             image_file (Union[Path, str]): Path to the image file.
+             medscan (MEDscan): MEDscan class instance that will hold the data.
+             nifti (nib.Nifti1Image): Image that the ROI masks are resampled to.
+             path_roi_data (Path, optional): Path to the ROI files; defaults to `_path_to_niftis`.
+
+         Returns:
+             MEDscan: Returns a MEDscan instance with updated ROI attributes.
+         """
+         image_file = Path(image_file)
+         roi_index = 0
+
+         if not path_roi_data:
+             if not self.paths._path_to_niftis:
+                 raise ValueError("The path to the NIfTIs is not defined")
+             else:
+                 path_roi_data = self.paths._path_to_niftis
+
+         for file in path_roi_data.glob('*.nii.gz'):
+             _id = image_file.name.split("(")[0]  # id is PatientID__ImagingScanName
+             # Load the patient's ROI nifti files:
+             if file.name.startswith(_id) and 'ROI' in file.name.split("."):
+                 roi = nib.load(file)
+                 roi = image.resample_to_img(roi, nifti, interpolation='nearest')
+                 roi_data = roi.get_fdata()
+                 roi_name = file.name[file.name.find("(") + 1 : file.name.find(")")]
+                 name_set = file.name[file.name.find("_") + 2 : file.name.find("(")]
+                 medscan.data.ROI.update_indexes(key=roi_index, indexes=np.nonzero(roi_data.flatten()))
+                 medscan.data.ROI.update_name_set(key=roi_index, name_set=name_set)
+                 medscan.data.ROI.update_roi_name(key=roi_index, roi_name=roi_name)
+                 roi_index += 1
+         return medscan
+
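The core of the association is nilearn's grid resampling plus flattened voxel indexes; an isolated sketch with hypothetical file names following the expected 'PatientID__ScanName(ROI).Modality' pattern:

    import nibabel as nib
    import numpy as np
    from nilearn import image

    img = nib.load("Glioma-TCGA-001__T1.MRscan.nii.gz")
    roi = nib.load("Glioma-TCGA-001__T1(GTV).ROI.nii.gz")
    roi = image.resample_to_img(roi, img, interpolation="nearest")  # align the ROI grid to the image
    flat_indexes = np.nonzero(roi.get_fdata().flatten())            # voxel indexes stored per ROI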
+     def __associate_spatialRef(self, nifti_file: Union[Path, str], medscan: MEDscan) -> MEDscan:
+         """Computes the imref3d spatialRef using a NIfTI file and updates the spatialRef attribute.
+
+         Args:
+             nifti_file (Union[Path, str]): Path to the NIfTI data.
+             medscan (MEDscan): MEDscan class instance that will hold the data.
+
+         Returns:
+             MEDscan: Returns a MEDscan instance with updated spatialRef attribute.
+         """
+         # Loading the nifti file
+         nifti = nib.load(nifti_file)
+         nifti_data = medscan.data.volume.array
+
+         # spatialRef Creation
+         pixel_x = abs(nifti.affine[0, 0])
+         pixel_y = abs(nifti.affine[1, 1])
+         slices = abs(nifti.affine[2, 2])
+         min_grid = nifti.affine[:3, 3] * [-1.0, -1.0, 1.0]  # x and y are flipped
+         min_x_grid = min_grid[0]
+         min_y_grid = min_grid[1]
+         min_z_grid = min_grid[2]
+         size_image = np.shape(nifti_data)
+         spatialRef = imref3d(size_image, abs(pixel_x), abs(pixel_y), abs(slices))
+         spatialRef.XWorldLimits = (np.array(spatialRef.XWorldLimits) -
+                                    (spatialRef.XWorldLimits[0] -
+                                     (min_x_grid - pixel_x / 2))
+                                    ).tolist()
+         spatialRef.YWorldLimits = (np.array(spatialRef.YWorldLimits) -
+                                    (spatialRef.YWorldLimits[0] -
+                                     (min_y_grid - pixel_y / 2))
+                                    ).tolist()
+         spatialRef.ZWorldLimits = (np.array(spatialRef.ZWorldLimits) -
+                                    (spatialRef.ZWorldLimits[0] -
+                                     (min_z_grid - slices / 2))
+                                    ).tolist()
+
+         # Converting the results into lists
+         spatialRef.ImageSize = spatialRef.ImageSize.tolist()
+         spatialRef.XIntrinsicLimits = spatialRef.XIntrinsicLimits.tolist()
+         spatialRef.YIntrinsicLimits = spatialRef.YIntrinsicLimits.tolist()
+         spatialRef.ZIntrinsicLimits = spatialRef.ZIntrinsicLimits.tolist()
+
+         # update spatialRef in the volume sub-class
+         medscan.data.volume.update_spatialRef(spatialRef)
+
+         return medscan
+
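The quantities pulled from the affine above, shown in isolation (diagonal affine assumed, as the method does; file name hypothetical):

    import nibabel as nib
    import numpy as np

    nifti = nib.load("scan.nii.gz")
    pixel_x, pixel_y, slice_s = np.abs(np.diag(nifti.affine)[:3])   # voxel spacings
    origin = nifti.affine[:3, 3] * [-1.0, -1.0, 1.0]                # x and y flipped (RAS -> LPS)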
+     def __process_one_nifti(self, nifti_file: Union[Path, str], path_data: Union[Path, str]) -> MEDscan:
+         """
+         Processes one NIfTI file to create a MEDscan class instance.
+
+         Args:
+             nifti_file (Union[Path, str]): Path to the NIfTI file.
+             path_data (Union[Path, str]): Path to the data.
+
+         Returns:
+             MEDscan: MEDscan class instance.
+         """
+         nifti_file = Path(nifti_file)  # `.name` is used below, so make sure this is a Path
+         medscan = MEDscan()
+         medscan.patientID = os.path.basename(nifti_file).split("_")[0]
+         medscan.type = os.path.basename(nifti_file).split(".")[-3]
+         medscan.series_description = nifti_file.name[nifti_file.name.find('__') + 2: nifti_file.name.find('(')]
+         medscan.format = "nifti"
+         medscan.data.set_orientation(orientation="Axial")
+         medscan.data.set_patient_position(patient_position="HFS")
+         medscan.data.volume.array = nib.load(nifti_file).get_fdata()
+         medscan.data.volume.scan_rot = None
+
+         # Update spatialRef
+         medscan = self.__associate_spatialRef(nifti_file, medscan)
+
+         # Associate ROI
+         medscan = self.__associate_roi_to_image(nifti_file, medscan, nib.load(nifti_file), path_data)
+
+         return medscan
+
+     def process_all(self) -> None:
+         """Processes both DICOM & NIfTI content to create MEDscan classes"""
+         self.process_all_dicoms()
+         self.process_all_niftis()
+
+     def process_all_niftis(self) -> List[MEDscan]:
+         """This function reads the NIfTI content of all the sub-folder tree of a starting directory.
+         It then organizes the data in the MEDscan class including the region of interest (ROI)
+         defined by an associated mask file. All MEDscan classes hereby created are saved in a specific path
+         with a name varying with every scan.
+
+         Args:
+             None.
+
+         Returns:
+             List[MEDscan]: List of MEDscan instances.
+         """
+
+         # Reading all NIfTI files
+         self.__read_all_niftis()
+
+         # Create the MEDscan instances
+         print('--> Reading all NIfTI objects (imaging volumes & masks) to create MEDscan classes')
+         list_instances = []
+         for file in tqdm(self.__nifti.stack_path_images):
+             # Assert the list of instances does not exceed a size of 10
+             if len(list_instances) >= 10:
+                 print('The number of MEDscan instances exceeds 10, please consider saving the instances')
+                 break
+             # INITIALIZE MEDscan INSTANCE AND UPDATE ATTRIBUTES
+             medscan = MEDscan()
+             medscan.patientID = os.path.basename(file).split("_")[0]
+             medscan.type = os.path.basename(file).split(".")[-3]
+             medscan.series_description = file.name[file.name.find('__') + 2: file.name.find('(')]
+             medscan.format = "nifti"
+             medscan.data.set_orientation(orientation="Axial")
+             medscan.data.set_patient_position(patient_position="HFS")
+             medscan.data.volume.array = nib.load(file).get_fdata()
+
+             # RAS to LPS
+             # medscan.data.volume.convert_to_LPS()
+             medscan.data.volume.scan_rot = None
+
+             # Update spatialRef
+             medscan = self.__associate_spatialRef(file, medscan)
+
+             # Get ROI
+             medscan = self.__associate_roi_to_image(file, medscan, nib.load(file))
+
+             # SAVE MEDscan INSTANCE
+             if self.save and self.paths._path_save:
+                 save_MEDscan(medscan, self.paths._path_save)
+             else:
+                 list_instances.append(medscan)
+
+             # Get the saving name of the created instance
+             name_save = self.__get_MEDscan_name_save(medscan)
+
+             # Clear memory
+             del medscan
+
+             # Update the path to the created instances
+             if self.paths._path_save:
+                 self.path_to_objects.append(str(self.paths._path_save / name_save))
+
+             # Update processing summary
+             if name_save.split('_')[0].count('-') >= 2:
+                 scan_type = name_save[name_save.find('__')+2 : name_save.find('.')]
+                 if name_save.split('-')[0] not in self.__studies:
+                     self.__studies.append(name_save.split('-')[0])  # add new study
+                 if name_save.split('-')[1] not in self.__institutions:
+                     self.__institutions.append(name_save.split('-')[1])  # add new institution
+                 if name_save.split('-')[0] not in self.summary:
+                     self.summary[name_save.split('-')[0]] = {}  # add new study to summary
+                 if name_save.split('-')[1] not in self.summary[name_save.split('-')[0]]:
+                     self.summary[name_save.split('-')[0]][name_save.split('-')[1]] = {}  # add new institution
+                 if scan_type not in self.__scans:
+                     self.__scans.append(scan_type)
+                 if scan_type not in self.summary[name_save.split('-')[0]][name_save.split('-')[1]]:
+                     self.summary[name_save.split('-')[0]][name_save.split('-')[1]][scan_type] = []
+                 if name_save not in self.summary[name_save.split('-')[0]][name_save.split('-')[1]][scan_type]:
+                     self.summary[name_save.split('-')[0]][name_save.split('-')[1]][scan_type].append(name_save)
+             else:
+                 if self.save:
+                     logging.warning(f"The patient ID of the following file: {name_save} does not respect the MEDiml "
+                                     "naming convention 'study-institution-id' (Ex: Glioma-TCGA-001)")
+         print('DONE')
+
+         if list_instances:
+             return list_instances
+
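Usage sketch for the NIfTI path (editor's illustration; the folder is hypothetical and files are expected to follow the 'PatientID__SeriesDescription(ROIlabel).Modality.nii.gz' naming seen above):

    dm = DataManager(path_to_niftis="/data/niftis", save=False)
    instances = dm.process_all_niftis()   # returns the in-memory MEDscan instances (capped at 10)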
+     def update_from_csv(self, path_csv: Union[str, Path] = None) -> None:
+         """Updates the class from a given CSV and summarizes the processed scans again according to it.
+
+         Args:
+             path_csv (Union[str, Path], optional): Path to a CSV file; if not given, will check
+                 for CSV info in the class attributes.
+
+         Returns:
+             None
+         """
+         if not (path_csv or self.paths._path_csv):
+             print('No CSV provided, no updates will be made')
+         else:
+             if path_csv:
+                 self.paths._path_csv = Path(path_csv)
+             # Extract roi type label from CSV file name
+             name_csv = self.paths._path_csv.name
+             roi_type_label = name_csv[name_csv.find('_')+1 : name_csv.find('.')]
+
+             # Create a dictionary
+             csv_data = {}
+             csv_data[roi_type_label] = pd.read_csv(self.paths._path_csv)
+             self.csv_data = csv_data
+             self.summarize()
+
+     def summarize(self, return_summary: bool = False) -> None:
+         """Creates and shows a summary of processed scans organized by study, institution, scan type and roi type
+
+         Args:
+             return_summary (bool, optional): If True, will return the summary as a DataFrame.
+
+         Returns:
+             None
+         """
+         def count_scans(summary):
+             count = 0
+             if type(summary) == dict:
+                 for study in summary:
+                     if type(summary[study]) == dict:
+                         for institution in summary[study]:
+                             if type(summary[study][institution]) == dict:
+                                 for scan in summary[study][institution]:
+                                     count += len(summary[study][institution][scan])
+                             else:
+                                 count += len(summary[study][institution])
+                     else:
+                         count += len(summary[study])
+             elif type(summary) == list:
+                 count = len(summary)
+             return count
+
+         summary_df = pd.DataFrame(columns=['study', 'institution', 'scan_type', 'roi_type', 'count'])
+
+         for study in self.summary:
+             summary_df = summary_df.append({
+                 'study': study,
+                 'institution': "",
+                 'scan_type': "",
+                 'roi_type': "",
+                 'count': count_scans(self.summary)
+             }, ignore_index=True)
+             for institution in self.summary[study]:
+                 summary_df = summary_df.append({
+                     'study': study,
+                     'institution': institution,
+                     'scan_type': "",
+                     'roi_type': "",
+                     'count': count_scans(self.summary[study][institution])
+                 }, ignore_index=True)
+                 for scan in self.summary[study][institution]:
+                     summary_df = summary_df.append({
+                         'study': study,
+                         'institution': institution,
+                         'scan_type': scan,
+                         'roi_type': "",
+                         'count': count_scans(self.summary[study][institution][scan])
+                     }, ignore_index=True)
+                     if self.csv_data:
+                         roi_count = 0
+                         for roi_type in self.csv_data:
+                             csv_table = pd.DataFrame(self.csv_data[roi_type])
+                             csv_table['under'] = '_'
+                             csv_table['dot'] = '.'
+                             csv_table['npy'] = '.npy'
+                             name_patients = (pd.Series(
+                                 csv_table[['PatientID', 'under', 'under',
+                                            'ImagingScanName',
+                                            'dot',
+                                            'ImagingModality',
+                                            'npy']].fillna('').values.tolist()).str.join('')).tolist()
+                             for patient_id in self.summary[study][institution][scan]:
+                                 if patient_id in name_patients:
+                                     roi_count += 1
+                             summary_df = summary_df.append({
+                                 'study': study,
+                                 'institution': institution,
+                                 'scan_type': scan,
+                                 'roi_type': roi_type,
+                                 'count': roi_count
+                             }, ignore_index=True)
+         print(summary_df.to_markdown(index=False))
+
+         if return_summary:
+             return summary_df
+
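A short usage sketch (editor's illustration; the CSV export line is hypothetical):

    df = dm.summarize(return_summary=True)      # prints the markdown table and returns it
    df.to_csv("scan_summary.csv", index=False)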
+     def __pre_radiomics_checks_dimensions(
+             self,
+             path_data: Union[Path, str] = None,
+             wildcards_dimensions: List[str] = [],
+             min_percentile: float = 0.05,
+             max_percentile: float = 0.95,
+             save: bool = False
+     ) -> None:
+         """Finds proper voxel dimension options for radiomics analyses for a group of scans
+
+         Args:
+             path_data (Path, optional): Path to the MEDscan objects, if not specified will use ``path_save`` from the
+                 inner-class ``Paths`` in the current instance.
+             wildcards_dimensions (List[str], optional): List of wildcards that determines the scans
+                 that will be analyzed. You can learn more about wildcards in
+                 `this link <https://www.linuxtechtips.com/2013/11/how-wildcards-work-in-linux-and-unix.html>`_.
+             min_percentile (float, optional): Minimum percentile to use for the histograms. Defaults to 0.05.
+             max_percentile (float, optional): Maximum percentile to use for the histograms. Defaults to 0.95.
+             save (bool, optional): If True, will save the results in a json file. Defaults to False.
+
+         Returns:
+             None.
+         """
+         xy_dim = {
+             "data": [],
+             "mean": [],
+             "median": [],
+             "std": [],
+             "min": [],
+             "max": [],
+             f"p{min_percentile}": [],
+             f"p{max_percentile}": []
+         }
+         z_dim = {
+             "data": [],
+             "mean": [],
+             "median": [],
+             "std": [],
+             "min": [],
+             "max": [],
+             f"p{min_percentile}": [],
+             f"p{max_percentile}": []
+         }
+         if type(wildcards_dimensions) is str:
+             wildcards_dimensions = [wildcards_dimensions]
+
+         if len(wildcards_dimensions) == 0:
+             print("Wildcard is empty, the pre-checks will be aborted")
+             return
+
+         # Updating plotting params
+         plt.rcParams["figure.figsize"] = (20, 20)
+         plt.rcParams.update({'font.size': 22})
+
+         # TODO: separate by studies and scan type (MRscan, CTscan...)
+         # TODO: Two summaries (df, list of names saves) ->
+         # name_save = name_save(ROI) : Glioma-Huashan-001__T1.MRscan.npy({GTV})
+         file_paths = list()
+         for w in range(len(wildcards_dimensions)):
+             wildcard = wildcards_dimensions[w]
+             if path_data:
+                 file_paths = get_file_paths(path_data, wildcard)
+             elif self.paths._path_save:
+                 file_paths = get_file_paths(self.paths._path_save, wildcard)
+             else:
+                 raise ValueError("Path data is invalid.")
+             n_files = len(file_paths)
+             xy_dim["data"] = np.zeros((n_files, 1))
+             xy_dim["data"] = np.multiply(xy_dim["data"], np.nan)
+             z_dim["data"] = np.zeros((n_files, 1))
+             z_dim["data"] = np.multiply(z_dim["data"], np.nan)
+             for f in tqdm(range(len(file_paths))):
+                 try:
+                     if file_paths[f].name.endswith("nii.gz") or file_paths[f].name.endswith("nii"):
+                         medscan = nib.load(file_paths[f])
+                         xy_dim["data"][f] = medscan.header.get_zooms()[0]
+                         z_dim["data"][f] = medscan.header.get_zooms()[2]
+                     else:
+                         medscan = np.load(file_paths[f], allow_pickle=True)
+                         xy_dim["data"][f] = medscan.data.volume.spatialRef.PixelExtentInWorldX
+                         z_dim["data"][f] = medscan.data.volume.spatialRef.PixelExtentInWorldZ
+                 except Exception as e:
+                     print(e)
+
+             # Running analysis (np.percentile expects percentiles on a 0-100 scale)
+             xy_dim["data"] = np.concatenate(xy_dim["data"])
+             xy_dim["mean"] = np.mean(xy_dim["data"][~np.isnan(xy_dim["data"])])
+             xy_dim["median"] = np.median(xy_dim["data"][~np.isnan(xy_dim["data"])])
+             xy_dim["std"] = np.std(xy_dim["data"][~np.isnan(xy_dim["data"])])
+             xy_dim["min"] = np.min(xy_dim["data"][~np.isnan(xy_dim["data"])])
+             xy_dim["max"] = np.max(xy_dim["data"][~np.isnan(xy_dim["data"])])
+             xy_dim[f"p{min_percentile}"] = np.percentile(xy_dim["data"][~np.isnan(xy_dim["data"])],
+                                                          min_percentile * 100)
+             xy_dim[f"p{max_percentile}"] = np.percentile(xy_dim["data"][~np.isnan(xy_dim["data"])],
+                                                          max_percentile * 100)
+             z_dim["mean"] = np.mean(z_dim["data"][~np.isnan(z_dim["data"])])
+             z_dim["median"] = np.median(z_dim["data"][~np.isnan(z_dim["data"])])
+             z_dim["std"] = np.std(z_dim["data"][~np.isnan(z_dim["data"])])
+             z_dim["min"] = np.min(z_dim["data"][~np.isnan(z_dim["data"])])
+             z_dim["max"] = np.max(z_dim["data"][~np.isnan(z_dim["data"])])
+             z_dim[f"p{min_percentile}"] = np.percentile(z_dim["data"][~np.isnan(z_dim["data"])],
+                                                         min_percentile * 100)
+             z_dim[f"p{max_percentile}"] = np.percentile(z_dim["data"][~np.isnan(z_dim["data"])], max_percentile * 100)
+             xy_dim["data"] = xy_dim["data"].tolist()
+             z_dim["data"] = z_dim["data"].tolist()
+
+             # Plotting xy-spacing data histogram
+             df_xy = pd.DataFrame(xy_dim["data"], columns=['data'])
+             del xy_dim["data"]  # no interest in keeping data (we only need statistics)
+             ax = df_xy.hist(column='data')
+             min_quant, max_quant, median = df_xy.quantile(min_percentile), df_xy.quantile(max_percentile), df_xy.median()
+             for x in ax[0]:
+                 x.axvline(min_quant.data, linestyle=':', color='r', label=f"Min Percentile: {float(min_quant):.3f}")
+                 x.axvline(max_quant.data, linestyle=':', color='g', label=f"Max Percentile: {float(max_quant):.3f}")
+                 x.axvline(median.data, linestyle='solid', color='gold', label=f"Median: {float(median.data):.3f}")
+                 x.grid(False)
+             plt.title(f"Voxels xy-spacing checks for {wildcard}")
+             plt.legend()
+             # Save the plot
+             if save:
+                 plt.savefig(self.paths._path_save_checks / ('Voxels_xy_check.png'))
+             else:
+                 plt.show()
+
+             # Plotting z-spacing data histogram
+             df_z = pd.DataFrame(z_dim["data"], columns=['data'])
+             del z_dim["data"]  # no interest in keeping data (we only need statistics)
+             ax = df_z.hist(column='data')
+             min_quant, max_quant, median = df_z.quantile(min_percentile), df_z.quantile(max_percentile), df_z.median()
+             for x in ax[0]:
+                 x.axvline(min_quant.data, linestyle=':', color='r', label=f"Min Percentile: {float(min_quant):.3f}")
+                 x.axvline(max_quant.data, linestyle=':', color='g', label=f"Max Percentile: {float(max_quant):.3f}")
+                 x.axvline(median.data, linestyle='solid', color='gold', label=f"Median: {float(median.data):.3f}")
+                 x.grid(False)
+             plt.title(f"Voxels z-spacing checks for {wildcard}")
+             plt.legend()
+             # Save the plot
+             if save:
+                 plt.savefig(self.paths._path_save_checks / ('Voxels_z_check.png'))
+             else:
+                 plt.show()
+
+             # Saving files using wildcard for name
+             if save:
+                 wildcard = str(wildcard).replace('*', '').replace('.npy', '.json')
+                 save_json(self.paths._path_save_checks / ('xyDim_' + wildcard), xy_dim, cls=NumpyEncoder)
+                 save_json(self.paths._path_save_checks / ('zDim_' + wildcard), z_dim, cls=NumpyEncoder)
+
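The per-wildcard statistics reduce to NaN-aware aggregation; an equivalent standalone sketch using numpy's nan-functions (note that numpy percentiles are specified on a 0-100 scale):

    import numpy as np

    spacings = np.array([0.9, 1.0, np.nan, 1.1, 5.0])   # made-up xy spacings in mm
    stats = {
        "mean": np.nanmean(spacings),
        "median": np.nanmedian(spacings),
        "p5": np.nanpercentile(spacings, 5),
        "p95": np.nanpercentile(spacings, 95),
    }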
+     def __pre_radiomics_checks_window(
+             self,
+             path_data: Union[str, Path] = None,
+             wildcards_window: List = [],
+             path_csv: Union[str, Path] = None,
+             min_percentile: float = 0.05,
+             max_percentile: float = 0.95,
+             bin_width: int = 0,
+             hist_range: list = [],
+             nifti: bool = True,
+             save: bool = False
+     ) -> None:
+         """Finds proper re-segmentation range options for radiomics analyses for a group of scans
+
+         Args:
+             path_data (Path, optional): Path to the MEDscan objects, if not specified will use ``path_save`` from the
+                 inner-class ``Paths`` in the current instance.
+             wildcards_window (List[str], optional): List of wildcards that determines the scans
+                 that will be analyzed. You can learn more about wildcards in
+                 `this link <https://www.linuxtechtips.com/2013/11/how-wildcards-work-in-linux-and-unix.html>`_.
+             path_csv (Union[str, Path], optional): Path to a CSV file containing a list of the scans that will be
+                 analyzed (a CSV file for a single ROI type).
+             min_percentile (float, optional): Minimum percentile to use for the histograms. Defaults to 0.05.
+             max_percentile (float, optional): Maximum percentile to use for the histograms. Defaults to 0.95.
+             bin_width (int, optional): Width of the bins for the histograms. If not provided, will use the
+                 default number of bins in the method
+                 `pandas.DataFrame.hist <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.hist.html>`_: 10 bins.
+             hist_range (list, optional): Range of the histograms. If empty, will use the minimum and maximum values.
+             nifti (bool, optional): If True, will use the NIfTI files, otherwise will use the numpy files.
+             save (bool, optional): If True, will save the results in a json file. Defaults to False.
+
+         Returns:
+             None.
+         """
+         # Updating plotting params
+         plt.rcParams["figure.figsize"] = (20, 20)
+         plt.rcParams.update({'font.size': 22})
+
+         if type(wildcards_window) is str:
+             wildcards_window = [wildcards_window]
+
+         if len(wildcards_window) == 0:
+             print("Wildcards list is empty, the pre-checks will be aborted")
+             return
+         if path_csv:
+             self.paths._path_csv = Path(path_csv)
+         roi_table = pd.read_csv(self.paths._path_csv)
+         if nifti:
+             roi_table['under'] = '_'
+             roi_table['dot'] = '.'
+             roi_table['roi_label'] = 'GTV'
+             roi_table['oparenthesis'] = '('
+             roi_table['cparenthesis'] = ')'
+             roi_table['ext'] = '.nii.gz'
+             patient_names = (pd.Series(
+                 roi_table[['PatientID', 'under', 'under',
+                            'ImagingScanName',
+                            'oparenthesis',
+                            'roi_label',
+                            'cparenthesis',
+                            'dot',
+                            'ImagingModality',
+                            'ext']].fillna('').values.tolist()).str.join('')).tolist()
+         else:
+             roi_names = [[], [], []]
+             roi_names[0] = roi_table['PatientID']
+             roi_names[1] = roi_table['ImagingScanName']
+             roi_names[2] = roi_table['ImagingModality']
+             patient_names = get_patient_names(roi_names)
+         for w in range(len(wildcards_window)):
+             temp_val = []
+             temp = []
+             file_paths = []
+             roi_data = {
+                 "data": [],
+                 "mean": [],
+                 "median": [],
+                 "std": [],
+                 "min": [],
+                 "max": [],
+                 f"p{min_percentile}": [],
+                 f"p{max_percentile}": []
+             }
+             wildcard = wildcards_window[w]
+             if path_data:
+                 file_paths = get_file_paths(path_data, wildcard)
+             elif self.paths._path_save:
+                 path_data = self.paths._path_save
+                 file_paths = get_file_paths(self.paths._path_save, wildcard)
+             else:
+                 raise ValueError("Path data is invalid.")
+             n_files = len(file_paths)
+             i = 0
+             for f in tqdm(range(n_files)):
+                 file = file_paths[f]
+                 _, filename = os.path.split(file)
+                 filename, ext = os.path.splitext(filename)
+                 patient_name = filename + ext
+                 try:
+                     if file.name.endswith('nii.gz') or file.name.endswith('nii'):
+                         medscan = self.__process_one_nifti(file, path_data)
+                     else:
+                         medscan = np.load(file, allow_pickle=True)
+                     if re.search('PTscan', wildcard) and medscan.format != 'nifti':
+                         medscan.data.volume.array = compute_suv_map(
+                             np.double(medscan.data.volume.array),
+                             medscan.dicomH[2])
+                     patient_names = pd.Index(patient_names)
+                     ind_roi = patient_names.get_loc(patient_name)
+                     name_roi = roi_table.loc[ind_roi][3]
+                     vol_obj_init, roi_obj_init = get_roi_from_indexes(medscan, name_roi, 'box')
+                     temp = vol_obj_init.data[roi_obj_init.data == 1]
+                     temp_val.append(len(temp))
+                     roi_data["data"].append(np.zeros(shape=(n_files, temp_val[i])))
+                     roi_data["data"][i] = temp
+                     i += 1
+                     del medscan
+                     del vol_obj_init
+                     del roi_obj_init
+                 except Exception as e:
+                     print(f"Problem with patient {patient_name}, error: {e}")
+
+             roi_data["data"] = np.concatenate(roi_data["data"])
+             roi_data["mean"] = np.mean(roi_data["data"][~np.isnan(roi_data["data"])])
+             roi_data["median"] = np.median(roi_data["data"][~np.isnan(roi_data["data"])])
+             roi_data["std"] = np.std(roi_data["data"][~np.isnan(roi_data["data"])])
+             roi_data["min"] = np.min(roi_data["data"][~np.isnan(roi_data["data"])])
+             roi_data["max"] = np.max(roi_data["data"][~np.isnan(roi_data["data"])])
+             roi_data[f"p{min_percentile}"] = np.percentile(roi_data["data"][~np.isnan(roi_data["data"])],
+                                                            min_percentile * 100)
+             roi_data[f"p{max_percentile}"] = np.percentile(roi_data["data"][~np.isnan(roi_data["data"])],
+                                                            max_percentile * 100)
+
+             # Set bin width if not provided
+             if bin_width != 0:
+                 if hist_range:
+                     nb_bins = (round(hist_range[1]) - round(hist_range[0])) // bin_width
+                 else:
+                     nb_bins = (round(roi_data["max"]) - round(roi_data["min"])) // bin_width
+             else:
+                 nb_bins = 10
+                 if hist_range:
+                     bin_width = int((round(hist_range[1]) - round(hist_range[0])) // nb_bins)
+                 else:
+                     bin_width = int((round(roi_data["max"]) - round(roi_data["min"])) // nb_bins)
+             nb_bins = int(nb_bins)
+
+             # Set histogram range if not provided
+             if not hist_range:
+                 hist_range = (roi_data["min"], roi_data["max"])
+
+             # re-segment data according to histogram range
+             roi_data["data"] = roi_data["data"][(roi_data["data"] > hist_range[0]) & (roi_data["data"] < hist_range[1])]
+             df_data = pd.DataFrame(roi_data["data"], columns=['data'])
+             del roi_data["data"]  # no interest in keeping data (we only need statistics)
+
+             # Plot histogram
+             ax = df_data.hist(column='data', bins=nb_bins, range=(hist_range[0], hist_range[1]), edgecolor='black')
+             min_quant, max_quant = df_data.quantile(min_percentile), df_data.quantile(max_percentile)
+             for x in ax[0]:
+                 x.axvline(min_quant.data, linestyle=':', color='r', label=f"{min_percentile*100}% Percentile: {float(min_quant):.3f}")
+                 x.axvline(max_quant.data, linestyle=':', color='g', label=f"{max_percentile*100}% Percentile: {float(max_quant):.3f}")
+                 x.grid(False)
+                 x.xaxis.set_ticks(np.arange(hist_range[0], hist_range[1], bin_width, dtype=int))
+                 x.set_xticklabels(x.get_xticks(), rotation=45)
+                 x.xaxis.set_tick_params(pad=15)
+             plt.title(f"Intensity range checks for {wildcard}, bw={bin_width}")
+             plt.legend()
+             # Save the plot
+             if save:
+                 plt.savefig(self.paths._path_save_checks / ('Intensity_range_check_' + f'bw_{bin_width}.png'))
+             else:
+                 plt.show()
+
+             # save final checks
+             if save:
+                 wildcard = str(wildcard).replace('*', '').replace('.npy', '.json')
+                 save_json(self.paths._path_save_checks / ('roi_data_' + wildcard), roi_data, cls=NumpyEncoder)
+
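The bin bookkeeping in isolation: given a width, the bin count follows from the (rounded) histogram range, and vice versa (values hypothetical):

    hist_range = (0, 400)   # e.g. an intensity window
    bin_width = 25
    nb_bins = (round(hist_range[1]) - round(hist_range[0])) // bin_width   # -> 16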
+     def pre_radiomics_checks(self,
+                              path_data: Union[str, Path] = None,
+                              wildcards_dimensions: List = [],
+                              wildcards_window: List = [],
+                              path_csv: Union[str, Path] = None,
+                              min_percentile: float = 0.05,
+                              max_percentile: float = 0.95,
+                              bin_width: int = 0,
+                              hist_range: list = [],
+                              nifti: bool = False,
+                              save: bool = False) -> None:
+         """Finds proper dimension and re-segmentation range options for radiomics analyses.
+
+         The resulting files from this method can then be analyzed and used to set up radiomics
+         parameters options in computation methods.
+
+         Args:
+             path_data (Path, optional): Path to the MEDscan objects, if not specified will use ``path_save`` from the
+                 inner-class ``Paths`` in the current instance.
+             wildcards_dimensions (List[str], optional): List of wildcards that determines the scans
+                 that will be analyzed. You can learn more about wildcards in
+                 `this link <https://www.linuxtechtips.com/2013/11/how-wildcards-work-in-linux-and-unix.html>`_.
+             wildcards_window (List[str], optional): List of wildcards that determines the scans
+                 that will be analyzed. You can learn more about wildcards in
+                 `this link <https://www.linuxtechtips.com/2013/11/how-wildcards-work-in-linux-and-unix.html>`_.
+             path_csv (Union[str, Path], optional): Path to a CSV file containing a list of the scans that will be
+                 analyzed (a CSV file for a single ROI type).
+             min_percentile (float, optional): Minimum percentile to use for the histograms. Defaults to 0.05.
+             max_percentile (float, optional): Maximum percentile to use for the histograms. Defaults to 0.95.
+             bin_width (int, optional): Width of the bins for the histograms. If not provided, will use the
+                 default number of bins in the method
+                 `pandas.DataFrame.hist <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.hist.html>`_: 10 bins.
+             hist_range (list, optional): Range of the histograms. If empty, will use the minimum and maximum values.
+             nifti (bool, optional): Set to True if the scans are NIfTI files. Defaults to False.
+             save (bool, optional): If True, will save the results in a json file. Defaults to False.
+
+         Returns:
+             None
+         """
+         # Initialization
+         path_study = Path.cwd()
+
+         # Load params
+         if not self.paths._path_pre_checks_settings:
+             if not wildcards_dimensions or not wildcards_window:
+                 raise ValueError("Path to pre-checks settings is None. "
+                                  "wildcards_dimensions and wildcards_window need to be specified")
+         else:
+             settings = self.paths._path_pre_checks_settings
+             settings = load_json(settings)
+             settings = settings['pre_radiomics_checks']
+
+             # Setting up paths
+             if 'path_save_checks' in settings and settings['path_save_checks']:
+                 self.paths._path_save_checks = Path(settings['path_save_checks'])
+             if 'path_csv' in settings and settings['path_csv']:
+                 self.paths._path_csv = Path(settings['path_csv'])
+
+             # Wildcards of groups of files to analyze for dimensions in path_data.
+             # See for example: https://www.linuxtechtips.com/2013/11/how-wildcards-work-in-linux-and-unix.html
+             # Keep the cell empty if no dimension checks are to be performed.
+             if not wildcards_dimensions:
+                 wildcards_dimensions = []
+                 for i in range(len(settings['wildcards_dimensions'])):
+                     wildcards_dimensions.append(settings['wildcards_dimensions'][i])
+
+             # ROI intensity window checks params
+             if not wildcards_window:
+                 wildcards_window = []
+                 for i in range(len(settings['wildcards_window'])):
+                     wildcards_window.append(settings['wildcards_window'][i])
+
+         # PRE-RADIOMICS CHECKS
+         if not self.paths._path_save_checks:
+             if (path_study / 'checks').exists():
+                 self.paths._path_save_checks = Path(path_study / 'checks')
+             else:
+                 os.mkdir(path_study / 'checks')
+                 self.paths._path_save_checks = Path(path_study / 'checks')
+         else:
+             if self.paths._path_save_checks.name != 'checks':
+                 if (self.paths._path_save_checks / 'checks').exists():
+                     self.paths._path_save_checks /= 'checks'
+                 else:
+                     os.mkdir(self.paths._path_save_checks / 'checks')
+                     self.paths._path_save_checks = Path(self.paths._path_save_checks / 'checks')
+
+         # Initializing plotting params
+         plt.rcParams["figure.figsize"] = (20, 20)
+         plt.rcParams.update({'font.size': 22})
+
+         start = time()
+         print('\n\n************************* PRE-RADIOMICS CHECKS *************************', end='')
+
+         # 1. PRE-RADIOMICS CHECKS -- DIMENSIONS
+         start1 = time()
+         print('\n--> PRE-RADIOMICS CHECKS -- DIMENSIONS ... ', end='')
+         self.__pre_radiomics_checks_dimensions(
+             path_data,
+             wildcards_dimensions,
+             min_percentile,
+             max_percentile,
+             save)
+         print('DONE', end='')
+         time1 = f"{time() - start1:.2f}"
+         print(f'\nElapsed time: {time1} sec', end='')
+
+         # 2. PRE-RADIOMICS CHECKS - WINDOW
+         start2 = time()
+         print('\n\n--> PRE-RADIOMICS CHECKS -- WINDOW ... \n', end='')
+         self.__pre_radiomics_checks_window(
+             path_data,
+             wildcards_window,
+             path_csv,
+             min_percentile,
+             max_percentile,
+             bin_width,
+             hist_range,
+             nifti,
+             save)
+         print('DONE', end='')
+         time2 = f"{time() - start2:.2f}"
+         print(f'\nElapsed time: {time2} sec', end='')
+
+         time_elapsed = f"{time() - start:.2f}"
+         print(f'\n\n--> TOTAL TIME FOR PRE-RADIOMICS CHECKS: {time_elapsed} seconds')
+         print('-------------------------------------------------------------------------------------')
+
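A usage sketch of the combined checks (editor's illustration; the wildcards and CSV path are hypothetical):

    dm.pre_radiomics_checks(
        wildcards_dimensions=["Glioma*.MRscan.npy"],
        wildcards_window=["Glioma*.MRscan.npy"],
        path_csv="/data/csv/roi_GTV.csv",   # one CSV per ROI type
        bin_width=5,
        save=True,
    )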
+ def perform_mr_imaging_summary(self,
1189
+ wildcards_scans: List[str],
1190
+ path_data: Path = None,
1191
+ path_save_checks: Path = None,
1192
+ min_percentile: float = 0.05,
1193
+ max_percentile: float = 0.95
1194
+ ) -> None:
1195
+ """
1196
+ Summarizes MRI imaging acquisition parameters. Plots summary histograms
1197
+ for different dimensions and saves all acquisition parameters locally in JSON files.
1198
+
1199
+ Args:
1200
+ wildcards_scans (List[str]): List of wildcards that determines the scans
1201
+ that will be analyzed (Only MRI scans will be analyzed). You can learn more about wildcards in
1202
+ `this link <https://www.linuxtechtips.com/2013/11/how-wildcards-work-in-linux-and-unix.html>`_.
1203
+ For example: ``[\"STS*.MRscan.npy\"]``.
1204
+ path_data (Path, optional): Path to the MEDscan objects, if not specified will use ``path_save`` from the
1205
+ inner-class ``Paths`` in the current instance.
1206
+ path_save_checks (Path, optional): Path where to save the checks, if not specified will use the one
1207
+ in the current instance.
1208
+ min_percentile (float, optional): Minimum percentile to use for the histograms. Defaults to 0.05.
1209
+ max_percentile (float, optional): Maximum percentile to use for the histograms. Defaults to 0.95.
1210
+
1211
+ Returns:
1212
+ None.
1213
+ """
1214
+ # Initializing data structures
1215
+ class param:
1216
+ dates = []
1217
+ manufacturer = []
1218
+ scanning_sequence = []
1219
+ class years:
1220
+ data = []
1221
+
1222
+ class fieldStrength:
1223
+ data = []
1224
+
1225
+ class repetitionTime:
1226
+ data = []
1227
+
1228
+ class echoTime:
1229
+ data = []
1230
+
1231
+ class inversionTime:
1232
+ data = []
1233
+
1234
+ class echoTrainLength:
1235
+ data = []
1236
+
1237
+ class flipAngle:
1238
+ data = []
1239
+
1240
+ class numberAverages:
1241
+ data = []
1242
+
1243
+ class xyDim:
1244
+ data = []
1245
+
1246
+ class zDim:
1247
+ data = []
1248
+
1249
+ if len(wildcards_scans) == 0:
1250
+ print('wildcards_scans is empty')
1251
+
1252
+ # wildcards checks:
1253
+ no_mr_scan = True
1254
+ for wildcard in wildcards_scans:
1255
+ if 'MRscan' in wildcard:
1256
+ no_mr_scan = False
1257
+ if no_mr_scan:
1258
+ raise ValueError(f"wildcards: {wildcards_scans} does not include MR scans. (Only MR scans are supported)")
1259
+
1260
+ # Initialization
1261
+ if path_data is None:
1262
+ if self.paths._path_save:
1263
+ path_data = Path(self.paths._path_save)
1264
+ else:
1265
+ print("No path to data was given and path save is None.")
1266
+ return 0
1267
+
1268
+ if not path_save_checks:
1269
+ if self.paths._path_save_checks:
1270
+ path_save_checks = Path(self.paths._path_save_checks)
1271
+ else:
1272
+ if (Path(os.getcwd()) / "checks").exists():
1273
+ path_save_checks = Path(os.getcwd()) / "checks"
1274
+ else:
1275
+ path_save_checks = (Path(os.getcwd()) / "checks").mkdir()
1276
+        # Looping through all the different wildcards
+        for wildcard in tqdm(wildcards_scans):
+            file_paths = get_file_paths(path_data, wildcard)
+            n_files = len(file_paths)
+            param.dates = np.zeros(n_files)
+            param.years.data = np.full((n_files, 1), np.nan)
+            param.manufacturer = [None] * n_files
+            param.scanning_sequence = [None] * n_files
+            param.fieldStrength.data = np.full((n_files, 1), np.nan)
+            param.repetitionTime.data = np.full((n_files, 1), np.nan)
+            param.echoTime.data = np.full((n_files, 1), np.nan)
+            param.inversionTime.data = np.full((n_files, 1), np.nan)
+            param.echoTrainLength.data = np.full((n_files, 1), np.nan)
+            param.flipAngle.data = np.full((n_files, 1), np.nan)
+            param.numberAverages.data = np.full((n_files, 1), np.nan)
+            param.xyDim.data = np.full((n_files, 1), np.nan)
+            param.zDim.data = np.full((n_files, 1), np.nan)
+
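+            # Assumed helper behavior: get_file_paths(path_data, wildcard) returns
+            # the file paths matching the wildcard; with no matches, n_files is 0
+            # and the summaries below are simply empty.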
+            # Loading and recording data
+            for f in tqdm(range(n_files)):
+                file = file_paths[f]
+
+                # Open the warnings file
+                try:
+                    warn_file = open(path_save_checks / 'imaging_summary_mr_warnings.txt', 'a')
+                except IOError:
+                    print("Could not open warning file")
+
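+                # Note: the warnings file is opened in append mode for every scan,
+                # so messages from earlier scans (and earlier runs) accumulate.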
+                # Loading data
+                try:
+                    print(f'\nCurrently working on: {file}', file=warn_file)
+                    with open(path_data / file, 'rb') as fe:
+                        medscan = pickle.load(fe)
+
+                    # Example DICOM header (from one slice of the scan)
+                    info = medscan.dicomH[1]
+                    # Recording dates (info.AcquisitionDate)
+                    try:
+                        param.dates[f] = info.AcquisitionDate
+                    except AttributeError:
+                        param.dates[f] = info.StudyDate
+                    # Recording years
+                    try:
+                        y = str(param.dates[f])  # Only the first four characters represent the year
+                        param.years.data[f] = int(y[0:4])
+                    except Exception as e:
+                        print(f'Cannot read years of: {file}. Error: {e}', file=warn_file)
+                    # Recording manufacturers
+                    try:
+                        param.manufacturer[f] = info.Manufacturer
+                    except Exception as e:
+                        print(f'Cannot read manufacturer of: {file}. Error: {e}', file=warn_file)
+                    # Recording scanning sequence (standard DICOM keyword: ScanningSequence)
+                    try:
+                        param.scanning_sequence[f] = info.ScanningSequence
+                    except Exception as e:
+                        print(f'Cannot read scanning sequence of: {file}. Error: {e}', file=warn_file)
+                    # Recording field strength
+                    try:
+                        param.fieldStrength.data[f] = info.MagneticFieldStrength
+                    except Exception as e:
+                        print(f'Cannot read field strength of: {file}. Error: {e}', file=warn_file)
+                    # Recording repetition time
+                    try:
+                        param.repetitionTime.data[f] = info.RepetitionTime
+                    except Exception as e:
+                        print(f'Cannot read repetition time of: {file}. Error: {e}', file=warn_file)
+                    # Recording echo time
+                    try:
+                        param.echoTime.data[f] = info.EchoTime
+                    except Exception as e:
+                        print(f'Cannot read echo time of: {file}. Error: {e}', file=warn_file)
+                    # Recording inversion time
+                    try:
+                        param.inversionTime.data[f] = info.InversionTime
+                    except Exception as e:
+                        print(f'Cannot read inversion time of: {file}. Error: {e}', file=warn_file)
+                    # Recording echo train length
+                    try:
+                        param.echoTrainLength.data[f] = info.EchoTrainLength
+                    except Exception as e:
+                        print(f'Cannot read echo train length of: {file}. Error: {e}', file=warn_file)
+                    # Recording flip angle
+                    try:
+                        param.flipAngle.data[f] = info.FlipAngle
+                    except Exception as e:
+                        print(f'Cannot read flip angle of: {file}. Error: {e}', file=warn_file)
+                    # Recording number of averages
+                    try:
+                        param.numberAverages.data[f] = info.NumberOfAverages
+                    except Exception as e:
+                        print(f'Cannot read number of averages of: {file}. Error: {e}', file=warn_file)
+                    # Recording xy spacing
+                    try:
+                        param.xyDim.data[f] = medscan.data.volume.spatialRef.PixelExtentInWorldX
+                    except Exception as e:
+                        print(f'Cannot read xy spacing of: {file}. Error: {e}', file=warn_file)
+                    # Recording z spacing
+                    try:
+                        param.zDim.data[f] = medscan.data.volume.spatialRef.PixelExtentInWorldZ
+                    except Exception as e:
+                        print(f'Cannot read z spacing of: {file}. Error: {e}', file=warn_file)
+                except Exception as e:
+                    print(f'Cannot read file: {file}. Error: {e}', file=warn_file)
+
+                warn_file.close()
+
+            # Summarizing data
+            # Summarizing years
+            df_years = pd.DataFrame(param.years.data, columns=['years']).describe(
+                percentiles=[min_percentile, max_percentile], include='all')
+            # Summarizing field strength
+            df_fs = pd.DataFrame(param.fieldStrength.data, columns=['fieldStrength']).describe(
+                percentiles=[min_percentile, max_percentile], include='all')
+            # Summarizing repetition time
+            df_rt = pd.DataFrame(param.repetitionTime.data, columns=['repetitionTime']).describe(
+                percentiles=[min_percentile, max_percentile], include='all')
+            # Summarizing echo time
+            df_et = pd.DataFrame(param.echoTime.data, columns=['echoTime']).describe(
+                percentiles=[min_percentile, max_percentile], include='all')
+            # Summarizing inversion time
+            df_it = pd.DataFrame(param.inversionTime.data, columns=['inversionTime']).describe(
+                percentiles=[min_percentile, max_percentile], include='all')
+            # Summarizing echo train length
+            df_etl = pd.DataFrame(param.echoTrainLength.data, columns=['echoTrainLength']).describe(
+                percentiles=[min_percentile, max_percentile], include='all')
+            # Summarizing flip angle
+            df_fa = pd.DataFrame(param.flipAngle.data, columns=['flipAngle']).describe(
+                percentiles=[min_percentile, max_percentile], include='all')
+            # Summarizing number of averages
+            df_na = pd.DataFrame(param.numberAverages.data, columns=['numberAverages']).describe(
+                percentiles=[min_percentile, max_percentile], include='all')
+            # xy-spacing (described after plotting)
+            df_xy = pd.DataFrame(param.xyDim.data, columns=['xyDim'])
+            # z-spacing (described after plotting)
+            df_z = pd.DataFrame(param.zDim.data, columns=['zDim'])
+
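+            # Each describe() frame carries count/mean/std/min/max plus the
+            # requested percentile rows (e.g. "5%" and "95%" with the defaults),
+            # giving robust ranges for every acquisition parameter.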
+            # Plotting the xy-spacing histogram
+            ax = df_xy.hist(column='xyDim')
+            # NaN-aware mean, consistent with DataFrame.quantile's NaN handling
+            min_quant, max_quant, average = df_xy.quantile(min_percentile), df_xy.quantile(max_percentile), np.nanmean(param.xyDim.data)
+            for x in ax[0]:
+                x.axvline(min_quant.xyDim, linestyle=':', color='r', label=f"Min percentile: {float(min_quant.xyDim):.3f}")
+                x.axvline(max_quant.xyDim, linestyle=':', color='g', label=f"Max percentile: {float(max_quant.xyDim):.3f}")
+                x.axvline(average, linestyle='solid', color='gold', label=f"Average: {float(average):.3f}")
+                x.grid(False)
+            plt.title(f"MR xy-spacing imaging summary for {wildcard}")
+            plt.legend()
+            plt.show()
+
+            # Plotting the z-spacing histogram
+            ax = df_z.hist(column='zDim')
+            min_quant, max_quant, average = df_z.quantile(min_percentile), df_z.quantile(max_percentile), np.nanmean(param.zDim.data)
+            for x in ax[0]:
+                x.axvline(min_quant.zDim, linestyle=':', color='r', label=f"Min percentile: {float(min_quant.zDim):.3f}")
+                x.axvline(max_quant.zDim, linestyle=':', color='g', label=f"Max percentile: {float(max_quant.zDim):.3f}")
+                x.axvline(average, linestyle='solid', color='gold', label=f"Average: {float(average):.3f}")
+                x.grid(False)
+            plt.title(f"MR z-spacing imaging summary for {wildcard}")
+            plt.legend()
+            plt.show()
+
+            # Summarizing xy-spacing
+            df_xy = df_xy.describe(percentiles=[min_percentile, max_percentile], include='all')
+            # Summarizing z-spacing
+            df_z = df_z.describe(percentiles=[min_percentile, max_percentile], include='all')
+
+            # Saving data
+            name_save = wildcard.replace('*', '').replace('.npy', '')
+            save_name = 'imagingSummary__' + name_save + '.json'
+            df_all = [df_years, df_fs, df_rt, df_et, df_it, df_etl, df_fa, df_na, df_xy, df_z]
+            df_all = df_all[0].join(df_all[1:])
+            df_all.to_json(path_save_checks / save_name, orient='columns', indent=4)
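+            # For example (illustration only): the wildcard "STS*.MRscan.npy" is
+            # saved as "imagingSummary__STS.MRscan.json" under path_save_checks.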
+
+    def perform_ct_imaging_summary(self,
+                                   wildcards_scans: List[str],
+                                   path_data: Path = None,
+                                   path_save_checks: Path = None,
+                                   min_percentile: float = 0.05,
+                                   max_percentile: float = 0.95
+                                   ) -> None:
+        """
+        Summarizes CT imaging acquisition parameters. Plots summary histograms
+        for different dimensions and saves all acquisition parameters locally in JSON files.
+
+        Args:
+            wildcards_scans (List[str]): List of wildcards that determine the scans
+                to be analyzed (only CT scans are supported). You can learn more about wildcards at
+                `this link <https://www.linuxtechtips.com/2013/11/how-wildcards-work-in-linux-and-unix.html>`_.
+                For example: ``["STS*.CTscan.npy"]``.
+            path_data (Path, optional): Path to the MEDscan objects. If not specified, ``path_save`` from the
+                inner class ``Paths`` of the current instance is used.
+            path_save_checks (Path, optional): Path where to save the checks. If not specified, the one
+                in the current instance is used.
+            min_percentile (float, optional): Minimum percentile to use for the histograms. Defaults to 0.05.
+            max_percentile (float, optional): Maximum percentile to use for the histograms. Defaults to 0.95.
+
+        Returns:
+            None.
+        """
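+        # For CT, the summarized DICOM fields are the acquisition date/year,
+        # Manufacturer, KVP (tube voltage), Exposure, ConvolutionKernel, and the
+        # in-plane (xy) and slice (z) spacings of the stored volume.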
+        # Initializing data structures
+        class param:
+            manufacturer = []
+            dates = []
+            kernel = []
+
+            class years:
+                data = []
+
+            class voltage:
+                data = []
+
+            class exposure:
+                data = []
+
+            class xyDim:
+                data = []
+
+            class zDim:
+                data = []
+
+        if len(wildcards_scans) == 0:
+            print('wildcards_scans is empty')
+
+        # Wildcards check: only CT scans are supported
+        if not any('CTscan' in wildcard for wildcard in wildcards_scans):
+            raise ValueError(f"wildcards: {wildcards_scans} do not include any CT scans. (Only CT scans are supported)")
+
+        # Initialization
+        if path_data is None:
+            if self.paths._path_save:
+                path_data = Path(self.paths._path_save)
+            else:
+                print("No path to data was given and path save is None.")
+                return
+
+        if not path_save_checks:
+            if self.paths._path_save_checks:
+                path_save_checks = Path(self.paths._path_save_checks)
+            else:
+                # Fall back to a "checks" folder in the current working directory
+                path_save_checks = Path(os.getcwd()) / "checks"
+                path_save_checks.mkdir(exist_ok=True)
+
+        # Looping through all the different wildcards
+        for wildcard in tqdm(wildcards_scans):
+            file_paths = get_file_paths(path_data, wildcard)
+            n_files = len(file_paths)
+            param.dates = np.zeros(n_files)
+            param.years.data = np.full(n_files, np.nan)
+            param.manufacturer = [None] * n_files
+            param.voltage.data = np.full(n_files, np.nan)
+            param.exposure.data = np.full(n_files, np.nan)
+            param.kernel = [None] * n_files
+            param.xyDim.data = np.full(n_files, np.nan)
+            param.zDim.data = np.full(n_files, np.nan)
+
+            # Loading and recording data
+            for f in tqdm(range(n_files)):
+                file = file_paths[f]
+
+                # Open the warnings file
+                try:
+                    warn_file = open(path_save_checks / 'imaging_summary_ct_warnings.txt', 'a')
+                except IOError:
+                    print("Could not open warning file")
+
+                # Loading data
+                try:
+                    with open(path_data / file, 'rb') as fe:
+                        medscan = pickle.load(fe)
+                    print(f'\nCurrently working on: {file}', file=warn_file)
+
+                    # Example DICOM header (from one slice of the scan)
+                    info = medscan.dicomH[1]
+
+                    # Recording dates
+                    try:
+                        param.dates[f] = info.AcquisitionDate
+                    except AttributeError:
+                        param.dates[f] = info.StudyDate
+                    # Recording years
+                    try:
+                        years = str(param.dates[f])  # Only the first four characters represent the year
+                        param.years.data[f] = int(years[0:4])
+                    except Exception as e:
+                        print(f'Cannot read years of: {file}. Error: {e}', file=warn_file)
+                    # Recording manufacturers
+                    try:
+                        param.manufacturer[f] = info.Manufacturer
+                    except Exception as e:
+                        print(f'Cannot read manufacturer of: {file}. Error: {e}', file=warn_file)
+                    # Recording voltage
+                    try:
+                        param.voltage.data[f] = info.KVP
+                    except Exception as e:
+                        print(f'Cannot read voltage of: {file}. Error: {e}', file=warn_file)
+                    # Recording exposure
+                    try:
+                        param.exposure.data[f] = info.Exposure
+                    except Exception as e:
+                        print(f'Cannot read exposure of: {file}. Error: {e}', file=warn_file)
+                    # Recording reconstruction kernel
+                    try:
+                        param.kernel[f] = info.ConvolutionKernel
+                    except Exception as e:
+                        print(f'Cannot read kernel of: {file}. Error: {e}', file=warn_file)
+                    # Recording xy spacing
+                    try:
+                        param.xyDim.data[f] = medscan.data.volume.spatialRef.PixelExtentInWorldX
+                    except Exception as e:
+                        print(f'Cannot read xy spacing of: {file}. Error: {e}', file=warn_file)
+                    # Recording z spacing
+                    try:
+                        param.zDim.data[f] = medscan.data.volume.spatialRef.PixelExtentInWorldZ
+                    except Exception as e:
+                        print(f'Cannot read z spacing of: {file}. Error: {e}', file=warn_file)
+                except Exception as e:
+                    print(f'Cannot load file: {file}. Error: {e}', file=warn_file)
+
+                warn_file.close()
+
+            # Summarizing data
+            # Summarizing years
+            df_years = pd.DataFrame(param.years.data, columns=['years']).describe(
+                percentiles=[min_percentile, max_percentile], include='all')
+            # Summarizing voltage
+            df_voltage = pd.DataFrame(param.voltage.data, columns=['voltage']).describe(
+                percentiles=[min_percentile, max_percentile], include='all')
+            # Summarizing exposure
+            df_exposure = pd.DataFrame(param.exposure.data, columns=['exposure']).describe(
+                percentiles=[min_percentile, max_percentile], include='all')
+            # Summarizing kernel
+            df_kernel = pd.DataFrame(param.kernel, columns=['kernel']).describe(
+                percentiles=[min_percentile, max_percentile], include='all')
+            # xy-spacing (described after plotting)
+            df_xy = pd.DataFrame(param.xyDim.data, columns=['xyDim'])
+            # z-spacing (described after plotting)
+            df_z = pd.DataFrame(param.zDim.data, columns=['zDim'])
+
+            # Plotting the xy-spacing histogram
+            ax = df_xy.hist(column='xyDim')
+            # NaN-aware mean, consistent with DataFrame.quantile's NaN handling
+            min_quant, max_quant, average = df_xy.quantile(min_percentile), df_xy.quantile(max_percentile), np.nanmean(param.xyDim.data)
+            for x in ax[0]:
+                x.axvline(min_quant.xyDim, linestyle=':', color='r', label=f"Min percentile: {float(min_quant.xyDim):.3f}")
+                x.axvline(max_quant.xyDim, linestyle=':', color='g', label=f"Max percentile: {float(max_quant.xyDim):.3f}")
+                x.axvline(average, linestyle='solid', color='gold', label=f"Average: {float(average):.3f}")
+                x.grid(False)
+            plt.title(f"CT xy-spacing imaging summary for {wildcard}")
+            plt.legend()
+            plt.show()
+
+            # Plotting the z-spacing histogram
+            ax = df_z.hist(column='zDim')
+            min_quant, max_quant, average = df_z.quantile(min_percentile), df_z.quantile(max_percentile), np.nanmean(param.zDim.data)
+            for x in ax[0]:
+                x.axvline(min_quant.zDim, linestyle=':', color='r', label=f"Min percentile: {float(min_quant.zDim):.3f}")
+                x.axvline(max_quant.zDim, linestyle=':', color='g', label=f"Max percentile: {float(max_quant.zDim):.3f}")
+                x.axvline(average, linestyle='solid', color='gold', label=f"Average: {float(average):.3f}")
+                x.grid(False)
+            plt.title(f"CT z-spacing imaging summary for {wildcard}")
+            plt.legend()
+            plt.show()
+
+            # Summarizing xy-spacing
+            df_xy = df_xy.describe(percentiles=[min_percentile, max_percentile], include='all')
+            # Summarizing z-spacing
+            df_z = df_z.describe(percentiles=[min_percentile, max_percentile], include='all')
+
+            # Saving data
+            name_save = wildcard.replace('*', '').replace('.npy', '')
+            save_name = 'imagingSummary__' + name_save + '.json'
+            df_all = [df_years, df_voltage, df_exposure, df_kernel, df_xy, df_z]
+            df_all = df_all[0].join(df_all[1:])
+            df_all.to_json(path_save_checks / save_name, orient='columns', indent=4)
+
+    def perform_imaging_summary(self,
+                                wildcards_scans: List[str],
+                                path_data: Path = None,
+                                path_save_checks: Path = None,
+                                min_percentile: float = 0.05,
+                                max_percentile: float = 0.95
+                                ) -> None:
+        """
+        Summarizes CT and MR imaging acquisition parameters. Plots summary histograms
+        for different dimensions and saves all acquisition parameters locally in JSON files.
+
+        Args:
+            wildcards_scans (List[str]): List of wildcards that determine the scans
+                to be analyzed (both CT and MR scans are supported). You can learn more about wildcards at
+                `this link <https://www.linuxtechtips.com/2013/11/how-wildcards-work-in-linux-and-unix.html>`_.
+                For example: ``["STS*.CTscan.npy", "STS*.MRscan.npy"]``.
+            path_data (Path, optional): Path to the MEDscan objects. If not specified, ``path_save`` from the
+                inner class ``Paths`` of the current instance is used.
+            path_save_checks (Path, optional): Path where to save the checks. If not specified, the one
+                in the current instance is used.
+            min_percentile (float, optional): Minimum percentile to use for the histograms. Defaults to 0.05.
+            max_percentile (float, optional): Maximum percentile to use for the histograms. Defaults to 0.95.
+
+        Returns:
+            None.
+        """
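+        # Usage sketch (hypothetical instance and paths, for illustration only):
+        #   experiment.perform_imaging_summary(
+        #       ["STS*.CTscan.npy", "STS*.MRscan.npy"],
+        #       path_data=Path("data/npy"),
+        #       path_save_checks=Path("checks"))
+        # MR wildcards are routed to perform_mr_imaging_summary and CT wildcards
+        # to perform_ct_imaging_summary, as dispatched below.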
+        # MR imaging summary
+        wildcards_scans_mr = [wildcard for wildcard in wildcards_scans if 'MRscan' in wildcard]
+        if len(wildcards_scans_mr) == 0:
+            print("Cannot perform imaging summary for MR: no MR scan wildcard was given!")
+        else:
+            self.perform_mr_imaging_summary(
+                wildcards_scans_mr,
+                path_data,
+                path_save_checks,
+                min_percentile,
+                max_percentile)
+
+        # CT imaging summary
+        wildcards_scans_ct = [wildcard for wildcard in wildcards_scans if 'CTscan' in wildcard]
+        if len(wildcards_scans_ct) == 0:
+            print("Cannot perform imaging summary for CT: no CT scan wildcard was given!")
+        else:
+            self.perform_ct_imaging_summary(
+                wildcards_scans_ct,
+                path_data,
+                path_save_checks,
+                min_percentile,
+                max_percentile)