mediml 0.9.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- MEDiml/MEDscan.py +1696 -0
- MEDiml/__init__.py +21 -0
- MEDiml/biomarkers/BatchExtractor.py +806 -0
- MEDiml/biomarkers/BatchExtractorTexturalFilters.py +840 -0
- MEDiml/biomarkers/__init__.py +16 -0
- MEDiml/biomarkers/diagnostics.py +125 -0
- MEDiml/biomarkers/get_oriented_bound_box.py +158 -0
- MEDiml/biomarkers/glcm.py +1602 -0
- MEDiml/biomarkers/gldzm.py +523 -0
- MEDiml/biomarkers/glrlm.py +1315 -0
- MEDiml/biomarkers/glszm.py +555 -0
- MEDiml/biomarkers/int_vol_hist.py +527 -0
- MEDiml/biomarkers/intensity_histogram.py +615 -0
- MEDiml/biomarkers/local_intensity.py +89 -0
- MEDiml/biomarkers/morph.py +1756 -0
- MEDiml/biomarkers/ngldm.py +780 -0
- MEDiml/biomarkers/ngtdm.py +414 -0
- MEDiml/biomarkers/stats.py +373 -0
- MEDiml/biomarkers/utils.py +389 -0
- MEDiml/filters/TexturalFilter.py +299 -0
- MEDiml/filters/__init__.py +9 -0
- MEDiml/filters/apply_filter.py +134 -0
- MEDiml/filters/gabor.py +215 -0
- MEDiml/filters/laws.py +283 -0
- MEDiml/filters/log.py +147 -0
- MEDiml/filters/mean.py +121 -0
- MEDiml/filters/textural_filters_kernels.py +1738 -0
- MEDiml/filters/utils.py +107 -0
- MEDiml/filters/wavelet.py +237 -0
- MEDiml/learning/DataCleaner.py +198 -0
- MEDiml/learning/DesignExperiment.py +480 -0
- MEDiml/learning/FSR.py +667 -0
- MEDiml/learning/Normalization.py +112 -0
- MEDiml/learning/RadiomicsLearner.py +714 -0
- MEDiml/learning/Results.py +2237 -0
- MEDiml/learning/Stats.py +694 -0
- MEDiml/learning/__init__.py +10 -0
- MEDiml/learning/cleaning_utils.py +107 -0
- MEDiml/learning/ml_utils.py +1015 -0
- MEDiml/processing/__init__.py +6 -0
- MEDiml/processing/compute_suv_map.py +121 -0
- MEDiml/processing/discretisation.py +149 -0
- MEDiml/processing/interpolation.py +275 -0
- MEDiml/processing/resegmentation.py +66 -0
- MEDiml/processing/segmentation.py +912 -0
- MEDiml/utils/__init__.py +25 -0
- MEDiml/utils/batch_patients.py +45 -0
- MEDiml/utils/create_radiomics_table.py +131 -0
- MEDiml/utils/data_frame_export.py +42 -0
- MEDiml/utils/find_process_names.py +16 -0
- MEDiml/utils/get_file_paths.py +34 -0
- MEDiml/utils/get_full_rad_names.py +21 -0
- MEDiml/utils/get_institutions_from_ids.py +16 -0
- MEDiml/utils/get_patient_id_from_scan_name.py +22 -0
- MEDiml/utils/get_patient_names.py +26 -0
- MEDiml/utils/get_radiomic_names.py +27 -0
- MEDiml/utils/get_scan_name_from_rad_name.py +22 -0
- MEDiml/utils/image_reader_SITK.py +37 -0
- MEDiml/utils/image_volume_obj.py +22 -0
- MEDiml/utils/imref.py +340 -0
- MEDiml/utils/initialize_features_names.py +62 -0
- MEDiml/utils/inpolygon.py +159 -0
- MEDiml/utils/interp3.py +43 -0
- MEDiml/utils/json_utils.py +78 -0
- MEDiml/utils/mode.py +31 -0
- MEDiml/utils/parse_contour_string.py +58 -0
- MEDiml/utils/save_MEDscan.py +30 -0
- MEDiml/utils/strfind.py +32 -0
- MEDiml/utils/textureTools.py +188 -0
- MEDiml/utils/texture_features_names.py +115 -0
- MEDiml/utils/write_radiomics_csv.py +47 -0
- MEDiml/wrangling/DataManager.py +1724 -0
- MEDiml/wrangling/ProcessDICOM.py +512 -0
- MEDiml/wrangling/__init__.py +3 -0
- mediml-0.9.9.dist-info/LICENSE.md +674 -0
- mediml-0.9.9.dist-info/METADATA +232 -0
- mediml-0.9.9.dist-info/RECORD +78 -0
- mediml-0.9.9.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,1724 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
import os
|
|
4
|
+
import pickle
|
|
5
|
+
import re
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from time import time
|
|
9
|
+
from typing import List, Union
|
|
10
|
+
|
|
11
|
+
import matplotlib.pyplot as plt
|
|
12
|
+
import nibabel as nib
|
|
13
|
+
import numpy as np
|
|
14
|
+
import pandas as pd
|
|
15
|
+
import pydicom
|
|
16
|
+
import pydicom.errors
|
|
17
|
+
import pydicom.misc
|
|
18
|
+
import ray
|
|
19
|
+
from nilearn import image
|
|
20
|
+
from numpyencoder import NumpyEncoder
|
|
21
|
+
from tqdm import tqdm, trange
|
|
22
|
+
|
|
23
|
+
from ..MEDscan import MEDscan
|
|
24
|
+
from ..processing.compute_suv_map import compute_suv_map
|
|
25
|
+
from ..processing.segmentation import get_roi_from_indexes
|
|
26
|
+
from ..utils.get_file_paths import get_file_paths
|
|
27
|
+
from ..utils.get_patient_names import get_patient_names
|
|
28
|
+
from ..utils.imref import imref3d
|
|
29
|
+
from ..utils.json_utils import load_json, save_json
|
|
30
|
+
from ..utils.save_MEDscan import save_MEDscan
|
|
31
|
+
from .ProcessDICOM import ProcessDICOM
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class DataManager(object):
|
|
35
|
+
"""Reads all the raw data (DICOM, NIfTI) content and organizes it in instances of the MEDscan class."""
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass
class DICOM:
    """DICOM data management class that will organize data during the conversion
    to MEDscan class process.

    The ``stack_*`` lists accumulate data about every RTSTRUCT file found,
    while the ``cell_*`` lists hold one entry per imaging volume (grouped by
    SeriesInstanceUID).
    """
    stack_series_rs: List   # SeriesInstanceUID referenced by each RTSTRUCT found
    stack_path_rs: List     # path of each RTSTRUCT file found
    stack_frame_rs: List    # FrameOfReferenceUID referenced by each RTSTRUCT
    cell_series_id: List    # one SeriesInstanceUID per imaging volume
    cell_path_rs: List      # per-volume list of associated RTSTRUCT paths
    cell_path_images: List  # per-volume list of image slice file paths
    cell_frame_rs: List     # NOTE(review): appears unused in this module chunk — confirm before removing
    cell_frame_id: List     # one FrameOfReferenceUID per imaging volume
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@dataclass
class NIfTI:
    """NIfTI data management class that will organize data during the conversion
    to MEDscan class process.
    """
    stack_path_images: List  # paths of the imaging volume files
    stack_path_roi: List     # paths of the ROI mask files ('ROI' in the dotted name)
    stack_path_all: List     # every *.nii / *.nii.gz file discovered
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@dataclass
class Paths:
    """Paths management class that will organize the paths used in the processing."""
    _path_to_dicoms: List                      # root directory of the DICOM data
    _path_to_niftis: List                      # root directory of the NIfTI data
    _path_csv: Union[Path, str]                # CSV file with the scans info list
    _path_save: Union[Path, str]               # where MEDscan instances are saved
    _path_save_checks: Union[Path, str]        # where pre-radiomics check results are saved
    _path_pre_checks_settings: Union[Path, str]  # JSON file of the pre-checks parameters
|
|
68
|
+
|
|
69
|
+
def __init__(
|
|
70
|
+
self,
|
|
71
|
+
path_to_dicoms: List = [],
|
|
72
|
+
path_to_niftis: List = [],
|
|
73
|
+
path_csv: Union[Path, str] = None,
|
|
74
|
+
path_save: Union[Path, str] = None,
|
|
75
|
+
path_save_checks: Union[Path, str] = None,
|
|
76
|
+
path_pre_checks_settings: Union[Path, str] = None,
|
|
77
|
+
save: bool = True,
|
|
78
|
+
n_batch: int = 2
|
|
79
|
+
) -> None:
|
|
80
|
+
"""Constructor of the class DataManager.
|
|
81
|
+
|
|
82
|
+
Args:
|
|
83
|
+
path_to_dicoms (Union[Path, str], optional): Full path to the starting directory
|
|
84
|
+
where the DICOM data is located.
|
|
85
|
+
path_to_niftis (Union[Path, str], optional): Full path to the starting directory
|
|
86
|
+
where the NIfTI is located.
|
|
87
|
+
path_csv (Union[Path, str], optional): Full path to the CSV file containing the scans info list.
|
|
88
|
+
path_save (Union[Path, str], optional): Full path to the directory where to save all the MEDscan classes.
|
|
89
|
+
path_save_checks(Union[Path, str], optional): Full path to the directory where to save all
|
|
90
|
+
the pre-radiomics checks analysis results.
|
|
91
|
+
path_pre_checks_settings(Union[Path, str], optional): Full path to the JSON file of the pre-checks analysis
|
|
92
|
+
parameters.
|
|
93
|
+
save (bool, optional): True to save the MEDscan classes in `path_save`.
|
|
94
|
+
n_batch (int, optional): Numerical value specifying the number of batch to use in the
|
|
95
|
+
parallel computations (use 0 for serial computation).
|
|
96
|
+
|
|
97
|
+
Returns:
|
|
98
|
+
None
|
|
99
|
+
"""
|
|
100
|
+
# Convert all paths to Pathlib.Path
|
|
101
|
+
if path_to_dicoms:
|
|
102
|
+
path_to_dicoms = Path(path_to_dicoms)
|
|
103
|
+
if path_to_niftis:
|
|
104
|
+
path_to_niftis = Path(path_to_niftis)
|
|
105
|
+
if path_csv:
|
|
106
|
+
path_csv = Path(path_csv)
|
|
107
|
+
if path_save:
|
|
108
|
+
path_save = Path(path_save)
|
|
109
|
+
if path_save_checks:
|
|
110
|
+
path_save_checks = Path(path_save_checks)
|
|
111
|
+
if path_pre_checks_settings:
|
|
112
|
+
path_pre_checks_settings = Path(path_pre_checks_settings)
|
|
113
|
+
|
|
114
|
+
self.paths = self.Paths(
|
|
115
|
+
path_to_dicoms,
|
|
116
|
+
path_to_niftis,
|
|
117
|
+
path_csv,
|
|
118
|
+
path_save,
|
|
119
|
+
path_save_checks,
|
|
120
|
+
path_pre_checks_settings,
|
|
121
|
+
)
|
|
122
|
+
self.save = save
|
|
123
|
+
self.n_batch = n_batch
|
|
124
|
+
self.__dicom = self.DICOM(
|
|
125
|
+
stack_series_rs=[],
|
|
126
|
+
stack_path_rs=[],
|
|
127
|
+
stack_frame_rs=[],
|
|
128
|
+
cell_series_id=[],
|
|
129
|
+
cell_path_rs=[],
|
|
130
|
+
cell_path_images=[],
|
|
131
|
+
cell_frame_rs=[],
|
|
132
|
+
cell_frame_id=[]
|
|
133
|
+
)
|
|
134
|
+
self.__nifti = self.NIfTI(
|
|
135
|
+
stack_path_images=[],
|
|
136
|
+
stack_path_roi=[],
|
|
137
|
+
stack_path_all=[]
|
|
138
|
+
)
|
|
139
|
+
self.path_to_objects = []
|
|
140
|
+
self.summary = {}
|
|
141
|
+
self.csv_data = None
|
|
142
|
+
self.__studies = []
|
|
143
|
+
self.__institutions = []
|
|
144
|
+
self.__scans = []
|
|
145
|
+
|
|
146
|
+
def __find_uid_cell_index(self, uid: Union[str, List[str]], cell: List[str]) -> List:
|
|
147
|
+
"""Finds the cell with the same `uid`. If not is present in `cell`, creates a new position
|
|
148
|
+
in the `cell` for the new `uid`.
|
|
149
|
+
|
|
150
|
+
Args:
|
|
151
|
+
uid (Union[str, List[str]]): Unique identifier of the Series to find.
|
|
152
|
+
cell (List[str]): List of Unique identifiers of the Series.
|
|
153
|
+
|
|
154
|
+
Returns:
|
|
155
|
+
Union[List[str], str]: List or string of the uid
|
|
156
|
+
"""
|
|
157
|
+
return [len(cell)] if uid not in cell else[i for i, e in enumerate(cell) if e == uid]
|
|
158
|
+
|
|
159
|
+
def __get_list_of_files(self, dir_name: str) -> List:
|
|
160
|
+
"""Gets all files in the given directory
|
|
161
|
+
|
|
162
|
+
Args:
|
|
163
|
+
dir_name (str): directory name
|
|
164
|
+
|
|
165
|
+
Returns:
|
|
166
|
+
List: List of all files in the directory
|
|
167
|
+
"""
|
|
168
|
+
list_of_file = os.listdir(dir_name)
|
|
169
|
+
all_files = list()
|
|
170
|
+
for entry in list_of_file:
|
|
171
|
+
full_path = os.path.join(dir_name, entry)
|
|
172
|
+
if os.path.isdir(full_path):
|
|
173
|
+
all_files = all_files + self.__get_list_of_files(full_path)
|
|
174
|
+
else:
|
|
175
|
+
all_files.append(full_path)
|
|
176
|
+
|
|
177
|
+
return all_files
|
|
178
|
+
|
|
179
|
+
def __get_MEDscan_name_save(self, medscan: MEDscan) -> str:
|
|
180
|
+
"""Returns the name that will be used to save the MEDscan instance, based on the values of the attributes.
|
|
181
|
+
|
|
182
|
+
Args:
|
|
183
|
+
medscan(MEDscan): A MEDscan class instance.
|
|
184
|
+
|
|
185
|
+
Returns:
|
|
186
|
+
str: String of the name save.
|
|
187
|
+
"""
|
|
188
|
+
series_description = medscan.series_description.translate({ord(ch): '-' for ch in '/\\ ()&:*'})
|
|
189
|
+
name_id = medscan.patientID.translate({ord(ch): '-' for ch in '/\\ ()&:*'})
|
|
190
|
+
# final saving name
|
|
191
|
+
name_complete = name_id + '__' + series_description + '.' + medscan.type + '.npy'
|
|
192
|
+
return name_complete
|
|
193
|
+
|
|
194
|
+
def __associate_rt_stuct(self) -> None:
    """Associates each RTSTRUCT file to its imaging volume(s) using UIDs.

    The association is first attempted through the SeriesInstanceUID the
    RTSTRUCT references; when that lookup fails (e.g. UID missing, or the
    stacks were shortened by de-duplication), the FrameOfReferenceUID is used
    as a fallback.

    Returns:
        None
    """
    def _attach(uid, id_cell, path_rs) -> None:
        # Append the RTSTRUCT path to every imaging series matching `uid`.
        for idx in self.__find_uid_cell_index(uid, id_cell):
            if idx < len(self.__dicom.cell_path_rs):
                self.__dicom.cell_path_rs[idx] += [path_rs]

    print('--> Associating all RT objects to imaging volumes')
    n_rs = len(self.__dicom.stack_path_rs)
    # Remove duplicate series UIDs while preserving their order.
    self.__dicom.stack_series_rs = list(dict.fromkeys(self.__dicom.stack_series_rs))
    if n_rs:
        for i in trange(0, n_rs):
            try:
                # PUT ALL THE DICOM PATHS WITH THE SAME UID IN THE SAME PATH LIST
                _attach(self.__dicom.stack_series_rs[i],
                        self.__dicom.cell_series_id,
                        self.__dicom.stack_path_rs[i])
            except Exception:
                # BUGFIX: was a bare `except:` (also swallowed KeyboardInterrupt).
                # Fall back to the FrameOfReferenceUID association.
                _attach(self.__dicom.stack_frame_rs[i],
                        self.__dicom.cell_frame_id,
                        self.__dicom.stack_path_rs[i])
    print('DONE')
|
|
221
|
+
|
|
222
|
+
def __read_all_dicoms(self) -> None:
    """Reads every DICOM file found under ``paths._path_to_dicoms`` and fills the
    internal ``DICOM`` bookkeeping lists.

    Imaging files (MR/PT/CT modalities) are grouped by SeriesInstanceUID into
    ``cell_*`` lists (one entry per volume); RTSTRUCT files are stacked
    separately along with the series/frame UIDs they reference, so that
    ``__associate_rt_stuct`` can link them to their imaging volume afterwards.

    Returns:
        None
    """
    # SCANNING ALL FOLDERS IN INITIAL DIRECTORY
    print('\n--> Scanning all folders in initial directory...', end='')
    p = Path(self.paths._path_to_dicoms)
    e_rglob = '*.dcm'

    # EXTRACT ALL FILES IN THE PATH TO DICOMS
    if self.paths._path_to_dicoms.is_dir():
        # Recursive glob for *.dcm files anywhere below the root directory.
        stack_folder_temp = list(p.rglob(e_rglob))
        stack_folder = [x for x in stack_folder_temp if not x.is_dir()]
    elif str(self.paths._path_to_dicoms).find('json') != -1:
        # A JSON file may be given instead; its values point to the data folder.
        with open(self.paths._path_to_dicoms) as f:
            data = json.load(f)
        # NOTE(review): only the last value of the JSON dict is kept by this
        # loop — confirm this is intended when the file holds several entries.
        for value in data.values():
            stack_folder_temp = value
        directory_name = str(stack_folder_temp).replace("'", '').replace('[', '').replace(']', '')
        stack_folder = self.__get_list_of_files(directory_name)
    else:
        raise ValueError("The given dicom folder path either doesn't exist or not a folder.")
    # READ ALL DICOM FILES AND UPDATE ATTRIBUTES FOR FURTHER PROCESSING
    for file in tqdm(stack_folder):
        if pydicom.misc.is_dicom(file):
            try:
                info = pydicom.dcmread(str(file))
                if info.Modality in ['MR', 'PT', 'CT']:
                    # Index of the volume this slice belongs to; a brand-new
                    # UID maps to len(cell_series_id), i.e. a new entry.
                    ind_series_id = self.__find_uid_cell_index(
                        info.SeriesInstanceUID,
                        self.__dicom.cell_series_id)[0]
                    if ind_series_id == len(self.__dicom.cell_series_id):  # New volume
                        self.__dicom.cell_series_id = self.__dicom.cell_series_id + [info.SeriesInstanceUID]
                        self.__dicom.cell_frame_id += [info.FrameOfReferenceUID]
                        self.__dicom.cell_path_images += [[]]
                        self.__dicom.cell_path_rs = self.__dicom.cell_path_rs + [[]]
                    self.__dicom.cell_path_images[ind_series_id] += [file]
                elif info.Modality == 'RTSTRUCT':
                    self.__dicom.stack_path_rs += [file]
                    # Series UID referenced by the RTSTRUCT (used for the
                    # volume association); 'NotFound' triggers the frame-UID
                    # fallback in __associate_rt_stuct.
                    try:
                        series_uid = info.ReferencedFrameOfReferenceSequence[
                            0].RTReferencedStudySequence[
                            0].RTReferencedSeriesSequence[
                            0].SeriesInstanceUID
                    except:
                        series_uid = 'NotFound'
                    self.__dicom.stack_series_rs += [series_uid]
                    # Frame-of-reference UID, falling back to the top-level tag.
                    try:
                        frame_uid = info.ReferencedFrameOfReferenceSequence[0].FrameOfReferenceUID
                    except:
                        frame_uid = info.FrameOfReferenceUID
                    self.__dicom.stack_frame_rs += [frame_uid]
                else:
                    print("Modality not supported: ", info.Modality)

            except Exception as e:
                # Best-effort: a single unreadable file must not abort the scan.
                print(f'Error while reading: {file}, error: {e}\n')
                continue
    print('DONE')

    # ASSOCIATE ALL VOLUMES TO THEIR MASK
    self.__associate_rt_stuct()
|
|
286
|
+
|
|
287
|
+
def process_all_dicoms(self) -> Union[List[MEDscan], None]:
    """This function reads the DICOM content of all the sub-folder tree of a starting
    directory defined by `path_to_dicoms`. It then organizes the data (files throughout
    the starting directory are associated by 'SeriesInstanceUID') in the MEDscan class
    including the region of interest (ROI) defined by an associated RTstruct. All
    MEDscan classes hereby created are saved in `path_save` with a name varying with
    every scan.

    Returns:
        List[MEDscan]: List of MEDscan instances.
    """
    ray.init(local_mode=True, include_dashboard=True)

    print('--> Reading all DICOM objects to create MEDscan classes')
    self.__read_all_dicoms()

    print('--> Processing DICOMs and creating MEDscan objects')
    n_scans = len(self.__dicom.cell_path_images)
    # Resolve the effective batch size: serial fallback, capped at n_scans.
    if self.n_batch is None:
        n_batch = 1
    elif n_scans < self.n_batch:
        n_batch = n_scans
    else:
        n_batch = self.n_batch

    # Distribute the first tasks to all workers.
    # NOTE: the worker variable was renamed from `pd`, which shadowed the
    # module-level `import pandas as pd`.
    processors = [ProcessDICOM(
        self.__dicom.cell_path_images[i],
        self.__dicom.cell_path_rs[i],
        self.paths._path_save,
        self.save)
        for i in range(n_batch)]

    ids = [proc.process_files() for proc in processors]

    # Record the instances created by the first batch.
    for name_save in ray.get(ids):
        self.__register_processed_scan(name_save)

    nb_job_left = n_scans - n_batch

    # Distribute the remaining tasks as workers finish.
    for _ in trange(n_scans):
        _, ids = ray.wait(ids, num_returns=1)
        if nb_job_left > 0:
            idx = n_scans - nb_job_left
            proc = ProcessDICOM(
                self.__dicom.cell_path_images[idx],
                self.__dicom.cell_path_rs[idx],
                self.paths._path_save,
                self.save)
            ids.extend([proc.process_files()])
            nb_job_left -= 1

    # Record the instances created by the remaining tasks.
    for name_save in ray.get(ids):
        self.__register_processed_scan(name_save)
    print('DONE')

def __register_processed_scan(self, name_save: str) -> None:
    """Records one processed scan: appends its save path to `path_to_objects`
    and files it in the study/institution/scan-type `summary` tree.

    Extracted from the two formerly duplicated loop bodies of
    process_all_dicoms (DRY). Requires the 'study-institution-id' naming
    convention; otherwise a warning is logged when saving is enabled.

    Args:
        name_save (str): File name of the saved MEDscan instance.

    Returns:
        None
    """
    if self.paths._path_save:
        # BUGFIX: the original second pass called list.extend(str), which
        # appended the path one CHARACTER at a time; append() is correct.
        self.path_to_objects.append(str(self.paths._path_save / name_save))
    # Update processing summary
    if name_save.split('_')[0].count('-') >= 2:
        study = name_save.split('-')[0]
        institution = name_save.split('-')[1]
        scan_type = name_save[name_save.find('__')+2 : name_save.find('.')]
        if study not in self.__studies:
            self.__studies.append(study)  # add new study
        if institution not in self.__institutions:
            self.__institutions.append(institution)  # add new institution
        if study not in self.summary:
            self.summary[study] = {}
        if institution not in self.summary[study]:
            self.summary[study][institution] = {}  # add new institution
        if scan_type not in self.__scans:
            self.__scans.append(scan_type)
        if scan_type not in self.summary[study][institution]:
            self.summary[study][institution][scan_type] = []
        if name_save not in self.summary[study][institution][scan_type]:
            self.summary[study][institution][scan_type].append(name_save)
    else:
        if self.save:
            logging.warning(f"The patient ID of the following file: {name_save} does not respect the MEDiml "\
                "naming convention 'study-institution-id' (Ex: Glioma-TCGA-001)")
|
|
388
|
+
|
|
389
|
+
def __read_all_niftis(self) -> None:
    """Scans the NIfTI starting directory and sorts the discovered files into
    imaging volumes and ROI masks (stored in the class attributes).

    Returns:
        None.
    """
    print('\n--> Scanning all folders in initial directory')
    if not self.paths._path_to_niftis:
        raise ValueError("The path to the niftis is not defined")
    p = Path(self.paths._path_to_niftis)

    # Gather every NIfTI file (plain and gzipped) under the root directory.
    if not p.is_dir():
        raise TypeError(f"{p} must be a path to a directory")
    self.__nifti.stack_path_all = list(p.rglob('*.nii'))
    self.__nifti.stack_path_all.extend(list(p.rglob('*.nii.gz')))

    # A file whose dotted name contains 'ROI' is a mask; everything else
    # is treated as an imaging volume.
    all_niftis = list(self.__nifti.stack_path_all)
    for i in trange(0, len(all_niftis)):
        candidate = all_niftis[i]
        if 'ROI' in candidate.name.split("."):
            self.__nifti.stack_path_roi.append(candidate)
        else:
            self.__nifti.stack_path_images.append(candidate)
    print('DONE')
|
|
417
|
+
|
|
418
|
+
def __associate_roi_to_image(
        self,
        image_file: Union[Path, str],
        medscan: MEDscan,
        nifti: nib.Nifti1Image,
        path_roi_data: Path = None
) -> MEDscan:
    """Extracts all ROI data from the given path for the given patient ID and
    updates all class attributes with the new extracted data.

    ROI files are matched by name: they must start with the image's
    'PatientID__ImagingScanName' prefix and contain 'ROI' in their dotted name.

    Args:
        image_file (Union[Path, str]): Path to the imaging volume whose ROIs are searched.
        medscan (MEDscan): MEDscan class instance that will hold the data.
        nifti (nib.Nifti1Image): Loaded imaging volume; each ROI mask is resampled
            onto its grid before its voxel indexes are extracted.
        path_roi_data (Path, optional): Directory searched for '*.nii.gz' ROI files;
            defaults to `paths._path_to_niftis`.

    Returns:
        MEDscan: Returns a MEDscan instance with updated roi attributes.
    """
    image_file = Path(image_file)
    roi_index = 0  # sequential key for each ROI added to the instance

    if not path_roi_data:
        if not self.paths._path_to_niftis:
            raise ValueError("The path to the niftis is not defined")
        else:
            path_roi_data = self.paths._path_to_niftis

    for file in path_roi_data.glob('*.nii.gz'):
        _id = image_file.name.split("(")[0]  # id is PatientID__ImagingScanName
        # Load the patient's ROI nifti files:
        if file.name.startswith(_id) and 'ROI' in file.name.split("."):
            roi = nib.load(file)
            # Nearest-neighbour resampling keeps the mask binary on the image grid.
            roi = image.resample_to_img(roi, nifti, interpolation='nearest')
            roi_data = roi.get_fdata()
            # Naming convention: '<id>_<set>(roi_name).ROI.nii.gz' — the ROI name
            # sits between the parentheses, the name set between '__' and '('.
            roi_name = file.name[file.name.find("(") + 1 : file.name.find(")")]
            name_set = file.name[file.name.find("_") + 2 : file.name.find("(")]
            # Store the flat indexes of the non-zero mask voxels.
            medscan.data.ROI.update_indexes(key=roi_index, indexes=np.nonzero(roi_data.flatten()))
            medscan.data.ROI.update_name_set(key=roi_index, name_set=name_set)
            medscan.data.ROI.update_roi_name(key=roi_index, roi_name=roi_name)
            roi_index += 1
    return medscan
|
|
458
|
+
|
|
459
|
+
def __associate_spatialRef(self, nifti_file: Union[Path, str], medscan: MEDscan) -> MEDscan:
    """Computes the imref3d spatialRef using a NIFTI file and updates the
    spatialRef attribute of the given MEDscan instance.

    The voxel spacings are read from the diagonal of the NIfTI affine and the
    world limits are shifted so that the first voxel centre matches the NIfTI
    origin.

    Args:
        nifti_file (Union[Path, str]): Path to the nifti data.
        medscan (MEDscan): MEDscan class instance that will hold the data.

    Returns:
        MEDscan: Returns a MEDscan instance with updated spatialRef attribute.
    """
    # Loading the nifti file :
    nifti = nib.load(nifti_file)
    nifti_data = medscan.data.volume.array

    # spatialRef Creation
    # NOTE(review): only the diagonal of the affine is used — this assumes an
    # axis-aligned volume (no rotation/shear); confirm upstream guarantees it.
    pixel_x = abs(nifti.affine[0, 0])
    pixel_y = abs(nifti.affine[1, 1])
    slices = abs(nifti.affine[2, 2])
    min_grid = nifti.affine[:3, 3] * [-1.0, -1.0, 1.0]  # x and y are flipped
    min_x_grid = min_grid[0]
    min_y_grid = min_grid[1]
    min_z_grid = min_grid[2]
    size_image = np.shape(nifti_data)
    spatialRef = imref3d(size_image, abs(pixel_x), abs(pixel_y), abs(slices))
    # Translate each world-limit pair so the lower edge sits half a voxel
    # before the origin (world limits bound voxel edges, not centres).
    spatialRef.XWorldLimits = (np.array(spatialRef.XWorldLimits) -
                               (spatialRef.XWorldLimits[0] -
                                (min_x_grid-pixel_x/2))
                               ).tolist()
    spatialRef.YWorldLimits = (np.array(spatialRef.YWorldLimits) -
                               (spatialRef.YWorldLimits[0] -
                                (min_y_grid-pixel_y/2))
                               ).tolist()
    spatialRef.ZWorldLimits = (np.array(spatialRef.ZWorldLimits) -
                               (spatialRef.ZWorldLimits[0] -
                                (min_z_grid-slices/2))
                               ).tolist()

    # Converting the results into lists (keeps the instance JSON-serializable)
    spatialRef.ImageSize = spatialRef.ImageSize.tolist()
    spatialRef.XIntrinsicLimits = spatialRef.XIntrinsicLimits.tolist()
    spatialRef.YIntrinsicLimits = spatialRef.YIntrinsicLimits.tolist()
    spatialRef.ZIntrinsicLimits = spatialRef.ZIntrinsicLimits.tolist()

    # update spatialRef in the volume sub-class
    medscan.data.volume.update_spatialRef(spatialRef)

    return medscan
|
|
506
|
+
|
|
507
|
+
def __process_one_nifti(self, nifti_file: Union[Path, str], path_data) -> MEDscan:
    """
    Processes one NIfTI file to create a MEDscan class instance.

    Args:
        nifti_file (Union[Path, str]): Path to the NIfTI file.
        path_data (Union[Path, str]): Path to the data (directory searched for
            the associated ROI mask files).

    Returns:
        MEDscan: MEDscan class instance.
    """
    # ROBUSTNESS: coerce to Path — the code below relies on the `.name`
    # attribute, which a plain str does not have.
    nifti_file = Path(nifti_file)

    medscan = MEDscan()
    # File naming convention: <PatientID>__<SeriesDescription>(...).<type>.nii(.gz)
    medscan.patientID = os.path.basename(nifti_file).split("_")[0]
    medscan.type = os.path.basename(nifti_file).split(".")[-3]
    medscan.series_description = nifti_file.name[nifti_file.name.find('__') + 2: nifti_file.name.find('(')]
    medscan.format = "nifti"
    medscan.data.set_orientation(orientation="Axial")
    medscan.data.set_patient_position(patient_position="HFS")
    medscan.data.volume.array = nib.load(nifti_file).get_fdata()
    medscan.data.volume.scan_rot = None

    # Update spatialRef.
    # CONSISTENCY: re-assign the returned instance, as process_all_niftis does
    # (the original discarded the return value).
    medscan = self.__associate_spatialRef(nifti_file, medscan)

    # Associate ROI
    medscan = self.__associate_roi_to_image(nifti_file, medscan, nib.load(nifti_file), path_data)

    return medscan
|
|
535
|
+
|
|
536
|
+
def process_all(self) -> None:
    """Processes both DICOM & NIfTI content to create MEDscan classes.

    Convenience wrapper: runs the DICOM pipeline first, then the NIfTI one.

    Returns:
        None
    """
    self.process_all_dicoms()
    self.process_all_niftis()
|
|
541
|
+
|
|
542
|
+
def process_all_niftis(self) -> List[MEDscan]:
    """This function reads the NIfTI content of all the sub-folder tree of a
    starting directory. It then organizes the data in the MEDscan class including
    the region of interest (ROI) defined by an associated mask file. All MEDscan
    classes hereby created are saved in a specific path with a name specific name
    varying with every scan.

    Args:
        None.

    Returns:
        List[MEDscan]: List of MEDscan instances (only when saving is disabled
        or no save path is set; at most 10 are kept in memory).
    """

    # Reading all NIfTI files
    self.__read_all_niftis()

    # Create the MEDscan instances
    print('--> Reading all NIfTI objects (imaging volumes & masks) to create MEDscan classes')
    list_instances = []
    for file in tqdm(self.__nifti.stack_path_images):
        # Assert the list of instances does not exceed the a size of 10
        # (memory guard: remaining files are skipped once 10 instances are held)
        if len(list_instances) >= 10:
            print('The number of MEDscan instances exceeds 10, please consider saving the instances')
            break
        # INITIALIZE MEDscan INSTANCE AND UPDATE ATTRIBUTES
        # File naming convention: <PatientID>__<SeriesDescription>(...).<type>.nii(.gz)
        medscan = MEDscan()
        medscan.patientID = os.path.basename(file).split("_")[0]
        medscan.type = os.path.basename(file).split(".")[-3]
        medscan.series_description = file.name[file.name.find('__') + 2: file.name.find('(')]
        medscan.format = "nifti"
        medscan.data.set_orientation(orientation="Axial")
        medscan.data.set_patient_position(patient_position="HFS")
        medscan.data.volume.array = nib.load(file).get_fdata()

        # RAS to LPS
        #medscan.data.volume.convert_to_LPS()
        medscan.data.volume.scan_rot = None

        # Update spatialRef
        medscan = self.__associate_spatialRef(file, medscan)

        # Get ROI
        medscan = self.__associate_roi_to_image(file, medscan, nib.load(file))

        # SAVE MEDscan INSTANCE
        if self.save and self.paths._path_save:
            save_MEDscan(medscan, self.paths._path_save)
        else:
            list_instances.append(medscan)

        # Compute the save name before releasing the instance
        name_save = self.__get_MEDscan_name_save(medscan)

        # Clear memory
        del medscan

        # Update the path to the created instances
        if self.paths._path_save:
            self.path_to_objects.append(str(self.paths._path_save / name_save))

        # Update processing summary (requires 'study-institution-id' naming)
        if name_save.split('_')[0].count('-') >= 2:
            scan_type = name_save[name_save.find('__')+2 : name_save.find('.')]
            if name_save.split('-')[0] not in self.__studies:
                self.__studies.append(name_save.split('-')[0])  # add new study
            if name_save.split('-')[1] not in self.__institutions:
                self.__institutions.append(name_save.split('-')[1])  # add new institution
            if name_save.split('-')[0] not in self.summary:
                self.summary[name_save.split('-')[0]] = {}  # add new study to summary
            if name_save.split('-')[1] not in self.summary[name_save.split('-')[0]]:
                self.summary[name_save.split('-')[0]][name_save.split('-')[1]] = {}  # add new institution
            if scan_type not in self.__scans:
                self.__scans.append(scan_type)
            if scan_type not in self.summary[name_save.split('-')[0]][name_save.split('-')[1]]:
                self.summary[name_save.split('-')[0]][name_save.split('-')[1]][scan_type] = []
            if name_save not in self.summary[name_save.split('-')[0]][name_save.split('-')[1]][scan_type]:
                self.summary[name_save.split('-')[0]][name_save.split('-')[1]][scan_type].append(name_save)
        else:
            if self.save:
                logging.warning(f"The patient ID of the following file: {name_save} does not respect the MEDiml "\
                    "naming convention 'study-institution-id' (Ex: Glioma-TCGA-001)")
    print('DONE')

    if list_instances:
        return list_instances
|
|
627
|
+
|
|
628
|
+
def update_from_csv(self, path_csv: Union[str, Path] = None) -> None:
    """Updates the class from a given CSV and summarizes the processed scans again according to it.

    The ROI type label is derived from the CSV file name: everything between the first
    underscore and the first dot (e.g. ``roiNames_GTV.csv`` -> ``GTV``).

    Args:
        path_csv (Union[str, Path], optional): Path to a csv file, if not given, will check
            for csv info in the class attributes.

    Returns:
        None
    """
    if not (path_csv or self.paths._path_csv):
        print('No csv provided, no updates will be made')
    else:
        if path_csv:
            # Fix: coerce to Path so a plain str argument (allowed by the signature)
            # does not break the `.name` attribute access below.
            self.paths._path_csv = Path(path_csv)

        # Extract roi type label from the csv file name, e.g. 'roiNames_GTV.csv' -> 'GTV'
        name_csv = self.paths._path_csv.name
        roi_type_label = name_csv[name_csv.find('_') + 1 : name_csv.find('.')]

        # Map the roi type label to the loaded table, then rebuild the summary
        csv_data = {}
        csv_data[roi_type_label] = pd.read_csv(self.paths._path_csv)
        self.csv_data = csv_data
        self.summarize()
|
|
652
|
+
|
|
653
|
+
def summarize(self, retrun_summary: bool = False) -> None:
    """Creates and shows a summary of processed scans organized by study, institution, scan type and roi type.

    The summary tree is ``self.summary[study][institution][scan_type] -> list of name_save``.
    When ``self.csv_data`` is set, an extra per-roi-type row counts the processed scans
    that also appear in the CSV (name rebuilt as ``PatientID__ImagingScanName.ImagingModality.npy``).

    Note:
        The parameter name ``retrun_summary`` (sic) is kept as-is for backward compatibility.

    Args:
        retrun_summary (bool, optional): If True, will return the summary as a DataFrame.

    Returns:
        None or pd.DataFrame: The summary table when ``retrun_summary`` is True.
    """
    def count_scans(summary) -> int:
        # Recursively counts scan names in a summary sub-tree (dict levels end in lists).
        # Fix: iterate the *passed-in* sub-tree everywhere; the original read
        # self.summary at the innermost level, which miscounted partial sub-trees.
        count = 0
        if type(summary) == dict:
            for study in summary:
                if type(summary[study]) == dict:
                    for institution in summary[study]:
                        if type(summary[study][institution]) == dict:
                            for scan in summary[study][institution]:
                                count += len(summary[study][institution][scan])
                        else:
                            count += len(summary[study][institution])
                else:
                    count += len(summary[study])
        elif type(summary) == list:
            count = len(summary)
        return count

    def add_row(df, study, institution, scan_type, roi_type, count):
        # pandas >= 2.0 removed DataFrame.append; build a one-row frame and concat.
        row = pd.DataFrame([{
            'study': study,
            'institution': institution,
            'scan_type': scan_type,
            'roi_type': roi_type,
            'count': count
        }])
        return pd.concat([df, row], ignore_index=True)

    summary_df = pd.DataFrame(columns=['study', 'institution', 'scan_type', 'roi_type', 'count'])

    for study in self.summary:
        # Fix: the study-level row now counts only this study's scans
        # (the original passed the whole self.summary tree for every study).
        summary_df = add_row(summary_df, study, "", "", "",
                             count_scans(self.summary[study]))
        for institution in self.summary[study]:
            summary_df = add_row(summary_df, study, institution, "", "",
                                 count_scans(self.summary[study][institution]))
            for scan in self.summary[study][institution]:
                summary_df = add_row(summary_df, study, institution, scan, "",
                                     count_scans(self.summary[study][institution][scan]))
                if self.csv_data:
                    # roi_count accumulates across roi types for this scan,
                    # matching the original behavior (reset once per scan).
                    roi_count = 0
                    for roi_type in self.csv_data:
                        csv_table = pd.DataFrame(self.csv_data[roi_type])
                        # Helper literal columns used to rebuild the saved-file name.
                        csv_table['under'] = '_'
                        csv_table['dot'] = '.'
                        csv_table['npy'] = '.npy'
                        name_patients = (pd.Series(
                            csv_table[['PatientID', 'under', 'under',
                                       'ImagingScanName',
                                       'dot',
                                       'ImagingModality',
                                       'npy']].fillna('').values.tolist()).str.join('')).tolist()
                        for patient_id in self.summary[study][institution][scan]:
                            if patient_id in name_patients:
                                roi_count += 1
                        summary_df = add_row(summary_df, study, institution, scan,
                                             roi_type, roi_count)
    try:
        print(summary_df.to_markdown(index=False))
    except ImportError:
        # to_markdown needs the optional 'tabulate' package; degrade gracefully.
        print(summary_df.to_string(index=False))

    if retrun_summary:
        return summary_df
|
|
732
|
+
|
|
733
|
+
def __pre_radiomics_checks_dimensions(
        self,
        path_data: Union[Path, str] = None,
        wildcards_dimensions: List[str] = [],
        min_percentile: float = 0.05,
        max_percentile: float = 0.95,
        save: bool = False
) -> None:
    """Finds proper voxels dimension options for radiomics analyses for a group of scans

    Args:
        path_data (Path, optional): Path to the MEDscan objects, if not specified will use ``path_save`` from the
            inner-class ``Paths`` in the current instance.
        wildcards_dimensions(List[str], optional): List of wildcards that determines the scans
            that will be analyzed. You can learn more about wildcards in
            `this link <https://www.linuxtechtips.com/2013/11/how-wildcards-work-in-linux-and-unix.html>`_.
        min_percentile (float, optional): Minimum percentile to use for the histograms. Defaults to 0.05.
        max_percentile (float, optional): Maximum percentile to use for the histograms. Defaults to 0.95.
        save (bool, optional): If True, will save the results in a json file. Defaults to False.

    Returns:
        None.
    """
    # Accumulators for in-plane (xy) and slice (z) voxel spacing statistics.
    # "data" is refilled for each wildcard; the statistics keys are overwritten too.
    xy_dim = {
        "data": [],
        "mean": [],
        "median": [],
        "std": [],
        "min": [],
        "max": [],
        f"p{min_percentile}": [],
        f"p{max_percentile}": []
    }
    z_dim = {
        "data": [],
        "mean": [],
        "median": [],
        "std": [],
        "min": [],
        "max": [],
        f"p{min_percentile}": [],
        f"p{max_percentile}": []
    }
    # Accept a single wildcard passed as a bare string.
    if type(wildcards_dimensions) is str:
        wildcards_dimensions = [wildcards_dimensions]

    if len(wildcards_dimensions) == 0:
        print("Wildcard is empty, the pre-checks will be aborted")
        return

    # Updating plotting params
    plt.rcParams["figure.figsize"] = (20,20)
    plt.rcParams.update({'font.size': 22})

    # TODO: seperate by studies and scan type (MRscan, CTscan...)
    # TODO: Two summaries (df, list of names saves) ->
    # name_save = name_save(ROI) : Glioma-Huashan-001__T1.MRscan.npy({GTV})
    file_paths = list()
    for w in range(len(wildcards_dimensions)):
        wildcard = wildcards_dimensions[w]
        # Resolve the folder to scan: explicit path_data wins, else the
        # instance's save path; no valid source is an error.
        if path_data:
            file_paths = get_file_paths(path_data, wildcard)
        elif self.paths._path_save:
            file_paths = get_file_paths(self.paths._path_save, wildcard)
        else:
            raise ValueError("Path data is invalid.")
        n_files = len(file_paths)
        # Pre-fill with NaN so files that fail to load are excluded from the stats below.
        xy_dim["data"] = np.zeros((n_files, 1))
        xy_dim["data"] = np.multiply(xy_dim["data"], np.nan)
        z_dim["data"] = np.zeros((n_files, 1))
        z_dim["data"] = np.multiply(z_dim["data"], np.nan)
        for f in tqdm(range(len(file_paths))):
            try:
                if file_paths[f].name.endswith("nii.gz") or file_paths[f].name.endswith("nii"):
                    # NIfTI: voxel sizes come from the header zooms (x, y, z).
                    medscan = nib.load(file_paths[f])
                    xy_dim["data"][f] = medscan.header.get_zooms()[0]
                    z_dim["data"][f] = medscan.header.get_zooms()[2]
                else:
                    # Pickled MEDscan object saved as .npy.
                    # NOTE(review): allow_pickle on saved objects is only safe for trusted data.
                    medscan = np.load(file_paths[f], allow_pickle=True)
                    xy_dim["data"][f] = medscan.data.volume.spatialRef.PixelExtentInWorldX
                    z_dim["data"][f] = medscan.data.volume.spatialRef.PixelExtentInWorldZ
            except Exception as e:
                # Best-effort: a bad file is reported and its slot stays NaN.
                print(e)

        # Running analysis (NaN entries are masked out of every statistic)
        xy_dim["data"] = np.concatenate(xy_dim["data"])
        xy_dim["mean"] = np.mean(xy_dim["data"][~np.isnan(xy_dim["data"])])
        xy_dim["median"] = np.median(xy_dim["data"][~np.isnan(xy_dim["data"])])
        xy_dim["std"] = np.std(xy_dim["data"][~np.isnan(xy_dim["data"])])
        xy_dim["min"] = np.min(xy_dim["data"][~np.isnan(xy_dim["data"])])
        xy_dim["max"] = np.max(xy_dim["data"][~np.isnan(xy_dim["data"])])
        # NOTE(review): np.percentile expects q in [0, 100]; min_percentile defaults
        # to 0.05 so this computes the 0.05th percentile, whereas the plots below use
        # DataFrame.quantile on the 0-1 scale — confirm which scale is intended.
        xy_dim[f"p{min_percentile}"] = np.percentile(xy_dim["data"][~np.isnan(xy_dim["data"])],
                                                    min_percentile)
        xy_dim[f"p{max_percentile}"] = np.percentile(xy_dim["data"][~np.isnan(xy_dim["data"])],
                                                    max_percentile)
        # z_dim["data"] is not concatenated like xy_dim["data"]; the boolean mask
        # flattens it anyway, so the statistics are unaffected.
        z_dim["mean"] = np.mean(z_dim["data"][~np.isnan(z_dim["data"])])
        z_dim["median"] = np.median(z_dim["data"][~np.isnan(z_dim["data"])])
        z_dim["std"] = np.std(z_dim["data"][~np.isnan(z_dim["data"])])
        z_dim["min"] = np.min(z_dim["data"][~np.isnan(z_dim["data"])])
        z_dim["max"] = np.max(z_dim["data"][~np.isnan(z_dim["data"])])
        z_dim[f"p{min_percentile}"] = np.percentile(z_dim["data"][~np.isnan(z_dim["data"])],
                                                    min_percentile)
        z_dim[f"p{max_percentile}"] = np.percentile(z_dim["data"][~np.isnan(z_dim["data"])], max_percentile)
        # Convert to plain lists so the dicts are JSON-serializable below.
        xy_dim["data"] = xy_dim["data"].tolist()
        z_dim["data"] = z_dim["data"].tolist()

        # Plotting xy-spacing data histogram
        df_xy = pd.DataFrame(xy_dim["data"], columns=['data'])
        del xy_dim["data"] # no interest in keeping data (we only need statistics)
        ax = df_xy.hist(column='data')
        min_quant, max_quant, median = df_xy.quantile(min_percentile), df_xy.quantile(max_percentile), df_xy.median()
        # Overlay the percentile and median markers on every histogram axis.
        for x in ax[0]:
            x.axvline(min_quant.data, linestyle=':', color='r', label=f"Min Percentile: {float(min_quant):.3f}")
            x.axvline(max_quant.data, linestyle=':', color='g', label=f"Max Percentile: {float(max_quant):.3f}")
            x.axvline(median.data, linestyle='solid', color='gold', label=f"Median: {float(median.data):.3f}")
            x.grid(False)
        plt.title(f"Voxels xy-spacing checks for {wildcard}")
        plt.legend()
        # Save the plot
        # NOTE(review): the png name is not wildcard-specific, so multiple wildcards
        # overwrite the same file — confirm if per-wildcard names were intended.
        if save:
            plt.savefig(self.paths._path_save_checks / ('Voxels_xy_check.png'))
        else:
            plt.show()

        # Plotting z-spacing data histogram
        df_z = pd.DataFrame(z_dim["data"], columns=['data'])
        del z_dim["data"] # no interest in keeping data (we only need statistics)
        ax = df_z.hist(column='data')
        min_quant, max_quant, median = df_z.quantile(min_percentile), df_z.quantile(max_percentile), df_z.median()
        for x in ax[0]:
            x.axvline(min_quant.data, linestyle=':', color='r', label=f"Min Percentile: {float(min_quant):.3f}")
            x.axvline(max_quant.data, linestyle=':', color='g', label=f"Max Percentile: {float(max_quant):.3f}")
            x.axvline(median.data, linestyle='solid', color='gold', label=f"Median: {float(median.data):.3f}")
            x.grid(False)
        plt.title(f"Voxels z-spacing checks for {wildcard}")
        plt.legend()
        # Save the plot
        if save:
            plt.savefig(self.paths._path_save_checks / ('Voxels_z_check.png'))
        else:
            plt.show()

        # Saving files using wildcard for name (e.g. '*.MRscan.npy' -> 'xyDim_.MRscan.json')
        if save:
            wildcard = str(wildcard).replace('*', '').replace('.npy', '.json')
            save_json(self.paths._path_save_checks / ('xyDim_' + wildcard), xy_dim, cls=NumpyEncoder)
            save_json(self.paths._path_save_checks / ('zDim_' + wildcard), z_dim, cls=NumpyEncoder)
|
|
880
|
+
|
|
881
|
+
def __pre_radiomics_checks_window(
        self,
        path_data: Union[str, Path] = None,
        wildcards_window: List = [],
        path_csv: Union[str, Path] = None,
        min_percentile: float = 0.05,
        max_percentile: float = 0.95,
        bin_width: int = 0,
        hist_range: list = [],
        nifti: bool = True,
        save: bool = False
) -> None:
    """Finds proper re-segmentation ranges options for radiomics analyses for a group of scans

    Args:
        path_data (Path, optional): Path to the MEDscan objects, if not specified will use ``path_save`` from the
            inner-class ``Paths`` in the current instance.
        wildcards_window(List[str], optional): List of wildcards that determines the scans
            that will be analyzed. You can learn more about wildcards in
            `this link <https://www.linuxtechtips.com/2013/11/how-wildcards-work-in-linux-and-unix.html>`_.
        path_csv(Union[str, Path], optional): Path to a csv file containing a list of the scans that will be
            analyzed (a CSV file for a single ROI type).
        min_percentile (float, optional): Minimum percentile to use for the histograms. Defaults to 0.05.
        max_percentile (float, optional): Maximum percentile to use for the histograms. Defaults to 0.95.
        bin_width(int, optional): Width of the bins for the histograms. If not provided, will use the
            default number of bins in the method
            `pandas.DataFrame.hist <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.hist.html>`_: 10 bins.
        hist_range(list, optional): Range of the histograms. If empty, will use the minimum and maximum values.
        nifti(bool, optional): If True, will use the NIfTI files, otherwise will use the numpy files.
        save (bool, optional): If True, will save the results in a json file. Defaults to False.

    Returns:
        None.
    """
    # Updating plotting params
    plt.rcParams["figure.figsize"] = (20,20)
    plt.rcParams.update({'font.size': 22})

    # Accept a single wildcard passed as a bare string.
    if type(wildcards_window) is str:
        wildcards_window = [wildcards_window]

    if len(wildcards_window) == 0:
        print("Wilcards is empty")
        return
    if path_csv:
        self.paths._path_csv = Path(path_csv)
    # NOTE(review): read_csv is unconditional — if neither path_csv nor a previously
    # set self.paths._path_csv exists this raises; confirm callers always provide one.
    roi_table = pd.read_csv(self.paths._path_csv)
    if nifti:
        # Rebuild expected NIfTI file names:
        # PatientID__ImagingScanName(GTV).ImagingModality.nii.gz
        # NOTE(review): the roi label is hard-coded to 'GTV' here — confirm intended.
        roi_table['under'] = '_'
        roi_table['dot'] = '.'
        roi_table['roi_label'] = 'GTV'
        roi_table['oparenthesis'] = '('
        roi_table['cparenthesis'] = ')'
        roi_table['ext'] = '.nii.gz'
        patient_names = (pd.Series(
            roi_table[['PatientID', 'under', 'under',
                       'ImagingScanName',
                       'oparenthesis',
                       'roi_label',
                       'cparenthesis',
                       'dot',
                       'ImagingModality',
                       'ext']].fillna('').values.tolist()).str.join('')).tolist()
    else:
        # Numpy/MEDscan naming is delegated to the project helper.
        roi_names = [[], [], []]
        roi_names[0] = roi_table['PatientID']
        roi_names[1] = roi_table['ImagingScanName']
        roi_names[2] = roi_table['ImagingModality']
        patient_names = get_patient_names(roi_names)
    for w in range(len(wildcards_window)):
        # Per-wildcard accumulators; roi_data collects all in-ROI intensities.
        temp_val = []
        temp = []
        file_paths = []
        roi_data= {
            "data": [],
            "mean": [],
            "median": [],
            "std": [],
            "min": [],
            "max": [],
            f"p{min_percentile}": [],
            f"p{max_percentile}": []
        }
        wildcard = wildcards_window[w]
        if path_data:
            file_paths = get_file_paths(path_data, wildcard)
        elif self.paths._path_save:
            # path_data is also set here so __process_one_nifti below gets a folder.
            path_data = self.paths._path_save
            file_paths = get_file_paths(self.paths._path_save, wildcard)
        else:
            raise ValueError("Path data is invalid.")
        n_files = len(file_paths)
        # i indexes successfully processed files only (failures are skipped).
        i = 0
        for f in tqdm(range(n_files)):
            file = file_paths[f]
            _, filename = os.path.split(file)
            filename, ext = os.path.splitext(filename)
            patient_name = filename + ext
            try:
                if file.name.endswith('nii.gz') or file.name.endswith('nii'):
                    medscan = self.__process_one_nifti(file, path_data)
                else:
                    medscan = np.load(file, allow_pickle=True)
                # PET scans are converted to SUV before intensity analysis.
                # NOTE(review): dicomH[2] assumes at least three DICOM headers — confirm.
                if re.search('PTscan', wildcard) and medscan.format != 'nifti':
                    medscan.data.volume.array = compute_suv_map(
                        np.double(medscan.data.volume.array),
                        medscan.dicomH[2])
                # Locate this file's row in the CSV to get its ROI name (column 3).
                patient_names = pd.Index(patient_names)
                ind_roi = patient_names.get_loc(patient_name)
                name_roi = roi_table.loc[ind_roi][3]
                vol_obj_init, roi_obj_init = get_roi_from_indexes(medscan, name_roi, 'box')
                # Keep only voxels inside the ROI mask.
                temp = vol_obj_init.data[roi_obj_init.data == 1]
                temp_val.append(len(temp))
                # NOTE(review): the zeros array appended here is immediately replaced
                # by temp on the next line — the allocation looks redundant; confirm.
                roi_data["data"].append(np.zeros(shape=(n_files, temp_val[i])))
                roi_data["data"][i] = temp
                i+=1
                # Free the large per-file objects before the next iteration.
                del medscan
                del vol_obj_init
                del roi_obj_init
            except Exception as e:
                print(f"Problem with patient {patient_name}, error: {e}")

        # Pool all ROI intensities and compute summary statistics (NaNs masked out).
        roi_data["data"] = np.concatenate(roi_data["data"])
        roi_data["mean"] = np.mean(roi_data["data"][~np.isnan(roi_data["data"])])
        roi_data["median"] = np.median(roi_data["data"][~np.isnan(roi_data["data"])])
        roi_data["std"] = np.std(roi_data["data"][~np.isnan(roi_data["data"])])
        roi_data["min"] = np.min(roi_data["data"][~np.isnan(roi_data["data"])])
        roi_data["max"] = np.max(roi_data["data"][~np.isnan(roi_data["data"])])
        # NOTE(review): np.percentile expects q in [0, 100]; these pass 0-1 fractions
        # while the plot below uses DataFrame.quantile (0-1) — confirm intended scale.
        roi_data[f"p{min_percentile}"] = np.percentile(roi_data["data"][~np.isnan(roi_data["data"])],
                                                    min_percentile)
        roi_data[f"p{max_percentile}"] = np.percentile(roi_data["data"][~np.isnan(roi_data["data"])],
                                                    max_percentile)

        # Set bin width if not provided (derive one of bin_width/nb_bins from the other)
        if bin_width != 0:
            if hist_range:
                nb_bins = (round(hist_range[1]) - round(hist_range[0])) // bin_width
            else:
                nb_bins = (round(roi_data["max"]) - round(roi_data["min"])) // bin_width
        else:
            nb_bins = 10
            if hist_range:
                bin_width = int((round(hist_range[1]) - round(hist_range[0])) // nb_bins)
            else:
                bin_width = int((round(roi_data["max"]) - round(roi_data["min"])) // nb_bins)
        nb_bins = int(nb_bins)

        # Set histogram range if not provided
        if not hist_range:
            hist_range = (roi_data["min"], roi_data["max"])

        # re-segment data according to histogram range (strict bounds: edges excluded)
        roi_data["data"] = roi_data["data"][(roi_data["data"] > hist_range[0]) & (roi_data["data"] < hist_range[1])]
        df_data = pd.DataFrame(roi_data["data"], columns=['data'])
        del roi_data["data"] # no interest in keeping data (we only need statistics)

        # Plot histogram with percentile markers
        ax = df_data.hist(column='data', bins=nb_bins, range=(hist_range[0], hist_range[1]), edgecolor='black')
        min_quant, max_quant= df_data.quantile(min_percentile), df_data.quantile(max_percentile)
        for x in ax[0]:
            x.axvline(min_quant.data, linestyle=':', color='r', label=f"{min_percentile*100}% Percentile: {float(min_quant):.3f}")
            x.axvline(max_quant.data, linestyle=':', color='g', label=f"{max_percentile*100}% Percentile: {float(max_quant):.3f}")
            x.grid(False)
            x.xaxis.set_ticks(np.arange(hist_range[0], hist_range[1], bin_width, dtype=int))
            x.set_xticklabels(x.get_xticks(), rotation=45)
            x.xaxis.set_tick_params(pad=15)
        plt.title(f"Intensity range checks for {wildcard}, bw={bin_width}")
        plt.legend()
        # Save the plot
        if save:
            plt.savefig(self.paths._path_save_checks / ('Intensity_range_check_' + f'bw_{bin_width}.png'))
        else:
            plt.show()

        # save final checks (statistics only; raw data was deleted above)
        if save:
            wildcard = str(wildcard).replace('*', '').replace('.npy', '.json')
            save_json(self.paths._path_save_checks / ('roi_data_' + wildcard), roi_data, cls=NumpyEncoder)
|
|
1059
|
+
|
|
1060
|
+
def pre_radiomics_checks(self,
                         path_data: Union[str, Path] = None,
                         wildcards_dimensions: List = [],
                         wildcards_window: List = [],
                         path_csv: Union[str, Path] = None,
                         min_percentile: float = 0.05,
                         max_percentile: float = 0.95,
                         bin_width: int = 0,
                         hist_range: list = [],
                         nifti: bool = False,
                         save: bool = False) -> None:
    """Finds proper dimension and re-segmentation ranges options for radiomics analyses.

    The resulting files from this method can then be analyzed and used to set up radiomics
    parameters options in computation methods.

    Args:
        path_data (Path, optional): Path to the MEDscan objects, if not specified will use ``path_save`` from the
            inner-class ``Paths`` in the current instance.
        wildcards_dimensions(List[str], optional): List of wildcards that determines the scans
            that will be analyzed. You can learn more about wildcards in
            `this link <https://www.linuxtechtips.com/2013/11/how-wildcards-work-in-linux-and-unix.html>`_.
        wildcards_window(List[str], optional): List of wildcards that determines the scans
            that will be analyzed. You can learn more about wildcards in
            `this link <https://www.linuxtechtips.com/2013/11/how-wildcards-work-in-linux-and-unix.html>`_.
        path_csv(Union[str, Path], optional): Path to a csv file containing a list of the scans that will be
            analyzed (a CSV file for a single ROI type).
        min_percentile (float, optional): Minimum percentile to use for the histograms. Defaults to 0.05.
        max_percentile (float, optional): Maximum percentile to use for the histograms. Defaults to 0.95.
        bin_width(int, optional): Width of the bins for the histograms. If not provided, will use the
            default number of bins in the method
            `pandas.DataFrame.hist <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.hist.html>`_: 10 bins.
        hist_range(list, optional): Range of the histograms. If empty, will use the minimum and maximum values.
        nifti (bool, optional): Set to True if the scans are nifti files. Defaults to False.
        save (bool, optional): If True, will save the results in a json file. Defaults to False.

    Returns:
        None
    """
    # Initialization — default location for the 'checks' output folder.
    path_study = Path.cwd()

    # Load params: with no settings file, both wildcard lists must be given
    # explicitly; otherwise missing values are filled from the settings file.
    if not self.paths._path_pre_checks_settings:
        if not wildcards_dimensions or not wildcards_window:
            raise ValueError("path to pre-checks settings is None.\
                wildcards_dimensions and wildcards_window need to be specified")
    else:
        settings = self.paths._path_pre_checks_settings
        settings = load_json(settings)
        settings = settings['pre_radiomics_checks']

        # Setting up paths (settings-file values override instance defaults)
        if 'path_save_checks' in settings and settings['path_save_checks']:
            self.paths._path_save_checks = Path(settings['path_save_checks'])
        if 'path_csv' in settings and settings['path_csv']:
            self.paths._path_csv = Path(settings['path_csv'])

        # Wildcards of groups of files to analyze for dimensions in path_data.
        # See for example: https://www.linuxtechtips.com/2013/11/how-wildcards-work-in-linux-and-unix.html
        # Keep the cell empty if no dimension checks are to be performed.
        if not wildcards_dimensions:
            wildcards_dimensions = []
            for i in range(len(settings['wildcards_dimensions'])):
                wildcards_dimensions.append(settings['wildcards_dimensions'][i])

        # ROI intensity window checks params
        if not wildcards_window:
            wildcards_window = []
            for i in range(len(settings['wildcards_window'])):
                wildcards_window.append(settings['wildcards_window'][i])

    # PRE-RADIOMICS CHECKS
    # Ensure a 'checks' folder exists and _path_save_checks points into it.
    if not self.paths._path_save_checks:
        if (path_study / 'checks').exists():
            self.paths._path_save_checks = Path(path_study / 'checks')
        else:
            os.mkdir(path_study / 'checks')
            self.paths._path_save_checks = Path(path_study / 'checks')
    else:
        if self.paths._path_save_checks.name != 'checks':
            if (self.paths._path_save_checks / 'checks').exists():
                self.paths._path_save_checks /= 'checks'
            else:
                os.mkdir(self.paths._path_save_checks / 'checks')
                self.paths._path_save_checks = Path(self.paths._path_save_checks / 'checks')

    # Initializing plotting params
    plt.rcParams["figure.figsize"] = (20,20)
    plt.rcParams.update({'font.size': 22})

    start = time()
    print('\n\n************************* PRE-RADIOMICS CHECKS *************************', end='')

    # 1. PRE-RADIOMICS CHECKS -- DIMENSIONS (voxel spacing histograms)
    start1 = time()
    print('\n--> PRE-RADIOMICS CHECKS -- DIMENSIONS ... ', end='')
    self.__pre_radiomics_checks_dimensions(
        path_data,
        wildcards_dimensions,
        min_percentile,
        max_percentile,
        save)
    print('DONE', end='')
    time1 = f"{time() - start1:.2f}"
    print(f'\nElapsed time: {time1} sec', end='')

    # 2. PRE-RADIOMICS CHECKS - WINDOW (ROI intensity range histograms)
    start2 = time()
    print('\n\n--> PRE-RADIOMICS CHECKS -- WINDOW ... \n', end='')
    self.__pre_radiomics_checks_window(
        path_data,
        wildcards_window,
        path_csv,
        min_percentile,
        max_percentile,
        bin_width,
        hist_range,
        nifti,
        save)
    print('DONE', end='')
    time2 = f"{time() - start2:.2f}"
    print(f'\nElapsed time: {time2} sec', end='')

    time_elapsed = f"{time() - start:.2f}"
    print(f'\n\n--> TOTAL TIME FOR PRE-RADIOMICS CHECKS: {time_elapsed} seconds')
    print('-------------------------------------------------------------------------------------')
|
|
1187
|
+
|
|
1188
|
+
def perform_mr_imaging_summary(self,
|
|
1189
|
+
wildcards_scans: List[str],
|
|
1190
|
+
path_data: Path = None,
|
|
1191
|
+
path_save_checks: Path = None,
|
|
1192
|
+
min_percentile: float = 0.05,
|
|
1193
|
+
max_percentile: float = 0.95
|
|
1194
|
+
) -> None:
|
|
1195
|
+
"""
|
|
1196
|
+
Summarizes MRI imaging acquisition parameters. Plots summary histograms
|
|
1197
|
+
for different dimensions and saves all acquisition parameters locally in JSON files.
|
|
1198
|
+
|
|
1199
|
+
Args:
|
|
1200
|
+
wildcards_scans (List[str]): List of wildcards that determines the scans
|
|
1201
|
+
that will be analyzed (Only MRI scans will be analyzed). You can learn more about wildcards in
|
|
1202
|
+
`this link <https://www.linuxtechtips.com/2013/11/how-wildcards-work-in-linux-and-unix.html>`_.
|
|
1203
|
+
For example: ``[\"STS*.MRscan.npy\"]``.
|
|
1204
|
+
path_data (Path, optional): Path to the MEDscan objects, if not specified will use ``path_save`` from the
|
|
1205
|
+
inner-class ``Paths`` in the current instance.
|
|
1206
|
+
path_save_checks (Path, optional): Path where to save the checks, if not specified will use the one
|
|
1207
|
+
in the current instance.
|
|
1208
|
+
min_percentile (float, optional): Minimum percentile to use for the histograms. Defaults to 0.05.
|
|
1209
|
+
max_percentile (float, optional): Maximum percentile to use for the histograms. Defaults to 0.95.
|
|
1210
|
+
|
|
1211
|
+
Returns:
|
|
1212
|
+
None.
|
|
1213
|
+
"""
|
|
1214
|
+
# Initializing data structures
|
|
1215
|
+
class param:
|
|
1216
|
+
dates = []
|
|
1217
|
+
manufacturer = []
|
|
1218
|
+
scanning_sequence = []
|
|
1219
|
+
class years:
|
|
1220
|
+
data = []
|
|
1221
|
+
|
|
1222
|
+
class fieldStrength:
|
|
1223
|
+
data = []
|
|
1224
|
+
|
|
1225
|
+
class repetitionTime:
|
|
1226
|
+
data = []
|
|
1227
|
+
|
|
1228
|
+
class echoTime:
|
|
1229
|
+
data = []
|
|
1230
|
+
|
|
1231
|
+
class inversionTime:
|
|
1232
|
+
data = []
|
|
1233
|
+
|
|
1234
|
+
class echoTrainLength:
|
|
1235
|
+
data = []
|
|
1236
|
+
|
|
1237
|
+
class flipAngle:
|
|
1238
|
+
data = []
|
|
1239
|
+
|
|
1240
|
+
class numberAverages:
|
|
1241
|
+
data = []
|
|
1242
|
+
|
|
1243
|
+
class xyDim:
|
|
1244
|
+
data = []
|
|
1245
|
+
|
|
1246
|
+
class zDim:
|
|
1247
|
+
data = []
|
|
1248
|
+
|
|
1249
|
+
if len(wildcards_scans) == 0:
|
|
1250
|
+
print('wildcards_scans is empty')
|
|
1251
|
+
|
|
1252
|
+
# wildcards checks:
|
|
1253
|
+
no_mr_scan = True
|
|
1254
|
+
for wildcard in wildcards_scans:
|
|
1255
|
+
if 'MRscan' in wildcard:
|
|
1256
|
+
no_mr_scan = False
|
|
1257
|
+
if no_mr_scan:
|
|
1258
|
+
raise ValueError(f"wildcards: {wildcards_scans} does not include MR scans. (Only MR scans are supported)")
|
|
1259
|
+
|
|
1260
|
+
# Initialization
|
|
1261
|
+
if path_data is None:
|
|
1262
|
+
if self.paths._path_save:
|
|
1263
|
+
path_data = Path(self.paths._path_save)
|
|
1264
|
+
else:
|
|
1265
|
+
print("No path to data was given and path save is None.")
|
|
1266
|
+
return 0
|
|
1267
|
+
|
|
1268
|
+
if not path_save_checks:
|
|
1269
|
+
if self.paths._path_save_checks:
|
|
1270
|
+
path_save_checks = Path(self.paths._path_save_checks)
|
|
1271
|
+
else:
|
|
1272
|
+
if (Path(os.getcwd()) / "checks").exists():
|
|
1273
|
+
path_save_checks = Path(os.getcwd()) / "checks"
|
|
1274
|
+
else:
|
|
1275
|
+
path_save_checks = (Path(os.getcwd()) / "checks").mkdir()
|
|
1276
|
+
# Looping through all the different wildcards
|
|
1277
|
+
for i in tqdm(range(len(wildcards_scans))):
|
|
1278
|
+
wildcard = wildcards_scans[i]
|
|
1279
|
+
file_paths = get_file_paths(path_data, wildcard)
|
|
1280
|
+
n_files = len(file_paths)
|
|
1281
|
+
param.dates = np.zeros(n_files)
|
|
1282
|
+
param.years.data = np.zeros((n_files, 1))
|
|
1283
|
+
param.years.data = np.multiply(param.years.data, np.NaN)
|
|
1284
|
+
param.manufacturer = [None] * n_files
|
|
1285
|
+
param.scanning_sequence = [None] * n_files
|
|
1286
|
+
param.fieldStrength.data = np.zeros((n_files, 1))
|
|
1287
|
+
param.fieldStrength.data = np.multiply(param.fieldStrength.data, np.NaN)
|
|
1288
|
+
param.repetitionTime.data = np.zeros((n_files, 1))
|
|
1289
|
+
param.repetitionTime.data = np.multiply(param.repetitionTime.data, np.NaN)
|
|
1290
|
+
param.echoTime.data = np.zeros((n_files, 1))
|
|
1291
|
+
param.echoTime.data = np.multiply(param.echoTime.data, np.NaN)
|
|
1292
|
+
param.inversionTime.data = np.zeros((n_files, 1))
|
|
1293
|
+
param.inversionTime.data = np.multiply(param.inversionTime.data, np.NaN)
|
|
1294
|
+
param.echoTrainLength.data = np.zeros((n_files, 1))
|
|
1295
|
+
param.echoTrainLength.data = np.multiply(param.echoTrainLength.data, np.NaN)
|
|
1296
|
+
param.flipAngle.data = np.zeros((n_files, 1))
|
|
1297
|
+
param.flipAngle.data = np.multiply(param.flipAngle.data, np.NaN)
|
|
1298
|
+
param.numberAverages.data = np.zeros((n_files, 1))
|
|
1299
|
+
param.numberAverages.data = np.multiply(param.numberAverages.data, np.NaN)
|
|
1300
|
+
param.xyDim.data = np.zeros((n_files, 1))
|
|
1301
|
+
param.xyDim.data = np.multiply(param.xyDim.data, np.NaN)
|
|
1302
|
+
param.zDim.data = np.zeros((n_files, 1))
|
|
1303
|
+
param.zDim.data = np.multiply(param.zDim.data, np.NaN)
|
|
1304
|
+
|
|
1305
|
+
# Loading and recording data
|
|
1306
|
+
for f in tqdm(range(n_files)):
|
|
1307
|
+
file = file_paths[f]
|
|
1308
|
+
|
|
1309
|
+
#Open file for warning
|
|
1310
|
+
try:
|
|
1311
|
+
warn_file = open(path_save_checks / 'imaging_summary_mr_warnings.txt', 'a')
|
|
1312
|
+
except IOError:
|
|
1313
|
+
print("Could not open warning file")
|
|
1314
|
+
|
|
1315
|
+
# Loading Data
|
|
1316
|
+
try:
|
|
1317
|
+
print(f'\nCurrently working on: {file}', file = warn_file)
|
|
1318
|
+
with open(path_data / file, 'rb') as fe: medscan = pickle.load(fe)
|
|
1319
|
+
|
|
1320
|
+
# Example of DICOM header
|
|
1321
|
+
info = medscan.dicomH[1]
|
|
1322
|
+
# Recording dates (info.AcquistionDates)
|
|
1323
|
+
try:
|
|
1324
|
+
param.dates[f] = info.AcquisitionDate
|
|
1325
|
+
except AttributeError:
|
|
1326
|
+
param.dates[f] = info.StudyDate
|
|
1327
|
+
# Recording years
|
|
1328
|
+
try:
|
|
1329
|
+
y = str(param.dates[f]) # Only the first four characters represent the years
|
|
1330
|
+
param.years.data[f] = y[0:4]
|
|
1331
|
+
except Exception as e:
|
|
1332
|
+
print(f'Cannot read years of: {file}. Error: {e}', file = warn_file)
|
|
1333
|
+
# Recording manufacturers
|
|
1334
|
+
try:
|
|
1335
|
+
param.manufacturer[f] = info.Manufacturer
|
|
1336
|
+
except Exception as e:
|
|
1337
|
+
print(f'Cannot read manufacturer of: {file}. Error: {e}', file = warn_file)
|
|
1338
|
+
# Recording scanning sequence
|
|
1339
|
+
try:
|
|
1340
|
+
param.scanning_sequence[f] = info.scanning_sequence
|
|
1341
|
+
except Exception as e:
|
|
1342
|
+
print(f'Cannot read scanning sequence of: {file}. Error: {e}', file = warn_file)
|
|
1343
|
+
# Recording field strength
|
|
1344
|
+
try:
|
|
1345
|
+
param.fieldStrength.data[f] = info.MagneticFieldStrength
|
|
1346
|
+
except Exception as e:
|
|
1347
|
+
print(f'Cannot read field strength of: {file}. Error: {e}', file = warn_file)
|
|
1348
|
+
# Recording repetition time
|
|
1349
|
+
try:
|
|
1350
|
+
param.repetitionTime.data[f] = info.RepetitionTime
|
|
1351
|
+
except Exception as e:
|
|
1352
|
+
print(f'Cannot read repetition time of: {file}. Error: {e}', file = warn_file)
|
|
1353
|
+
# Recording echo time
|
|
1354
|
+
try:
|
|
1355
|
+
param.echoTime.data[f] = info.EchoTime
|
|
1356
|
+
except Exception as e:
|
|
1357
|
+
print(f'Cannot read echo time of: {file}. Error: {e}', file = warn_file)
|
|
1358
|
+
# Recording inversion time
|
|
1359
|
+
try:
|
|
1360
|
+
param.inversionTime.data[f] = info.InversionTime
|
|
1361
|
+
except Exception as e:
|
|
1362
|
+
print(f'Cannot read inversion time of: {file}. Error: {e}', file = warn_file)
|
|
1363
|
+
# Recording echo train length
|
|
1364
|
+
try:
|
|
1365
|
+
param.echoTrainLength.data[f] = info.EchoTrainLength
|
|
1366
|
+
except Exception as e:
|
|
1367
|
+
print(f'Cannot read echo train length of: {file}. Error: {e}', file = warn_file)
|
|
1368
|
+
# Recording flip angle
|
|
1369
|
+
try:
|
|
1370
|
+
param.flipAngle.data[f] = info.FlipAngle
|
|
1371
|
+
except Exception as e:
|
|
1372
|
+
print(f'Cannot read flip angle of: {file}. Error: {e}', file = warn_file)
|
|
1373
|
+
# Recording number of averages
|
|
1374
|
+
try:
|
|
1375
|
+
param.numberAverages.data[f] = info.NumberOfAverages
|
|
1376
|
+
except Exception as e:
|
|
1377
|
+
print(f'Cannot read number averages of: {file}. Error: {e}', file = warn_file)
|
|
1378
|
+
# Recording xy spacing
|
|
1379
|
+
try:
|
|
1380
|
+
param.xyDim.data[f] = medscan.data.volume.spatialRef.PixelExtentInWorldX
|
|
1381
|
+
except Exception as e:
|
|
1382
|
+
print(f'Cannot read x spacing of: {file}. Error: {e}', file = warn_file)
|
|
1383
|
+
# Recording z spacing
|
|
1384
|
+
try:
|
|
1385
|
+
param.zDim.data[f] = medscan.data.volume.spatialRef.PixelExtentInWorldZ
|
|
1386
|
+
except Exception as e:
|
|
1387
|
+
print(f'Cannot read z spacing of: {file}', file = warn_file)
|
|
1388
|
+
except Exception as e:
|
|
1389
|
+
print(f'Cannot read file: {file}. Error: {e}', file = warn_file)
|
|
1390
|
+
|
|
1391
|
+
warn_file.close()
|
|
1392
|
+
|
|
1393
|
+
# Summarize data
|
|
1394
|
+
# Summarizing years
|
|
1395
|
+
df_years = pd.DataFrame(param.years.data,
|
|
1396
|
+
columns=['years']).describe(percentiles=[min_percentile, max_percentile],
|
|
1397
|
+
include='all')
|
|
1398
|
+
# Summarizing field strength
|
|
1399
|
+
df_fs = pd.DataFrame(param.fieldStrength.data,
|
|
1400
|
+
columns=['fieldStrength']).describe(percentiles=[min_percentile, max_percentile],
|
|
1401
|
+
include='all')
|
|
1402
|
+
# Summarizing repetition time
|
|
1403
|
+
df_rt = pd.DataFrame(param.repetitionTime.data,
|
|
1404
|
+
columns=['repetitionTime']).describe(percentiles=[min_percentile, max_percentile],
|
|
1405
|
+
include='all')
|
|
1406
|
+
# Summarizing echo time
|
|
1407
|
+
df_et = pd.DataFrame(param.echoTime.data,
|
|
1408
|
+
columns=['echoTime']).describe(percentiles=[min_percentile, max_percentile],
|
|
1409
|
+
include='all')
|
|
1410
|
+
# Summarizing inversion time
|
|
1411
|
+
df_it = pd.DataFrame(param.inversionTime.data,
|
|
1412
|
+
columns=['inversionTime']).describe(percentiles=[min_percentile, max_percentile],
|
|
1413
|
+
include='all')
|
|
1414
|
+
# Summarizing echo train length
|
|
1415
|
+
df_etl = pd.DataFrame(param.echoTrainLength.data,
|
|
1416
|
+
columns=['echoTrainLength']).describe(percentiles=[min_percentile, max_percentile],
|
|
1417
|
+
include='all')
|
|
1418
|
+
# Summarizing flip angle
|
|
1419
|
+
df_fa = pd.DataFrame(param.flipAngle.data,
|
|
1420
|
+
columns=['flipAngle']).describe(percentiles=[min_percentile, max_percentile],
|
|
1421
|
+
include='all')
|
|
1422
|
+
# Summarizing number of averages
|
|
1423
|
+
df_na = pd.DataFrame(param.numberAverages.data,
|
|
1424
|
+
columns=['numberAverages']).describe(percentiles=[min_percentile, max_percentile],
|
|
1425
|
+
include='all')
|
|
1426
|
+
# Summarizing xy-spacing
|
|
1427
|
+
df_xy = pd.DataFrame(param.xyDim.data,
|
|
1428
|
+
columns=['xyDim'])
|
|
1429
|
+
# Summarizing z-spacing
|
|
1430
|
+
df_z = pd.DataFrame(param.zDim.data,
|
|
1431
|
+
columns=['zDim'])
|
|
1432
|
+
|
|
1433
|
+
# Plotting xy-spacing histogram
|
|
1434
|
+
ax = df_xy.hist(column='xyDim')
|
|
1435
|
+
min_quant, max_quant, average = df_xy.quantile(min_percentile), df_xy.quantile(max_percentile), param.xyDim.data.mean()
|
|
1436
|
+
for x in ax[0]:
|
|
1437
|
+
x.axvline(min_quant.xyDim, linestyle=':', color='r', label=f"Min Percentile: {float(min_quant):.3f}")
|
|
1438
|
+
x.axvline(max_quant.xyDim, linestyle=':', color='g', label=f"Max Percentile: {float(max_quant):.3f}")
|
|
1439
|
+
x.axvline(average, linestyle='solid', color='gold', label=f"Average: {float(average):.3f}")
|
|
1440
|
+
x.grid(False)
|
|
1441
|
+
plt.title(f"MR xy-spacing imaging summary for {wildcard}")
|
|
1442
|
+
plt.legend()
|
|
1443
|
+
plt.show()
|
|
1444
|
+
|
|
1445
|
+
# Plotting z-spacing histogram
|
|
1446
|
+
ax = df_z.hist(column='zDim')
|
|
1447
|
+
min_quant, max_quant, average = df_z.quantile(min_percentile), df_z.quantile(max_percentile), param.zDim.data.mean()
|
|
1448
|
+
for x in ax[0]:
|
|
1449
|
+
x.axvline(min_quant.zDim, linestyle=':', color='r', label=f"Min Percentile: {float(min_quant):.3f}")
|
|
1450
|
+
x.axvline(max_quant.zDim, linestyle=':', color='g', label=f"Max Percentile: {float(max_quant):.3f}")
|
|
1451
|
+
x.axvline(average, linestyle='solid', color='gold', label=f"Average: {float(average):.3f}")
|
|
1452
|
+
x.grid(False)
|
|
1453
|
+
plt.title(f"MR z-spacing imaging summary for {wildcard}")
|
|
1454
|
+
plt.legend()
|
|
1455
|
+
plt.show()
|
|
1456
|
+
|
|
1457
|
+
# Summarizing xy-spacing
|
|
1458
|
+
df_xy = df_xy.describe(percentiles=[min_percentile, max_percentile], include='all')
|
|
1459
|
+
# Summarizing z-spacing
|
|
1460
|
+
df_z = df_z.describe(percentiles=[min_percentile, max_percentile], include='all')
|
|
1461
|
+
|
|
1462
|
+
# Saving data
|
|
1463
|
+
name_save = wildcard.replace('*', '').replace('.npy', '')
|
|
1464
|
+
save_name = 'imagingSummary__' + name_save + ".json"
|
|
1465
|
+
df_all = [df_years, df_fs, df_rt, df_et, df_it, df_etl, df_fa, df_na, df_xy, df_z]
|
|
1466
|
+
df_all = df_all[0].join(df_all[1:])
|
|
1467
|
+
df_all.to_json(path_save_checks / save_name, orient='columns', indent=4)
|
|
1468
|
+
|
|
1469
|
+
def perform_ct_imaging_summary(self,
|
|
1470
|
+
wildcards_scans: List[str],
|
|
1471
|
+
path_data: Path = None,
|
|
1472
|
+
path_save_checks: Path = None,
|
|
1473
|
+
min_percentile: float = 0.05,
|
|
1474
|
+
max_percentile: float = 0.95
|
|
1475
|
+
) -> None:
|
|
1476
|
+
"""
|
|
1477
|
+
Summarizes CT imaging acquisition parameters. Plots summary histograms
|
|
1478
|
+
for different dimensions and saves all acquisition parameters locally in JSON files.
|
|
1479
|
+
|
|
1480
|
+
Args:
|
|
1481
|
+
wildcards_scans (List[str]): List of wildcards that determines the scans
|
|
1482
|
+
that will be analyzed (Only MRI scans will be analyzed). You can learn more about wildcards in
|
|
1483
|
+
`this link <https://www.linuxtechtips.com/2013/11/how-wildcards-work-in-linux-and-unix.html>`_.
|
|
1484
|
+
For example: ``[\"STS*.CTscan.npy\"]``.
|
|
1485
|
+
path_data (Path, optional): Path to the MEDscan objects, if not specified will use ``path_save`` from the
|
|
1486
|
+
inner-class ``Paths`` in the current instance.
|
|
1487
|
+
path_save_checks (Path, optional): Path where to save the checks, if not specified will use the one
|
|
1488
|
+
in the current instance.
|
|
1489
|
+
min_percentile (float, optional): Minimum percentile to use for the histograms. Defaults to 0.05.
|
|
1490
|
+
max_percentile (float, optional): Maximum percentile to use for the histograms. Defaults to 0.95.
|
|
1491
|
+
|
|
1492
|
+
Returns:
|
|
1493
|
+
None.
|
|
1494
|
+
"""
|
|
1495
|
+
|
|
1496
|
+
class param:
|
|
1497
|
+
manufacturer = []
|
|
1498
|
+
dates = []
|
|
1499
|
+
kernel = []
|
|
1500
|
+
|
|
1501
|
+
class years:
|
|
1502
|
+
data = []
|
|
1503
|
+
class voltage:
|
|
1504
|
+
data = []
|
|
1505
|
+
class exposure:
|
|
1506
|
+
data = []
|
|
1507
|
+
class xyDim:
|
|
1508
|
+
data = []
|
|
1509
|
+
class zDim:
|
|
1510
|
+
data = []
|
|
1511
|
+
|
|
1512
|
+
if len(wildcards_scans) == 0:
|
|
1513
|
+
print('wildcards_scans is empty')
|
|
1514
|
+
|
|
1515
|
+
# wildcards checks:
|
|
1516
|
+
no_mr_scan = True
|
|
1517
|
+
for wildcard in wildcards_scans:
|
|
1518
|
+
if 'CTscan' in wildcard:
|
|
1519
|
+
no_mr_scan = False
|
|
1520
|
+
if no_mr_scan:
|
|
1521
|
+
raise ValueError(f"wildcards: {wildcards_scans} does not include CT scans. (Only CT scans are supported)")
|
|
1522
|
+
|
|
1523
|
+
# Initialization
|
|
1524
|
+
if path_data is None:
|
|
1525
|
+
if self.paths._path_save:
|
|
1526
|
+
path_data = Path(self.paths._path_save)
|
|
1527
|
+
else:
|
|
1528
|
+
print("No path to data was given and path save is None.")
|
|
1529
|
+
return 0
|
|
1530
|
+
|
|
1531
|
+
if not path_save_checks:
|
|
1532
|
+
if self.paths._path_save_checks:
|
|
1533
|
+
path_save_checks = Path(self.paths._path_save_checks)
|
|
1534
|
+
else:
|
|
1535
|
+
if (Path(os.getcwd()) / "checks").exists():
|
|
1536
|
+
path_save_checks = Path(os.getcwd()) / "checks"
|
|
1537
|
+
else:
|
|
1538
|
+
path_save_checks = (Path(os.getcwd()) / "checks").mkdir()
|
|
1539
|
+
|
|
1540
|
+
# Looping through all the different wildcards
|
|
1541
|
+
for i in tqdm(range(len(wildcards_scans))):
|
|
1542
|
+
wildcard = wildcards_scans[i]
|
|
1543
|
+
file_paths = get_file_paths(path_data, wildcard)
|
|
1544
|
+
n_files = len(file_paths)
|
|
1545
|
+
param.dates = np.zeros(n_files)
|
|
1546
|
+
param.years.data = np.zeros(n_files)
|
|
1547
|
+
param.years.data = np.multiply(param.years.data, np.NaN)
|
|
1548
|
+
param.manufacturer = [None] * n_files
|
|
1549
|
+
param.voltage.data = np.zeros(n_files)
|
|
1550
|
+
param.voltage.data = np.multiply(param.voltage.data, np.NaN)
|
|
1551
|
+
param.exposure.data = np.zeros(n_files)
|
|
1552
|
+
param.exposure.data = np.multiply(param.exposure.data, np.NaN)
|
|
1553
|
+
param.kernel = [None] * n_files
|
|
1554
|
+
param.xyDim.data = np.zeros(n_files)
|
|
1555
|
+
param.xyDim.data = np.multiply(param.xyDim.data, np.NaN)
|
|
1556
|
+
param.zDim.data = np.zeros(n_files)
|
|
1557
|
+
param.zDim.data = np.multiply(param.zDim.data, np.NaN)
|
|
1558
|
+
|
|
1559
|
+
# Loading and recording data
|
|
1560
|
+
for f in tqdm(range(n_files)):
|
|
1561
|
+
file = file_paths[f]
|
|
1562
|
+
|
|
1563
|
+
# Open file for warning
|
|
1564
|
+
try:
|
|
1565
|
+
warn_file = open(path_save_checks / 'imaging_summary_ct_warnings.txt', 'a')
|
|
1566
|
+
except IOError:
|
|
1567
|
+
print("Could not open file")
|
|
1568
|
+
|
|
1569
|
+
# Loading Data
|
|
1570
|
+
try:
|
|
1571
|
+
with open(path_data / file, 'rb') as fe: medscan = pickle.load(fe)
|
|
1572
|
+
print(f'Currently working on: {file}', file=warn_file)
|
|
1573
|
+
|
|
1574
|
+
# DICOM header
|
|
1575
|
+
info = medscan.dicomH[1]
|
|
1576
|
+
|
|
1577
|
+
# Recording dates
|
|
1578
|
+
try:
|
|
1579
|
+
param.dates[f] = info.AcquisitionDate
|
|
1580
|
+
except AttributeError:
|
|
1581
|
+
param.dates[f] = info.StudyDate
|
|
1582
|
+
# Recording years
|
|
1583
|
+
try:
|
|
1584
|
+
years = str(param.dates[f]) # Only the first four characters represent the years
|
|
1585
|
+
param.years.data[f] = years[0:4]
|
|
1586
|
+
except Exception as e:
|
|
1587
|
+
print(f'Cannot read dates of : {file}. Error: {e}', file=warn_file)
|
|
1588
|
+
# Recording manufacturers
|
|
1589
|
+
try:
|
|
1590
|
+
param.manufacturer[f] = info.Manufacturer
|
|
1591
|
+
except Exception as e:
|
|
1592
|
+
print(f'Cannot read Manufacturer of: {file}. Error: {e}', file=warn_file)
|
|
1593
|
+
# Recording voltage
|
|
1594
|
+
try:
|
|
1595
|
+
param.voltage.data[f] = info.KVP
|
|
1596
|
+
except Exception as e:
|
|
1597
|
+
print(f'Cannot read voltage of: {file}. Error: {e}', file=warn_file)
|
|
1598
|
+
# Recording exposure
|
|
1599
|
+
try:
|
|
1600
|
+
param.exposure.data[f] = info.Exposure
|
|
1601
|
+
except Exception as e:
|
|
1602
|
+
print(f'Cannot read exposure of: {file}. Error: {e}', file=warn_file)
|
|
1603
|
+
# Recording reconstruction kernel
|
|
1604
|
+
try:
|
|
1605
|
+
param.kernel[f] = info.ConvolutionKernel
|
|
1606
|
+
except Exception as e:
|
|
1607
|
+
print(f'Cannot read Kernel of: {file}. Error: {e}', file=warn_file)
|
|
1608
|
+
# Recording xy spacing
|
|
1609
|
+
try:
|
|
1610
|
+
param.xyDim.data[f] = medscan.data.volume.spatialRef.PixelExtentInWorldX
|
|
1611
|
+
except Exception as e:
|
|
1612
|
+
print(f'Cannot read x spacing of: {file}. Error: {e}', file=warn_file)
|
|
1613
|
+
# Recording z spacing
|
|
1614
|
+
try:
|
|
1615
|
+
param.zDim.data[f] = medscan.data.volume.spatialRef.PixelExtentInWorldZ
|
|
1616
|
+
except Exception as e:
|
|
1617
|
+
print(f'Cannot read z spacing of: {file}. Error: {e}', file=warn_file)
|
|
1618
|
+
except Exception as e:
|
|
1619
|
+
print(f'Cannot load file: {file}', file=warn_file)
|
|
1620
|
+
|
|
1621
|
+
warn_file.close()
|
|
1622
|
+
|
|
1623
|
+
# Summarize data
|
|
1624
|
+
# Summarizing years
|
|
1625
|
+
df_years = pd.DataFrame(param.years.data, columns=['years']).describe(percentiles=[min_percentile, max_percentile], include='all')
|
|
1626
|
+
# Summarizing voltage
|
|
1627
|
+
df_voltage = pd.DataFrame(param.voltage.data, columns=['voltage']).describe(percentiles=[min_percentile, max_percentile], include='all')
|
|
1628
|
+
# Summarizing exposure
|
|
1629
|
+
df_exposure = pd.DataFrame(param.exposure.data, columns=['exposure']).describe(percentiles=[min_percentile, max_percentile], include='all')
|
|
1630
|
+
# Summarizing kernel
|
|
1631
|
+
df_kernel = pd.DataFrame(param.kernel, columns=['kernel']).describe(percentiles=[min_percentile, max_percentile], include='all')
|
|
1632
|
+
# Summarize xy spacing
|
|
1633
|
+
df_xy = pd.DataFrame(param.xyDim.data, columns=['xyDim']).describe(percentiles=[min_percentile, max_percentile], include='all')
|
|
1634
|
+
# Summarize z spacing
|
|
1635
|
+
df_z = pd.DataFrame(param.zDim.data, columns=['zDim']).describe(percentiles=[min_percentile, max_percentile], include='all')
|
|
1636
|
+
# Summarizing xy-spacing
|
|
1637
|
+
df_xy = pd.DataFrame(param.xyDim.data, columns=['xyDim'])
|
|
1638
|
+
# Summarizing z-spacing
|
|
1639
|
+
df_z = pd.DataFrame(param.zDim.data, columns=['zDim'])
|
|
1640
|
+
|
|
1641
|
+
# Plotting xy-spacing histogram
|
|
1642
|
+
ax = df_xy.hist(column='xyDim')
|
|
1643
|
+
min_quant, max_quant, average = df_xy.quantile(min_percentile), df_xy.quantile(max_percentile), param.xyDim.data.mean()
|
|
1644
|
+
for x in ax[0]:
|
|
1645
|
+
x.axvline(min_quant.xyDim, linestyle=':', color='r', label=f"Min Percentile: {float(min_quant):.3f}")
|
|
1646
|
+
x.axvline(max_quant.xyDim, linestyle=':', color='g', label=f"Max Percentile: {float(max_quant):.3f}")
|
|
1647
|
+
x.axvline(average, linestyle='solid', color='gold', label=f"Average: {float(average):.3f}")
|
|
1648
|
+
x.grid(False)
|
|
1649
|
+
plt.title(f"CT xy-spacing imaging summary for {wildcard}")
|
|
1650
|
+
plt.legend()
|
|
1651
|
+
plt.show()
|
|
1652
|
+
|
|
1653
|
+
# Plotting z-spacing histogram
|
|
1654
|
+
ax = df_z.hist(column='zDim')
|
|
1655
|
+
min_quant, max_quant, average = df_z.quantile(min_percentile), df_z.quantile(max_percentile), param.zDim.data.mean()
|
|
1656
|
+
for x in ax[0]:
|
|
1657
|
+
x.axvline(min_quant.zDim, linestyle=':', color='r', label=f"Min Percentile: {float(min_quant):.3f}")
|
|
1658
|
+
x.axvline(max_quant.zDim, linestyle=':', color='g', label=f"Max Percentile: {float(max_quant):.3f}")
|
|
1659
|
+
x.axvline(average, linestyle='solid', color='gold', label=f"Average: {float(average):.3f}")
|
|
1660
|
+
x.grid(False)
|
|
1661
|
+
plt.title(f"CT z-spacing imaging summary for {wildcard}")
|
|
1662
|
+
plt.legend()
|
|
1663
|
+
plt.show()
|
|
1664
|
+
|
|
1665
|
+
# Summarizing xy-spacing
|
|
1666
|
+
df_xy = df_xy.describe(percentiles=[min_percentile, max_percentile], include='all')
|
|
1667
|
+
# Summarizing z-spacing
|
|
1668
|
+
df_z = df_z.describe(percentiles=[min_percentile, max_percentile], include='all')
|
|
1669
|
+
|
|
1670
|
+
# Saving data
|
|
1671
|
+
name_save = wildcard.replace('*', '').replace('.npy', '')
|
|
1672
|
+
save_name = 'imagingSummary__' + name_save + ".json"
|
|
1673
|
+
df_all = [df_years, df_voltage, df_exposure, df_kernel, df_xy, df_z]
|
|
1674
|
+
df_all = df_all[0].join(df_all[1:])
|
|
1675
|
+
df_all.to_json(path_save_checks / save_name, orient='columns', indent=4)
|
|
1676
|
+
|
|
1677
|
+
    def perform_imaging_summary(self,
                                wildcards_scans: List[str],
                                path_data: Path = None,
                                path_save_checks: Path = None,
                                min_percentile: float = 0.05,
                                max_percentile: float = 0.95
                                ) -> None:
        """
        Summarizes CT and MR imaging acquisition parameters. Plots summary histograms
        for different dimensions and saves all acquisition parameters locally in JSON files.

        This is a dispatcher: it splits ``wildcards_scans`` by modality and forwards
        each subset to :meth:`perform_mr_imaging_summary` / :meth:`perform_ct_imaging_summary`.

        Args:
            wildcards_scans (List[str]): List of wildcards that determines the scans
                that will be analyzed (CT and MRI scans will be analyzed). You can learn more about wildcards in
                `this link <https://www.linuxtechtips.com/2013/11/how-wildcards-work-in-linux-and-unix.html>`_.
                For example: ``[\"STS*.CTscan.npy\", \"STS*.MRscan.npy\"]``.
            path_data (Path, optional): Path to the MEDscan objects, if not specified will use ``path_save`` from the
                inner-class ``Paths`` in the current instance.
            path_save_checks (Path, optional): Path where to save the checks, if not specified will use the one
                in the current instance.
            min_percentile (float, optional): Minimum percentile to use for the histograms. Defaults to 0.05.
            max_percentile (float, optional): Maximum percentile to use for the histograms. Defaults to 0.95.

        Returns:
            None.
        """
        # MR imaging summary
        # Modality is detected by substring match on the wildcard text itself.
        wildcards_scans_mr = [wildcard for wildcard in wildcards_scans if 'MRscan' in wildcard]
        if len(wildcards_scans_mr) == 0:
            # Missing a modality is not an error here: the other modality may still run.
            print("Cannot perform imaging summary for MR, no MR scan wildcard was given! ")
        else:
            self.perform_mr_imaging_summary(
                wildcards_scans_mr,
                path_data,
                path_save_checks,
                min_percentile,
                max_percentile)
        # CT imaging summary
        wildcards_scans_ct = [wildcard for wildcard in wildcards_scans if 'CTscan' in wildcard]
        if len(wildcards_scans_ct) == 0:
            print("Cannot perform imaging summary for CT, no CT scan wildcard was given! ")
        else:
            self.perform_ct_imaging_summary(
                wildcards_scans_ct,
                path_data,
                path_save_checks,
                min_percentile,
                max_percentile)
|