fastMONAI 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fastMONAI/__init__.py +1 -1
- fastMONAI/_modidx.py +6 -20
- fastMONAI/dataset_info.py +58 -50
- fastMONAI/external_data.py +181 -91
- fastMONAI/utils.py +10 -12
- fastMONAI/vision_augmentation.py +160 -139
- fastMONAI/vision_core.py +43 -27
- fastMONAI/vision_data.py +175 -85
- fastMONAI/vision_inference.py +37 -22
- fastMONAI/vision_loss.py +51 -42
- fastMONAI/vision_metrics.py +46 -23
- fastMONAI/vision_plot.py +15 -13
- {fastMONAI-0.3.1.dist-info → fastMONAI-0.3.3.dist-info}/METADATA +1 -1
- fastMONAI-0.3.3.dist-info/RECORD +20 -0
- fastMONAI-0.3.1.dist-info/RECORD +0 -20
- {fastMONAI-0.3.1.dist-info → fastMONAI-0.3.3.dist-info}/LICENSE +0 -0
- {fastMONAI-0.3.1.dist-info → fastMONAI-0.3.3.dist-info}/WHEEL +0 -0
- {fastMONAI-0.3.1.dist-info → fastMONAI-0.3.3.dist-info}/entry_points.txt +0 -0
- {fastMONAI-0.3.1.dist-info → fastMONAI-0.3.3.dist-info}/top_level.txt +0 -0
fastMONAI/__init__.py
CHANGED
@@ -1 +1 @@
-__version__ = "0.3.1"
+__version__ = "0.3.3"
fastMONAI/_modidx.py
CHANGED
@@ -29,16 +29,18 @@ d = { 'settings': { 'branch': 'master',
                     'fastMONAI/external_data.py'),
                 'fastMONAI.external_data._process_ixi_xls': ( 'external_data.html#_process_ixi_xls',
                     'fastMONAI/external_data.py'),
-                'fastMONAI.external_data.
-                    'fastMONAI/external_data.py'),
-                'fastMONAI.external_data.
-                    'fastMONAI/external_data.py'),
+                'fastMONAI.external_data._process_medmnist_img': ( 'external_data.html#_process_medmnist_img',
+                    'fastMONAI/external_data.py'),
+                'fastMONAI.external_data.download_example_endometrial_cancer_data': ( 'external_data.html#download_example_endometrial_cancer_data',
+                    'fastMONAI/external_data.py'),
                 'fastMONAI.external_data.download_example_spine_data': ( 'external_data.html#download_example_spine_data',
                     'fastMONAI/external_data.py'),
                 'fastMONAI.external_data.download_ixi_data': ( 'external_data.html#download_ixi_data',
                     'fastMONAI/external_data.py'),
                 'fastMONAI.external_data.download_ixi_tiny': ( 'external_data.html#download_ixi_tiny',
                     'fastMONAI/external_data.py'),
+                'fastMONAI.external_data.download_medmnist3d_dataset': ( 'external_data.html#download_medmnist3d_dataset',
+                    'fastMONAI/external_data.py'),
                 'fastMONAI.external_data.download_spine_test_data': ( 'external_data.html#download_spine_test_data',
                     'fastMONAI/external_data.py')},
     'fastMONAI.research_utils': { 'fastMONAI.research_utils.pred_postprocess': ( 'research_utils.html#pred_postprocess',
@@ -131,22 +133,6 @@ d = { 'settings': { 'branch': 'master',
                     'fastMONAI/vision_augmentation.py'),
                 'fastMONAI.vision_augmentation.ZNormalization.encodes': ( 'vision_augment.html#znormalization.encodes',
                     'fastMONAI/vision_augmentation.py'),
-                'fastMONAI.vision_augmentation._do_rand_biasfield': ( 'vision_augment.html#_do_rand_biasfield',
-                    'fastMONAI/vision_augmentation.py'),
-                'fastMONAI.vision_augmentation._do_rand_blur': ( 'vision_augment.html#_do_rand_blur',
-                    'fastMONAI/vision_augmentation.py'),
-                'fastMONAI.vision_augmentation._do_rand_gamma': ( 'vision_augment.html#_do_rand_gamma',
-                    'fastMONAI/vision_augmentation.py'),
-                'fastMONAI.vision_augmentation._do_rand_ghosting': ( 'vision_augment.html#_do_rand_ghosting',
-                    'fastMONAI/vision_augmentation.py'),
-                'fastMONAI.vision_augmentation._do_rand_motion': ( 'vision_augment.html#_do_rand_motion',
-                    'fastMONAI/vision_augmentation.py'),
-                'fastMONAI.vision_augmentation._do_rand_noise': ( 'vision_augment.html#_do_rand_noise',
-                    'fastMONAI/vision_augmentation.py'),
-                'fastMONAI.vision_augmentation._do_rand_spike': ( 'vision_augment.html#_do_rand_spike',
-                    'fastMONAI/vision_augmentation.py'),
-                'fastMONAI.vision_augmentation._do_z_normalization': ( 'vision_augment.html#_do_z_normalization',
-                    'fastMONAI/vision_augmentation.py'),
                 'fastMONAI.vision_augmentation.do_pad_or_crop': ( 'vision_augment.html#do_pad_or_crop',
                     'fastMONAI/vision_augmentation.py')},
     'fastMONAI.vision_core': { 'fastMONAI.vision_core.MedBase': ('vision_core.html#medbase', 'fastMONAI/vision_core.py'),
fastMONAI/dataset_info.py
CHANGED
@@ -14,18 +14,23 @@ import torch
 import glob
 
 # %% ../nbs/08_dataset_info.ipynb 4
-class MedDataset():
-
-
-    def __init__(self, path=None, # Path to the image folder
-                 postfix:str='', # Specify the file type if there are different files in the folder
-                 img_list:list=None, # Alternatively pass in a list with image paths
-                 reorder:bool=False, # Whether to reorder the data to be closest to canonical (RAS+) orientation
-                 dtype:(MedImage, MedMask)=MedImage, # Load data as datatype
-                 max_workers:int=1 # The number of worker threads
-                 ):
-        '''Constructs all the necessary attributes for the MedDataset object.'''
+class MedDataset:
+    """A class to extract and present information about the dataset."""
 
+    def __init__(self, path=None, postfix: str = '', img_list: list = None,
+                 reorder: bool = False, dtype: (MedImage, MedMask) = MedImage,
+                 max_workers: int = 1):
+        """Constructs MedDataset object.
+
+        Args:
+            path (str, optional): Path to the image folder.
+            postfix (str, optional): Specify the file type if there are different files in the folder.
+            img_list (List[str], optional): Alternatively, pass in a list with image paths.
+            reorder (bool, optional): Whether to reorder the data to be closest to canonical (RAS+) orientation.
+            dtype (Union[MedImage, MedMask], optional): Load data as datatype. Default is MedImage.
+            max_workers (int, optional): The number of worker threads. Default is 1.
+        """
+
         self.path = path
         self.postfix = postfix
         self.img_list = img_list
@@ -35,48 +40,43 @@ class MedDataset():
         self.df = self._create_data_frame()
 
     def _create_data_frame(self):
-
-
-        Returns:
-            DataFrame: A DataFrame with information about the dataset.
-        '''
+        """Private method that returns a dataframe with information about the dataset."""
 
         if self.path:
             self.img_list = glob.glob(f'{self.path}/*{self.postfix}*')
             if not self.img_list: print('Could not find images. Check the image path')
-
+
         with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
             data_info_dict = list(executor.map(self._get_data_info, self.img_list))
-
+
         df = pd.DataFrame(data_info_dict)
-
+
+        if df.orientation.nunique() > 1:
+            print('The volumes in this dataset have different orientations. '
+                  'Recommended to pass in the argument reorder=True when creating a MedDataset object for this dataset')
+
         return df
 
     def summary(self):
-
-
+        """Summary DataFrame of the dataset with example path for similar data."""
+
         columns = ['dim_0', 'dim_1', 'dim_2', 'voxel_0', 'voxel_1', 'voxel_2', 'orientation']
-
+
+        return self.df.groupby(columns, as_index=False).agg(
+            example_path=('path', 'min'), total=('path', 'size')
+        ).sort_values('total', ascending=False)
 
     def suggestion(self):
-
+        """Voxel value that appears most often in dim_0, dim_1 and dim_2, and whether the data should be reoriented."""
+
         resample = [self.df.voxel_0.mode()[0], self.df.voxel_1.mode()[0], self.df.voxel_2.mode()[0]]
-
         return resample, self.reorder
 
-    def _get_data_info(self, fn:str):
-
+    def _get_data_info(self, fn: str):
+        """Private method to collect information about an image file."""
+        _, o, _ = med_img_reader(fn, dtype=self.dtype, reorder=self.reorder, only_tensor=False)
 
-
-            fn: Image file path.
-
-        Returns:
-            dict: A dictionary with information about the image file
-        '''
-
-        _,o,_ = med_img_reader(fn, dtype=self.dtype, reorder=self.reorder, only_tensor=False)
-
-        info_dict = {'path': fn, 'dim_0': o.shape[1], 'dim_1': o.shape[2], 'dim_2' :o.shape[3],
+        info_dict = {'path': fn, 'dim_0': o.shape[1], 'dim_1': o.shape[2], 'dim_2': o.shape[3],
                      'voxel_0': round(o.spacing[0], 4), 'voxel_1': round(o.spacing[1], 4), 'voxel_2': round(o.spacing[2], 4),
                      'orientation': f'{"".join(o.orientation)}+'}
@@ -87,28 +87,36 @@ class MedDataset():
 
         return info_dict
 
-    def get_largest_img_size(self,
-
-                             ) -> list:
-        '''Get the largest image size in the dataset.'''
-        dims = None
+    def get_largest_img_size(self, resample: list = None) -> list:
+        """Get the largest image size in the dataset."""
 
-
-
+        dims = None
+
+        if resample is not None:
             org_voxels = self.df[["voxel_0", "voxel_1", 'voxel_2']].values
             org_dims = self.df[["dim_0", "dim_1", 'dim_2']].values
-
+
             ratio = org_voxels/resample
             new_dims = (org_dims * ratio).T
             dims = [new_dims[0].max().round(), new_dims[1].max().round(), new_dims[2].max().round()]
-
-        else:
-
+
+        else:
+            dims = [df.dim_0.max(), df.dim_1.max(), df.dim_2.max()]
+
         return dims
 
 # %% ../nbs/08_dataset_info.ipynb 5
-def get_class_weights(
-
+def get_class_weights(labels: (np.array, list), class_weight: str = 'balanced') -> torch.Tensor:
+    """Calculates and returns the class weights.
+
+    Args:
+        labels: An array or list of class labels for each instance in the dataset.
+        class_weight: Defaults to 'balanced'.
+
+    Returns:
+        A tensor of class weights.
+    """
+
+    class_weights = compute_class_weight(class_weight=class_weight, classes=np.unique(labels), y=labels)
 
-    class_weights = compute_class_weight(class_weight=class_weight, classes=np.unique(train_labels), y=train_labels)
     return torch.Tensor(class_weights)
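The `get_class_weights` rewrite also changes behavior: the 0.3.1 body referenced `train_labels`, while 0.3.3 computes the weights from the `labels` argument that is actually passed in. A minimal usage sketch of the reworked API, based only on the signatures shown above (the folder path and label values are illustrative, not part of this diff):

from fastMONAI.dataset_info import MedDataset, get_class_weights

# Inspect a folder of volumes; reorder=True re-orients them toward canonical RAS+.
dataset = MedDataset(path='../data/images', reorder=True, max_workers=4)
resample, reorder = dataset.suggestion()  # most common voxel spacing + reorder flag

# 0.3.3: weights are derived from the `labels` argument.
weights = get_class_weights(labels=[0, 0, 0, 1, 1])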
fastMONAI/external_data.py
CHANGED
@@ -2,9 +2,9 @@
 
 # %% auto 0
 __all__ = ['MURLs', 'download_ixi_data', 'download_ixi_tiny', 'download_spine_test_data', 'download_example_spine_data',
-           '
+           'download_medmnist3d_dataset', 'download_example_endometrial_cancer_data']
 
-# %% ../nbs/09_external_data.ipynb
+# %% ../nbs/09_external_data.ipynb 1
 from pathlib import Path
 from glob import glob
 from numpy import load
@@ -15,27 +15,36 @@ from torchio import ScalarImage
 import multiprocessing as mp
 from functools import partial
 
-# %% ../nbs/09_external_data.ipynb
+# %% ../nbs/09_external_data.ipynb 3
 class MURLs():
-
+    """A class with external medical dataset URLs."""
 
     IXI_DATA = 'http://biomedic.doc.ic.ac.uk/brain-development/downloads/IXI/IXI-T1.tar'
     IXI_DEMOGRAPHIC_INFORMATION = 'http://biomedic.doc.ic.ac.uk/brain-development/downloads/IXI/IXI.xls'
     CHENGWEN_CHU_SPINE_DATA = 'https://drive.google.com/uc?id=1rbm9-KKAexpNm2mC9FsSbfnS8VJaF3Kn&confirm=t'
     EXAMPLE_SPINE_DATA = 'https://drive.google.com/uc?id=1Ms3Q6MYQrQUA_PKZbJ2t2NeYFQ5jloMh'
-
+    MEDMNIST_DICT = {'OrganMNIST3D': 'https://zenodo.org/record/6496656/files/organmnist3d.npz?download=1',
+                     'NoduleMNIST3D': 'https://zenodo.org/record/6496656/files/nodulemnist3d.npz?download=1',
+                     'AdrenalMNIST3D': 'https://zenodo.org/record/6496656/files/adrenalmnist3d.npz?download=1',
+                     'FractureMNIST3D': 'https://zenodo.org/record/6496656/files/fracturemnist3d.npz?download=1',
+                     'VesselMNIST3D': 'https://zenodo.org/record/6496656/files/vesselmnist3d.npz?download=1',
+                     'SynapseMNIST3D': 'https://zenodo.org/record/6496656/files/synapsemnist3d.npz?download=1'}
+    EXAMPLE_EC_DATA = 'https://drive.google.com/uc?id=1cjOBhkdRsoX3unxHiL377R5j8ottN4An'
 
-# %% ../nbs/09_external_data.ipynb
-def _process_ixi_xls(xls_path:(str, Path), img_path: Path):
-
+# %% ../nbs/09_external_data.ipynb 4
+def _process_ixi_xls(xls_path: (str, Path), img_path: Path) -> pd.DataFrame:
+    """Private method to process the demographic information for the IXI dataset.
 
     Args:
         xls_path: File path to the xls file with the demographic information.
-        img_path: Folder path to the images
+        img_path: Folder path to the images.
 
     Returns:
-
-
+        A processed dataframe with image path and demographic information.
+
+    Raises:
+        ValueError: If xls_path or img_path do not exist.
+    """
 
     print('Preprocessing ' + str(xls_path))
 
@@ -45,14 +54,14 @@ def _process_ixi_xls(xls_path:(str, Path), img_path: Path):
 
     for subject_id in duplicate_subject_ids:
         age = df.loc[df.IXI_ID == subject_id].AGE.nunique()
-        if age != 1: df = df.loc[df.IXI_ID != subject_id]
+        if age != 1: df = df.loc[df.IXI_ID != subject_id]  # Remove duplicates with two different age values
 
     df = df.drop_duplicates(subset='IXI_ID', keep='first').reset_index(drop=True)
 
     df['subject_id'] = ['IXI' + str(subject_id).zfill(3) for subject_id in df.IXI_ID.values]
     df = df.rename(columns={'SEX_ID (1=m, 2=f)': 'gender'})
     df['age_at_scan'] = df.AGE.round(2)
-    df = df.replace({'gender': {1:'M', 2:'F'}})
+    df = df.replace({'gender': {1: 'M', 2: 'F'}})
 
     img_list = list(img_path.glob('*.nii.gz'))
     for path in img_list:
@@ -61,50 +70,58 @@ def _process_ixi_xls(xls_path:(str, Path), img_path: Path):
 
     df = df.dropna()
     df = df[['t1_path', 'subject_id', 'gender', 'age_at_scan']]
+
     return df
 
-# %% ../nbs/09_external_data.ipynb
-def download_ixi_data(path:(str, Path)='../data'
-
-
-
-
-
-
-
+# %% ../nbs/09_external_data.ipynb 6
+def download_ixi_data(path: (str, Path) = '../data') -> Path:
+    """Download T1 scans and demographic information from the IXI dataset.
+
+    Args:
+        path: Path to the directory where the data will be stored. Defaults to '../data'.
+
+    Returns:
+        The path to the stored CSV file.
+    """
+
+    path = Path(path) / 'IXI'
+    img_path = path / 'T1_images'
 
     # Check whether image data already present in img_path:
-    is_extracted=False
+    is_extracted = False
     try:
-        if len(list(img_path.iterdir())) >= 581:
-            is_extracted=True
+        if len(list(img_path.iterdir())) >= 581:  # 581 imgs in the IXI dataset
+            is_extracted = True
             print(f"Images already downloaded and extracted to {img_path}")
     except:
-        is_extracted=False
-
-    # Download and extract images
-    if not is_extracted:
-        download_and_extract(url=MURLs.IXI_DATA, filepath=path/'IXI-T1.tar', output_dir=img_path)
-        (path/'IXI-T1.tar').unlink()
+        is_extracted = False
 
+    if not is_extracted:
+        download_and_extract(url=MURLs.IXI_DATA, filepath=path / 'IXI-T1.tar', output_dir=img_path)
+        (path / 'IXI-T1.tar').unlink()
 
-
-    download_url(url=MURLs.IXI_DEMOGRAPHIC_INFORMATION, filepath=path/'IXI.xls')
+    download_url(url=MURLs.IXI_DEMOGRAPHIC_INFORMATION, filepath=path / 'IXI.xls')
 
-    processed_df = _process_ixi_xls(xls_path=path/'IXI.xls', img_path=img_path)
-    processed_df.to_csv(path/'dataset.csv',index=False)
+    processed_df = _process_ixi_xls(xls_path=path / 'IXI.xls', img_path=img_path)
+    processed_df.to_csv(path / 'dataset.csv', index=False)
 
     return path
 
-# %% ../nbs/09_external_data.ipynb
-def download_ixi_tiny(path:(str, Path)='../data'):
-
+# %% ../nbs/09_external_data.ipynb 8
+def download_ixi_tiny(path: (str, Path) = '../data') -> Path:
+    """Download the tiny version of the IXI dataset provided by TorchIO.
+
+    Args:
+        path: The directory where the data will be
+            stored. If not provided, defaults to '../data'.
+
+    Returns:
+        The path to the directory where the data is stored.
+    """
 
-    path = Path(path)/'IXITiny'
+    path = Path(path) / 'IXITiny'
 
-    #Download MR scans and segmentation masks
     IXITiny(root=str(path), download=True)
-    # Download demographic info
     download_url(url=MURLs.IXI_DEMOGRAPHIC_INFORMATION, filepath=path/'IXI.xls')
 
     processed_df = _process_ixi_xls(xls_path=path/'IXI.xls', img_path=path/'image')
@@ -115,96 +132,169 @@ def download_ixi_tiny(path:(str, Path)='../data'):
     return path
 
 # %% ../nbs/09_external_data.ipynb 10
-def _create_spine_df(
-
-    img_list = glob(str(test_dir/'img/*.nii.gz'))
+def _create_spine_df(dir: Path) -> pd.DataFrame:
+    """Create a pandas DataFrame containing information about spinal images.
 
-
-
+    Args:
+        dir: Directory path where data (image and segmentation
+            mask files) are stored.
 
-
+    Returns:
+        A DataFrame containing the paths to the image files and their
+        corresponding mask files, the subject IDs, and a flag indicating that
+        these are test data.
+    """
+
+    img_list = glob(str(dir / 'img/*.nii.gz'))
+    mask_list = [str(fn).replace('img', 'seg') for fn in img_list]
     subject_id_list = [fn.split('_')[-1].split('.')[0] for fn in mask_list]
 
-
-
+    test_data = {
+        't2_img_path': img_list,
+        't2_mask_path': mask_list,
+        'subject_id': subject_id_list,
+        'is_test': True,
+    }
 
-    # Create a DataFrame from the example data dictionary
     return pd.DataFrame(test_data)
 
-# %% ../nbs/09_external_data.ipynb
-def download_spine_test_data(path:(str, Path)='../data'):
+# %% ../nbs/09_external_data.ipynb 11
+def download_spine_test_data(path: (str, Path) = '../data') -> pd.DataFrame:
+    """Downloads T2w scans from the study 'Fully Automatic Localization and
+    Segmentation of 3D Vertebral Bodies from CT/MR Images via a Learning-Based
+    Method' by Chu et. al.
+
+    Args:
+        path: Directory where the downloaded data
+            will be stored and extracted. Defaults to '../data'.
+
+    Returns:
+        Processed dataframe containing image paths, label paths, and subject IDs.
+    """
 
-    ''' Download T2w scans from 'Fully Automatic Localization and Segmentation of 3D Vertebral Bodies from CT/MR Images via a Learning-Based Method' study by Chu et. al.
-    Returns a processed dataframe with image path, label path and subject IDs.
-    '''
     study = 'chengwen_chu_2015'
 
-    download_and_extract(
+    download_and_extract(
+        url=MURLs.CHENGWEN_CHU_SPINE_DATA,
+        filepath=f'{study}.zip',
+        output_dir=path
+    )
     Path(f'{study}.zip').unlink()
 
-    return _create_spine_df(Path(path)/study)
+    return _create_spine_df(Path(path) / study)
 
-# %% ../nbs/09_external_data.ipynb
-def download_example_spine_data(path:(str, Path)='../data'):
+# %% ../nbs/09_external_data.ipynb 12
+def download_example_spine_data(path: (str, Path) = '../data') -> Path:
+    """Downloads example T2w scan and corresponding predicted mask.
+
+    Args:
+        path: Directory where the downloaded data
+            will be stored and extracted. Defaults to '../data'.
+
+    Returns:
+        Path to the directory where the example data has been extracted.
+    """
 
-    '''Download example T2w scan and predicted mask.'''
     study = 'example_data'
 
-    download_and_extract(
+    download_and_extract(
+        url=MURLs.EXAMPLE_SPINE_DATA,
+        filepath='example_data.zip',
+        output_dir=path
+    )
     Path('example_data.zip').unlink()
 
-    return Path(path/study
+    return Path(path) / study
 
-# %% ../nbs/09_external_data.ipynb
-def
-
+# %% ../nbs/09_external_data.ipynb 14
+def _process_medmnist_img(path, idx_arr):
+    """Save tensor as NIfTI."""
+
    idx, arr = idx_arr
     img = ScalarImage(tensor=arr[None, :])
     fn = path/f'{idx}_nodule.nii.gz'
     img.save(fn)
     return str(fn)
 
-# %% ../nbs/09_external_data.ipynb
+# %% ../nbs/09_external_data.ipynb 15
 def _df_sort_and_add_columns(df, label_list, is_val):
-
+    """Sort the dataframe based on img_idx and add labels and if it is validation data column."""
+
     df = df.sort_values(by='img_idx').reset_index(drop=True)
     df['labels'], df['is_val'] = label_list, is_val
-    df = df.replace({"labels": {0:'b', 1:'m'}})
+    #df = df.replace({"labels": {0:'b', 1:'m'}})
     df = df.drop('img_idx', axis=1)
 
     return df
 
-# %% ../nbs/09_external_data.ipynb
+# %% ../nbs/09_external_data.ipynb 16
 def _create_nodule_df(pool, output_dir, imgs, labels, is_val=False):
-
-
+    """Create dataframe for MedMNIST data."""
+
+    img_path_list = pool.map(partial(_process_medmnist_img, output_dir), enumerate(imgs))
     img_idx = [float(Path(fn).parts[-1].split('_')[0]) for fn in img_path_list]
 
     df = pd.DataFrame(list(zip(img_path_list, img_idx)), columns=['img_path','img_idx'])
     return _df_sort_and_add_columns(df, labels, is_val)
 
-# %% ../nbs/09_external_data.ipynb
-def
-
-
-    study = 'NoduleMNIST3D'
-    path = Path(path)/study
-
-    download_url(url=MURLs.NODULE_MNIST_DATA, filepath=path/f'{study}.npz');
-    data = load(path/f'{study}.npz')
-    key_fn = ['train_images', 'val_images', 'test_images']
-    for fn in key_fn: (path/fn).mkdir(exist_ok=True)
-
-
-    train_imgs, val_imgs, test_imgs = data[key_fn[0]], data[key_fn[1]], data[key_fn[2]]
+# %% ../nbs/09_external_data.ipynb 17
+def download_medmnist3d_dataset(study: str, path: (str, Path) = '../data',
+                                max_workers: int = 1):
+    """Downloads and processes a particular MedMNIST3D dataset.
 
+    Args:
+        study: MedMNIST dataset ('OrganMNIST3D', 'NoduleMNIST3D',
+            'AdrenalMNIST3D', 'FractureMNIST3D', 'VesselMNIST3D', 'SynapseMNIST3D')
+        path: Directory to store and extract downloaded data. Defaults to '../data'.
+        max_workers: Maximum number of worker processes for data processing.
+            Defaults to 1.
+
+    Returns:
+        Two pandas DataFrames. The first DataFrame combines training and validation
+        data, and the second DataFrame contains the testing data.
+    """
+    path = Path(path) / study
+    dataset_file_path = path / f'{study}.npz'
+
+    try:
+        #todo: check if dataset is downloaded
+        download_url(url=MURLs.MEDMNIST_DICT[study], filepath=dataset_file_path)
+    except:
+        raise ValueError(f"Dataset '{study}' does not exist.")
+
+    data = load(dataset_file_path)
+    keys = ['train_images', 'val_images', 'test_images']
+
+    for key in keys:
+        (path / key).mkdir(exist_ok=True)
+
+    train_imgs = data[keys[0]]
+    val_imgs = data[keys[1]]
+    test_imgs = data[keys[2]]
 
     with mp.Pool(processes=max_workers) as pool:
-
-
-        val_df = _create_nodule_df(pool, path/
-
-
+        train_df = _create_nodule_df(pool, path / keys[0], train_imgs,
+                                     data['train_labels'])
+        val_df = _create_nodule_df(pool, path / keys[1], val_imgs,
+                                   data['val_labels'], is_val=True)
+        test_df = _create_nodule_df(pool, path / keys[2], test_imgs,
+                                    data['test_labels'])
+
     train_val_df = pd.concat([train_df, val_df], ignore_index=True)
-
+
+    dataset_file_path.unlink()
+
     return train_val_df, test_df
+
+# %% ../nbs/09_external_data.ipynb 19
+def download_example_endometrial_cancer_data(path: (str, Path) = '../data') -> Path:
+    study = 'ec'
+
+    download_and_extract(
+        url=MURLs.EXAMPLE_EC_DATA,
+        filepath='ec.zip',
+        output_dir=path
+    )
+    Path('ec.zip').unlink()
+
+    return Path(path) / study
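A minimal usage sketch of the new `download_medmnist3d_dataset` API, based only on the signature, docstring, and return values shown above (the study choice and worker count are illustrative):

from fastMONAI.external_data import download_medmnist3d_dataset

# Valid study names are the keys of MURLs.MEDMNIST_DICT.
train_val_df, test_df = download_medmnist3d_dataset(
    study='NoduleMNIST3D', path='../data', max_workers=4)

# The first frame carries the is_val flag added by _df_sort_and_add_columns,
# so the validation split can be recovered after concatenation.
val_df = train_val_df[train_val_df.is_val]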
fastMONAI/utils.py
CHANGED
@@ -9,12 +9,8 @@ import torch
 from pathlib import Path
 
 # %% ../nbs/07_utils.ipynb 3
-def store_variables(pkl_fn:(str, Path),
-
-                    reorder:bool,
-                    resample:(int,list),
-                    ) -> None:
-    '''Save variable values in a pickle file.'''
+def store_variables(pkl_fn: (str, Path), size: list, reorder: bool, resample: (int, list)):
+    """Save variable values in a pickle file."""
 
     var_vals = [size, reorder, resample]
 
@@ -22,19 +18,21 @@ def store_variables(pkl_fn:(str, Path),
         pickle.dump(var_vals, f)
 
 # %% ../nbs/07_utils.ipynb 4
-def load_variables(pkl_fn
-
-    '''Load stored variable values from a pickle file.
+def load_variables(pkl_fn: (str, Path)):
+    """Loads stored variable values from a pickle file.
 
-
-
+    Args:
+        pkl_fn: File path of the pickle file to be loaded.
 
+    Returns:
+        The deserialized value of the pickled data.
+    """
     with open(pkl_fn, 'rb') as f:
         return pickle.load(f)
 
 # %% ../nbs/07_utils.ipynb 5
 def print_colab_gpu_info():
-
+    """Check if we have a GPU attached to the runtime."""
 
     colab_gpu_msg =(f"{'#'*80}\n"
                     "Remember to attach a GPU to your Colab Runtime:"