fastMONAI-0.3.1.tar.gz → fastMONAI-0.3.3.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fastMONAI-0.3.3/CONTRIBUTING.md +11 -0
- {fastMONAI-0.3.1/fastMONAI.egg-info → fastMONAI-0.3.3}/PKG-INFO +1 -1
- fastMONAI-0.3.3/fastMONAI/__init__.py +1 -0
- {fastMONAI-0.3.1 → fastMONAI-0.3.3}/fastMONAI/_modidx.py +6 -20
- fastMONAI-0.3.3/fastMONAI/dataset_info.py +122 -0
- fastMONAI-0.3.3/fastMONAI/external_data.py +300 -0
- {fastMONAI-0.3.1 → fastMONAI-0.3.3}/fastMONAI/utils.py +10 -12
- fastMONAI-0.3.3/fastMONAI/vision_augmentation.py +281 -0
- {fastMONAI-0.3.1 → fastMONAI-0.3.3}/fastMONAI/vision_core.py +43 -27
- fastMONAI-0.3.3/fastMONAI/vision_data.py +267 -0
- {fastMONAI-0.3.1 → fastMONAI-0.3.3}/fastMONAI/vision_inference.py +37 -22
- fastMONAI-0.3.3/fastMONAI/vision_loss.py +107 -0
- fastMONAI-0.3.3/fastMONAI/vision_metrics.py +100 -0
- {fastMONAI-0.3.1 → fastMONAI-0.3.3}/fastMONAI/vision_plot.py +15 -13
- {fastMONAI-0.3.1 → fastMONAI-0.3.3/fastMONAI.egg-info}/PKG-INFO +1 -1
- {fastMONAI-0.3.1 → fastMONAI-0.3.3}/settings.ini +1 -1
- fastMONAI-0.3.1/CONTRIBUTING.md +0 -2
- fastMONAI-0.3.1/fastMONAI/__init__.py +0 -1
- fastMONAI-0.3.1/fastMONAI/dataset_info.py +0 -114
- fastMONAI-0.3.1/fastMONAI/external_data.py +0 -210
- fastMONAI-0.3.1/fastMONAI/vision_augmentation.py +0 -260
- fastMONAI-0.3.1/fastMONAI/vision_data.py +0 -177
- fastMONAI-0.3.1/fastMONAI/vision_loss.py +0 -98
- fastMONAI-0.3.1/fastMONAI/vision_metrics.py +0 -77
- {fastMONAI-0.3.1 → fastMONAI-0.3.3}/LICENSE +0 -0
- {fastMONAI-0.3.1 → fastMONAI-0.3.3}/MANIFEST.in +0 -0
- {fastMONAI-0.3.1 → fastMONAI-0.3.3}/README.md +0 -0
- {fastMONAI-0.3.1 → fastMONAI-0.3.3}/fastMONAI/research_utils.py +0 -0
- {fastMONAI-0.3.1 → fastMONAI-0.3.3}/fastMONAI/vision_all.py +0 -0
- {fastMONAI-0.3.1 → fastMONAI-0.3.3}/fastMONAI.egg-info/SOURCES.txt +0 -0
- {fastMONAI-0.3.1 → fastMONAI-0.3.3}/fastMONAI.egg-info/dependency_links.txt +0 -0
- {fastMONAI-0.3.1 → fastMONAI-0.3.3}/fastMONAI.egg-info/entry_points.txt +0 -0
- {fastMONAI-0.3.1 → fastMONAI-0.3.3}/fastMONAI.egg-info/not-zip-safe +0 -0
- {fastMONAI-0.3.1 → fastMONAI-0.3.3}/fastMONAI.egg-info/requires.txt +0 -0
- {fastMONAI-0.3.1 → fastMONAI-0.3.3}/fastMONAI.egg-info/top_level.txt +0 -0
- {fastMONAI-0.3.1 → fastMONAI-0.3.3}/setup.cfg +0 -0
- {fastMONAI-0.3.1 → fastMONAI-0.3.3}/setup.py +0 -0
fastMONAI-0.3.3/CONTRIBUTING.md
@@ -0,0 +1,11 @@
+# How to contribute
+Contributions to the source code are greatly appreciated. If you find any issues or have suggestions for improvements, please open an issue in the corresponding GitHub repository. To contribute code changes, we encourage you to follow these [steps](https://docs.github.com/en/get-started/quickstart/contributing-to-projects).
+
+## Getting Started
+1. Install an editable version of the forked project: `pip install -e 'fastMONAI[dev]'`
+2. Git hooks are used to run automatic scripts during each commit and merge to strip the notebooks of superfluous metadata and avoid merge conflicts. To set up Git hooks, run the following command inside the project folder: `nbdev_install_hooks`
+3. Once you have installed the project and set up the Git hooks, you can start making changes to the notebooks.
+4. Run `nbdev_prepare` to build .py modules from the notebooks and test them (if unit tests are written).
+
+## Contact
+If you have any questions or need further assistance, please contact us at skaliyugarasan@hotmail.com. We appreciate your interest and look forward to your contributions.
fastMONAI-0.3.3/fastMONAI/__init__.py
@@ -0,0 +1 @@
+__version__ = "0.3.3"
{fastMONAI-0.3.1 → fastMONAI-0.3.3}/fastMONAI/_modidx.py
@@ -29,16 +29,18 @@ d = { 'settings': { 'branch': 'master',
                     'fastMONAI/external_data.py'),
                     'fastMONAI.external_data._process_ixi_xls': ( 'external_data.html#_process_ixi_xls',
                     'fastMONAI/external_data.py'),
-                    'fastMONAI.external_data.
-
-                    'fastMONAI.external_data.
-
+                    'fastMONAI.external_data._process_medmnist_img': ( 'external_data.html#_process_medmnist_img',
+                    'fastMONAI/external_data.py'),
+                    'fastMONAI.external_data.download_example_endometrial_cancer_data': ( 'external_data.html#download_example_endometrial_cancer_data',
+                    'fastMONAI/external_data.py'),
                     'fastMONAI.external_data.download_example_spine_data': ( 'external_data.html#download_example_spine_data',
                     'fastMONAI/external_data.py'),
                     'fastMONAI.external_data.download_ixi_data': ( 'external_data.html#download_ixi_data',
                     'fastMONAI/external_data.py'),
                     'fastMONAI.external_data.download_ixi_tiny': ( 'external_data.html#download_ixi_tiny',
                     'fastMONAI/external_data.py'),
+                    'fastMONAI.external_data.download_medmnist3d_dataset': ( 'external_data.html#download_medmnist3d_dataset',
+                    'fastMONAI/external_data.py'),
                     'fastMONAI.external_data.download_spine_test_data': ( 'external_data.html#download_spine_test_data',
                     'fastMONAI/external_data.py')},
     'fastMONAI.research_utils': { 'fastMONAI.research_utils.pred_postprocess': ( 'research_utils.html#pred_postprocess',
@@ -131,22 +133,6 @@
                     'fastMONAI/vision_augmentation.py'),
                     'fastMONAI.vision_augmentation.ZNormalization.encodes': ( 'vision_augment.html#znormalization.encodes',
                     'fastMONAI/vision_augmentation.py'),
-                    'fastMONAI.vision_augmentation._do_rand_biasfield': ( 'vision_augment.html#_do_rand_biasfield',
-                    'fastMONAI/vision_augmentation.py'),
-                    'fastMONAI.vision_augmentation._do_rand_blur': ( 'vision_augment.html#_do_rand_blur',
-                    'fastMONAI/vision_augmentation.py'),
-                    'fastMONAI.vision_augmentation._do_rand_gamma': ( 'vision_augment.html#_do_rand_gamma',
-                    'fastMONAI/vision_augmentation.py'),
-                    'fastMONAI.vision_augmentation._do_rand_ghosting': ( 'vision_augment.html#_do_rand_ghosting',
-                    'fastMONAI/vision_augmentation.py'),
-                    'fastMONAI.vision_augmentation._do_rand_motion': ( 'vision_augment.html#_do_rand_motion',
-                    'fastMONAI/vision_augmentation.py'),
-                    'fastMONAI.vision_augmentation._do_rand_noise': ( 'vision_augment.html#_do_rand_noise',
-                    'fastMONAI/vision_augmentation.py'),
-                    'fastMONAI.vision_augmentation._do_rand_spike': ( 'vision_augment.html#_do_rand_spike',
-                    'fastMONAI/vision_augmentation.py'),
-                    'fastMONAI.vision_augmentation._do_z_normalization': ( 'vision_augment.html#_do_z_normalization',
-                    'fastMONAI/vision_augmentation.py'),
                     'fastMONAI.vision_augmentation.do_pad_or_crop': ( 'vision_augment.html#do_pad_or_crop',
                     'fastMONAI/vision_augmentation.py')},
     'fastMONAI.vision_core': { 'fastMONAI.vision_core.MedBase': ('vision_core.html#medbase', 'fastMONAI/vision_core.py'),
fastMONAI-0.3.3/fastMONAI/dataset_info.py
@@ -0,0 +1,122 @@
+# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/08_dataset_info.ipynb.
+
+# %% auto 0
+__all__ = ['MedDataset', 'get_class_weights']
+
+# %% ../nbs/08_dataset_info.ipynb 2
+from .vision_core import *
+
+from sklearn.utils.class_weight import compute_class_weight
+from concurrent.futures import ThreadPoolExecutor
+import pandas as pd
+import numpy as np
+import torch
+import glob
+
+# %% ../nbs/08_dataset_info.ipynb 4
+class MedDataset:
+    """A class to extract and present information about the dataset."""
+
+    def __init__(self, path=None, postfix: str = '', img_list: list = None,
+                 reorder: bool = False, dtype: (MedImage, MedMask) = MedImage,
+                 max_workers: int = 1):
+        """Constructs MedDataset object.
+
+        Args:
+            path (str, optional): Path to the image folder.
+            postfix (str, optional): Specify the file type if there are different files in the folder.
+            img_list (List[str], optional): Alternatively, pass in a list with image paths.
+            reorder (bool, optional): Whether to reorder the data to be closest to canonical (RAS+) orientation.
+            dtype (Union[MedImage, MedMask], optional): Load data as datatype. Default is MedImage.
+            max_workers (int, optional): The number of worker threads. Default is 1.
+        """
+
+        self.path = path
+        self.postfix = postfix
+        self.img_list = img_list
+        self.reorder = reorder
+        self.dtype = dtype
+        self.max_workers = max_workers
+        self.df = self._create_data_frame()
+
+    def _create_data_frame(self):
+        """Private method that returns a dataframe with information about the dataset."""
+
+        if self.path:
+            self.img_list = glob.glob(f'{self.path}/*{self.postfix}*')
+            if not self.img_list: print('Could not find images. Check the image path')
+
+        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
+            data_info_dict = list(executor.map(self._get_data_info, self.img_list))
+
+        df = pd.DataFrame(data_info_dict)
+
+        if df.orientation.nunique() > 1:
+            print('The volumes in this dataset have different orientations. '
+                  'Recommended to pass in the argument reorder=True when creating a MedDataset object for this dataset')
+
+        return df
+
+    def summary(self):
+        """Summary DataFrame of the dataset with example path for similar data."""
+
+        columns = ['dim_0', 'dim_1', 'dim_2', 'voxel_0', 'voxel_1', 'voxel_2', 'orientation']
+
+        return self.df.groupby(columns, as_index=False).agg(
+            example_path=('path', 'min'), total=('path', 'size')
+        ).sort_values('total', ascending=False)
+
+    def suggestion(self):
+        """Voxel value that appears most often in dim_0, dim_1 and dim_2, and whether the data should be reoriented."""
+
+        resample = [self.df.voxel_0.mode()[0], self.df.voxel_1.mode()[0], self.df.voxel_2.mode()[0]]
+        return resample, self.reorder
+
+    def _get_data_info(self, fn: str):
+        """Private method to collect information about an image file."""
+
+        _, o, _ = med_img_reader(fn, dtype=self.dtype, reorder=self.reorder, only_tensor=False)
+
+        info_dict = {'path': fn, 'dim_0': o.shape[1], 'dim_1': o.shape[2], 'dim_2': o.shape[3],
+                     'voxel_0': round(o.spacing[0], 4), 'voxel_1': round(o.spacing[1], 4), 'voxel_2': round(o.spacing[2], 4),
+                     'orientation': f'{"".join(o.orientation)}+'}
+
+        if self.dtype is MedMask:
+            mask_labels_dict = o.count_labels()
+            mask_labels_dict = {f'voxel_count_{int(key)}': val for key, val in mask_labels_dict.items()}
+            info_dict.update(mask_labels_dict)
+
+        return info_dict
+
+    def get_largest_img_size(self, resample: list = None) -> list:
+        """Get the largest image size in the dataset."""
+
+        dims = None
+
+        if resample is not None:
+            org_voxels = self.df[["voxel_0", "voxel_1", 'voxel_2']].values
+            org_dims = self.df[["dim_0", "dim_1", 'dim_2']].values
+
+            ratio = org_voxels / resample
+            new_dims = (org_dims * ratio).T
+            dims = [new_dims[0].max().round(), new_dims[1].max().round(), new_dims[2].max().round()]
+        else:
+            dims = [self.df.dim_0.max(), self.df.dim_1.max(), self.df.dim_2.max()]
+
+        return dims
+
+# %% ../nbs/08_dataset_info.ipynb 5
+def get_class_weights(labels: (np.array, list), class_weight: str = 'balanced') -> torch.Tensor:
+    """Calculates and returns the class weights.
+
+    Args:
+        labels: An array or list of class labels for each instance in the dataset.
+        class_weight: Defaults to 'balanced'.
+
+    Returns:
+        A tensor of class weights.
+    """
+
+    class_weights = compute_class_weight(class_weight=class_weight, classes=np.unique(labels), y=labels)
+
+    return torch.Tensor(class_weights)
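A minimal usage sketch of the new `dataset_info` module, based on the code above; the image folder path and the label list are hypothetical examples:

```python
from fastMONAI.dataset_info import MedDataset, get_class_weights

# Hypothetical image folder; any directory of NIfTI volumes works.
dataset = MedDataset(path='../data/IXI/T1_images', reorder=True, max_workers=4)

print(dataset.summary())                  # one row per (shape, spacing, orientation) group
resample, reorder = dataset.suggestion()  # most frequent voxel spacing + reorder flag
print(dataset.get_largest_img_size(resample=resample))

# Balanced class weights for an imbalanced task (hypothetical labels).
weights = get_class_weights([0, 0, 0, 1, 1, 2])
```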
fastMONAI-0.3.3/fastMONAI/external_data.py
@@ -0,0 +1,300 @@
+# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/09_external_data.ipynb.
+
+# %% auto 0
+__all__ = ['MURLs', 'download_ixi_data', 'download_ixi_tiny', 'download_spine_test_data', 'download_example_spine_data',
+           'download_medmnist3d_dataset', 'download_example_endometrial_cancer_data']
+
+# %% ../nbs/09_external_data.ipynb 1
+from pathlib import Path
+from glob import glob
+from numpy import load
+import pandas as pd
+from monai.apps import download_url, download_and_extract
+from torchio.datasets.ixi import IXITiny
+from torchio import ScalarImage
+import multiprocessing as mp
+from functools import partial
+
+# %% ../nbs/09_external_data.ipynb 3
+class MURLs():
+    """A class with external medical dataset URLs."""
+
+    IXI_DATA = 'http://biomedic.doc.ic.ac.uk/brain-development/downloads/IXI/IXI-T1.tar'
+    IXI_DEMOGRAPHIC_INFORMATION = 'http://biomedic.doc.ic.ac.uk/brain-development/downloads/IXI/IXI.xls'
+    CHENGWEN_CHU_SPINE_DATA = 'https://drive.google.com/uc?id=1rbm9-KKAexpNm2mC9FsSbfnS8VJaF3Kn&confirm=t'
+    EXAMPLE_SPINE_DATA = 'https://drive.google.com/uc?id=1Ms3Q6MYQrQUA_PKZbJ2t2NeYFQ5jloMh'
+    MEDMNIST_DICT = {'OrganMNIST3D': 'https://zenodo.org/record/6496656/files/organmnist3d.npz?download=1',
+                     'NoduleMNIST3D': 'https://zenodo.org/record/6496656/files/nodulemnist3d.npz?download=1',
+                     'AdrenalMNIST3D': 'https://zenodo.org/record/6496656/files/adrenalmnist3d.npz?download=1',
+                     'FractureMNIST3D': 'https://zenodo.org/record/6496656/files/fracturemnist3d.npz?download=1',
+                     'VesselMNIST3D': 'https://zenodo.org/record/6496656/files/vesselmnist3d.npz?download=1',
+                     'SynapseMNIST3D': 'https://zenodo.org/record/6496656/files/synapsemnist3d.npz?download=1'}
+    EXAMPLE_EC_DATA = 'https://drive.google.com/uc?id=1cjOBhkdRsoX3unxHiL377R5j8ottN4An'
+
+# %% ../nbs/09_external_data.ipynb 4
+def _process_ixi_xls(xls_path: (str, Path), img_path: Path) -> pd.DataFrame:
+    """Private method to process the demographic information for the IXI dataset.
+
+    Args:
+        xls_path: File path to the xls file with the demographic information.
+        img_path: Folder path to the images.
+
+    Returns:
+        A processed dataframe with image path and demographic information.
+
+    Raises:
+        ValueError: If xls_path or img_path do not exist.
+    """
+
+    print('Preprocessing ' + str(xls_path))
+
+    df = pd.read_excel(xls_path)
+
+    duplicate_subject_ids = df[df.duplicated(['IXI_ID'], keep=False)].IXI_ID.unique()
+
+    for subject_id in duplicate_subject_ids:
+        age = df.loc[df.IXI_ID == subject_id].AGE.nunique()
+        if age != 1: df = df.loc[df.IXI_ID != subject_id]  # Remove duplicates with two different age values
+
+    df = df.drop_duplicates(subset='IXI_ID', keep='first').reset_index(drop=True)
+
+    df['subject_id'] = ['IXI' + str(subject_id).zfill(3) for subject_id in df.IXI_ID.values]
+    df = df.rename(columns={'SEX_ID (1=m, 2=f)': 'gender'})
+    df['age_at_scan'] = df.AGE.round(2)
+    df = df.replace({'gender': {1: 'M', 2: 'F'}})
+
+    img_list = list(img_path.glob('*.nii.gz'))
+    for path in img_list:
+        subject_id = path.parts[-1].split('-')[0]
+        df.loc[df.subject_id == subject_id, 't1_path'] = str(path)
+
+    df = df.dropna()
+    df = df[['t1_path', 'subject_id', 'gender', 'age_at_scan']]
+
+    return df
+
+# %% ../nbs/09_external_data.ipynb 6
+def download_ixi_data(path: (str, Path) = '../data') -> Path:
+    """Download T1 scans and demographic information from the IXI dataset.
+
+    Args:
+        path: Path to the directory where the data will be stored. Defaults to '../data'.
+
+    Returns:
+        The path to the stored CSV file.
+    """
+
+    path = Path(path) / 'IXI'
+    img_path = path / 'T1_images'
+
+    # Check whether image data already present in img_path:
+    is_extracted = False
+    try:
+        if len(list(img_path.iterdir())) >= 581:  # 581 imgs in the IXI dataset
+            is_extracted = True
+            print(f"Images already downloaded and extracted to {img_path}")
+    except:
+        is_extracted = False
+
+    if not is_extracted:
+        download_and_extract(url=MURLs.IXI_DATA, filepath=path / 'IXI-T1.tar', output_dir=img_path)
+        (path / 'IXI-T1.tar').unlink()
+
+    download_url(url=MURLs.IXI_DEMOGRAPHIC_INFORMATION, filepath=path / 'IXI.xls')
+
+    processed_df = _process_ixi_xls(xls_path=path / 'IXI.xls', img_path=img_path)
+    processed_df.to_csv(path / 'dataset.csv', index=False)
+
+    return path
+
+# %% ../nbs/09_external_data.ipynb 8
+def download_ixi_tiny(path: (str, Path) = '../data') -> Path:
+    """Download the tiny version of the IXI dataset provided by TorchIO.
+
+    Args:
+        path: The directory where the data will be stored. If not provided, defaults to '../data'.
+
+    Returns:
+        The path to the directory where the data is stored.
+    """
+
+    path = Path(path) / 'IXITiny'
+
+    IXITiny(root=str(path), download=True)
+    download_url(url=MURLs.IXI_DEMOGRAPHIC_INFORMATION, filepath=path/'IXI.xls')
+
+    processed_df = _process_ixi_xls(xls_path=path/'IXI.xls', img_path=path/'image')
+    processed_df['labels'] = processed_df['t1_path'].str.replace('image', 'label')
+
+    processed_df.to_csv(path/'dataset.csv', index=False)
+
+    return path
+
+# %% ../nbs/09_external_data.ipynb 10
+def _create_spine_df(dir: Path) -> pd.DataFrame:
+    """Create a pandas DataFrame containing information about spinal images.
+
+    Args:
+        dir: Directory path where data (image and segmentation mask files) are stored.
+
+    Returns:
+        A DataFrame containing the paths to the image files and their
+        corresponding mask files, the subject IDs, and a flag indicating that
+        these are test data.
+    """
+
+    img_list = glob(str(dir / 'img/*.nii.gz'))
+    mask_list = [str(fn).replace('img', 'seg') for fn in img_list]
+    subject_id_list = [fn.split('_')[-1].split('.')[0] for fn in mask_list]
+
+    test_data = {
+        't2_img_path': img_list,
+        't2_mask_path': mask_list,
+        'subject_id': subject_id_list,
+        'is_test': True,
+    }
+
+    return pd.DataFrame(test_data)
+
+# %% ../nbs/09_external_data.ipynb 11
+def download_spine_test_data(path: (str, Path) = '../data') -> pd.DataFrame:
+    """Downloads T2w scans from the study 'Fully Automatic Localization and
+    Segmentation of 3D Vertebral Bodies from CT/MR Images via a Learning-Based
+    Method' by Chu et al.
+
+    Args:
+        path: Directory where the downloaded data will be stored and extracted. Defaults to '../data'.
+
+    Returns:
+        Processed dataframe containing image paths, label paths, and subject IDs.
+    """
+
+    study = 'chengwen_chu_2015'
+
+    download_and_extract(
+        url=MURLs.CHENGWEN_CHU_SPINE_DATA,
+        filepath=f'{study}.zip',
+        output_dir=path
+    )
+    Path(f'{study}.zip').unlink()
+
+    return _create_spine_df(Path(path) / study)
+
+# %% ../nbs/09_external_data.ipynb 12
+def download_example_spine_data(path: (str, Path) = '../data') -> Path:
+    """Downloads example T2w scan and corresponding predicted mask.
+
+    Args:
+        path: Directory where the downloaded data will be stored and extracted. Defaults to '../data'.
+
+    Returns:
+        Path to the directory where the example data has been extracted.
+    """
+
+    study = 'example_data'
+
+    download_and_extract(
+        url=MURLs.EXAMPLE_SPINE_DATA,
+        filepath='example_data.zip',
+        output_dir=path
+    )
+    Path('example_data.zip').unlink()
+
+    return Path(path) / study
+
+# %% ../nbs/09_external_data.ipynb 14
+def _process_medmnist_img(path, idx_arr):
+    """Save tensor as NIfTI."""
+
+    idx, arr = idx_arr
+    img = ScalarImage(tensor=arr[None, :])
+    fn = path/f'{idx}_nodule.nii.gz'
+    img.save(fn)
+    return str(fn)
+
+# %% ../nbs/09_external_data.ipynb 15
+def _df_sort_and_add_columns(df, label_list, is_val):
+    """Sort the dataframe based on img_idx, then add a labels column and a flag for whether it is validation data."""
+
+    df = df.sort_values(by='img_idx').reset_index(drop=True)
+    df['labels'], df['is_val'] = label_list, is_val
+    #df = df.replace({"labels": {0:'b', 1:'m'}})
+    df = df.drop('img_idx', axis=1)
+
+    return df
+
+# %% ../nbs/09_external_data.ipynb 16
+def _create_nodule_df(pool, output_dir, imgs, labels, is_val=False):
+    """Create dataframe for MedMNIST data."""
+
+    img_path_list = pool.map(partial(_process_medmnist_img, output_dir), enumerate(imgs))
+    img_idx = [float(Path(fn).parts[-1].split('_')[0]) for fn in img_path_list]
+
+    df = pd.DataFrame(list(zip(img_path_list, img_idx)), columns=['img_path', 'img_idx'])
+    return _df_sort_and_add_columns(df, labels, is_val)
+
+# %% ../nbs/09_external_data.ipynb 17
+def download_medmnist3d_dataset(study: str, path: (str, Path) = '../data',
+                                max_workers: int = 1):
+    """Downloads and processes a particular MedMNIST3D dataset.
+
+    Args:
+        study: MedMNIST dataset ('OrganMNIST3D', 'NoduleMNIST3D',
+            'AdrenalMNIST3D', 'FractureMNIST3D', 'VesselMNIST3D', 'SynapseMNIST3D').
+        path: Directory to store and extract downloaded data. Defaults to '../data'.
+        max_workers: Maximum number of worker processes for data processing. Defaults to 1.
+
+    Returns:
+        Two pandas DataFrames. The first DataFrame combines training and validation
+        data, and the second DataFrame contains the testing data.
+    """
+    path = Path(path) / study
+    dataset_file_path = path / f'{study}.npz'
+
+    try:
+        # TODO: check if dataset is downloaded
+        download_url(url=MURLs.MEDMNIST_DICT[study], filepath=dataset_file_path)
+    except:
+        raise ValueError(f"Dataset '{study}' does not exist.")
+
+    data = load(dataset_file_path)
+    keys = ['train_images', 'val_images', 'test_images']
+
+    for key in keys:
+        (path / key).mkdir(exist_ok=True)
+
+    train_imgs = data[keys[0]]
+    val_imgs = data[keys[1]]
+    test_imgs = data[keys[2]]
+
+    with mp.Pool(processes=max_workers) as pool:
+        train_df = _create_nodule_df(pool, path / keys[0], train_imgs,
+                                     data['train_labels'])
+        val_df = _create_nodule_df(pool, path / keys[1], val_imgs,
+                                   data['val_labels'], is_val=True)
+        test_df = _create_nodule_df(pool, path / keys[2], test_imgs,
+                                    data['test_labels'])
+
+    train_val_df = pd.concat([train_df, val_df], ignore_index=True)
+
+    dataset_file_path.unlink()
+
+    return train_val_df, test_df
+
+# %% ../nbs/09_external_data.ipynb 19
+def download_example_endometrial_cancer_data(path: (str, Path) = '../data') -> Path:
+    study = 'ec'
+
+    download_and_extract(
+        url=MURLs.EXAMPLE_EC_DATA,
+        filepath='ec.zip',
+        output_dir=path
+    )
+    Path('ec.zip').unlink()
+
+    return Path(path) / study
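A usage sketch for the downloader added in this release; `'NoduleMNIST3D'` is one of the six keys defined in `MURLs.MEDMNIST_DICT` above:

```python
from fastMONAI.external_data import download_medmnist3d_dataset

train_val_df, test_df = download_medmnist3d_dataset(
    study='NoduleMNIST3D', path='../data', max_workers=2)

print(train_val_df.columns.tolist())       # ['img_path', 'labels', 'is_val']
print(train_val_df.is_val.value_counts())  # rows from the validation split are flagged True
```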
{fastMONAI-0.3.1 → fastMONAI-0.3.3}/fastMONAI/utils.py
@@ -9,12 +9,8 @@ import torch
 from pathlib import Path
 
 # %% ../nbs/07_utils.ipynb 3
-def store_variables(pkl_fn:(str, Path),
-
-                    reorder:bool,
-                    resample:(int,list),
-                    ) -> None:
-    '''Save variable values in a pickle file.'''
+def store_variables(pkl_fn: (str, Path), size: list, reorder: bool, resample: (int, list)):
+    """Save variable values in a pickle file."""
 
     var_vals = [size, reorder, resample]
 
@@ -22,19 +18,21 @@ def store_variables(pkl_fn:(str, Path),
         pickle.dump(var_vals, f)
 
 # %% ../nbs/07_utils.ipynb 4
-def load_variables(pkl_fn
-
-    '''Load stored variable values from a pickle file.
+def load_variables(pkl_fn: (str, Path)):
+    """Loads stored variable values from a pickle file.
 
-
-
+    Args:
+        pkl_fn: File path of the pickle file to be loaded.
 
+    Returns:
+        The deserialized value of the pickled data.
+    """
     with open(pkl_fn, 'rb') as f:
         return pickle.load(f)
 
 # %% ../nbs/07_utils.ipynb 5
 def print_colab_gpu_info():
-
+    """Check if we have a GPU attached to the runtime."""
 
     colab_gpu_msg =(f"{'#'*80}\n"
                     "Remember to attach a GPU to your Colab Runtime:"
|