fastMONAI 0.3.1__tar.gz → 0.3.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. fastMONAI-0.3.3/CONTRIBUTING.md +11 -0
  2. {fastMONAI-0.3.1/fastMONAI.egg-info → fastMONAI-0.3.3}/PKG-INFO +1 -1
  3. fastMONAI-0.3.3/fastMONAI/__init__.py +1 -0
  4. {fastMONAI-0.3.1 → fastMONAI-0.3.3}/fastMONAI/_modidx.py +6 -20
  5. fastMONAI-0.3.3/fastMONAI/dataset_info.py +122 -0
  6. fastMONAI-0.3.3/fastMONAI/external_data.py +300 -0
  7. {fastMONAI-0.3.1 → fastMONAI-0.3.3}/fastMONAI/utils.py +10 -12
  8. fastMONAI-0.3.3/fastMONAI/vision_augmentation.py +281 -0
  9. {fastMONAI-0.3.1 → fastMONAI-0.3.3}/fastMONAI/vision_core.py +43 -27
  10. fastMONAI-0.3.3/fastMONAI/vision_data.py +267 -0
  11. {fastMONAI-0.3.1 → fastMONAI-0.3.3}/fastMONAI/vision_inference.py +37 -22
  12. fastMONAI-0.3.3/fastMONAI/vision_loss.py +107 -0
  13. fastMONAI-0.3.3/fastMONAI/vision_metrics.py +100 -0
  14. {fastMONAI-0.3.1 → fastMONAI-0.3.3}/fastMONAI/vision_plot.py +15 -13
  15. {fastMONAI-0.3.1 → fastMONAI-0.3.3/fastMONAI.egg-info}/PKG-INFO +1 -1
  16. {fastMONAI-0.3.1 → fastMONAI-0.3.3}/settings.ini +1 -1
  17. fastMONAI-0.3.1/CONTRIBUTING.md +0 -2
  18. fastMONAI-0.3.1/fastMONAI/__init__.py +0 -1
  19. fastMONAI-0.3.1/fastMONAI/dataset_info.py +0 -114
  20. fastMONAI-0.3.1/fastMONAI/external_data.py +0 -210
  21. fastMONAI-0.3.1/fastMONAI/vision_augmentation.py +0 -260
  22. fastMONAI-0.3.1/fastMONAI/vision_data.py +0 -177
  23. fastMONAI-0.3.1/fastMONAI/vision_loss.py +0 -98
  24. fastMONAI-0.3.1/fastMONAI/vision_metrics.py +0 -77
  25. {fastMONAI-0.3.1 → fastMONAI-0.3.3}/LICENSE +0 -0
  26. {fastMONAI-0.3.1 → fastMONAI-0.3.3}/MANIFEST.in +0 -0
  27. {fastMONAI-0.3.1 → fastMONAI-0.3.3}/README.md +0 -0
  28. {fastMONAI-0.3.1 → fastMONAI-0.3.3}/fastMONAI/research_utils.py +0 -0
  29. {fastMONAI-0.3.1 → fastMONAI-0.3.3}/fastMONAI/vision_all.py +0 -0
  30. {fastMONAI-0.3.1 → fastMONAI-0.3.3}/fastMONAI.egg-info/SOURCES.txt +0 -0
  31. {fastMONAI-0.3.1 → fastMONAI-0.3.3}/fastMONAI.egg-info/dependency_links.txt +0 -0
  32. {fastMONAI-0.3.1 → fastMONAI-0.3.3}/fastMONAI.egg-info/entry_points.txt +0 -0
  33. {fastMONAI-0.3.1 → fastMONAI-0.3.3}/fastMONAI.egg-info/not-zip-safe +0 -0
  34. {fastMONAI-0.3.1 → fastMONAI-0.3.3}/fastMONAI.egg-info/requires.txt +0 -0
  35. {fastMONAI-0.3.1 → fastMONAI-0.3.3}/fastMONAI.egg-info/top_level.txt +0 -0
  36. {fastMONAI-0.3.1 → fastMONAI-0.3.3}/setup.cfg +0 -0
  37. {fastMONAI-0.3.1 → fastMONAI-0.3.3}/setup.py +0 -0
@@ -0,0 +1,11 @@
+ # How to contribute
+ Contributions to the source code are greatly appreciated. If you find any issues or have suggestions for improvements, please open an issue in the corresponding GitHub repository. To contribute code changes, we encourage you to follow these [steps](https://docs.github.com/en/get-started/quickstart/contributing-to-projects).
+
+ ## Getting Started
+ 1. Install an editable version of the forked project: `pip install -e 'fastMONAI[dev]'`
+ 2. Git hooks run automatic scripts during each commit and merge to strip the notebooks of superfluous metadata and avoid merge conflicts. To set up the Git hooks, run the following command inside the project folder: `nbdev_install_hooks`
+ 3. Once you have installed the project and set up the Git hooks, you can start making changes to the notebooks.
+ 4. Run `nbdev_prepare` to build the .py modules from the notebooks and run any unit tests that have been written.
+
+ ## Contact
+ If you have any questions or need further assistance, please contact us at [skaliyugarasan@hotmail.com](mailto:skaliyugarasan@hotmail.com). We appreciate your interest and look forward to your contributions.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: fastMONAI
- Version: 0.3.1
+ Version: 0.3.3
  Summary: fastMONAI library
  Home-page: https://github.com/MMIV-ML/fastMONAI
  Author: Satheshkumar Kaliyugarasan
@@ -0,0 +1 @@
+ __version__ = "0.3.3"
@@ -29,16 +29,18 @@ d = { 'settings': { 'branch': 'master',
                                                  'fastMONAI/external_data.py'),
          'fastMONAI.external_data._process_ixi_xls': ( 'external_data.html#_process_ixi_xls',
                                                        'fastMONAI/external_data.py'),
-         'fastMONAI.external_data._process_nodule_img': ( 'external_data.html#_process_nodule_img',
-                                                          'fastMONAI/external_data.py'),
-         'fastMONAI.external_data.download_NoduleMNIST3D': ( 'external_data.html#download_nodulemnist3d',
-                                                             'fastMONAI/external_data.py'),
+         'fastMONAI.external_data._process_medmnist_img': ( 'external_data.html#_process_medmnist_img',
+                                                            'fastMONAI/external_data.py'),
+         'fastMONAI.external_data.download_example_endometrial_cancer_data': ( 'external_data.html#download_example_endometrial_cancer_data',
+                                                                               'fastMONAI/external_data.py'),
          'fastMONAI.external_data.download_example_spine_data': ( 'external_data.html#download_example_spine_data',
                                                                   'fastMONAI/external_data.py'),
          'fastMONAI.external_data.download_ixi_data': ( 'external_data.html#download_ixi_data',
                                                         'fastMONAI/external_data.py'),
          'fastMONAI.external_data.download_ixi_tiny': ( 'external_data.html#download_ixi_tiny',
                                                         'fastMONAI/external_data.py'),
+         'fastMONAI.external_data.download_medmnist3d_dataset': ( 'external_data.html#download_medmnist3d_dataset',
+                                                                  'fastMONAI/external_data.py'),
          'fastMONAI.external_data.download_spine_test_data': ( 'external_data.html#download_spine_test_data',
                                                                'fastMONAI/external_data.py')},
  'fastMONAI.research_utils': { 'fastMONAI.research_utils.pred_postprocess': ( 'research_utils.html#pred_postprocess',
@@ -131,22 +133,6 @@ d = { 'settings': { 'branch': 'master',
                                                            'fastMONAI/vision_augmentation.py'),
          'fastMONAI.vision_augmentation.ZNormalization.encodes': ( 'vision_augment.html#znormalization.encodes',
                                                                    'fastMONAI/vision_augmentation.py'),
-         'fastMONAI.vision_augmentation._do_rand_biasfield': ( 'vision_augment.html#_do_rand_biasfield',
-                                                               'fastMONAI/vision_augmentation.py'),
-         'fastMONAI.vision_augmentation._do_rand_blur': ( 'vision_augment.html#_do_rand_blur',
-                                                          'fastMONAI/vision_augmentation.py'),
-         'fastMONAI.vision_augmentation._do_rand_gamma': ( 'vision_augment.html#_do_rand_gamma',
-                                                           'fastMONAI/vision_augmentation.py'),
-         'fastMONAI.vision_augmentation._do_rand_ghosting': ( 'vision_augment.html#_do_rand_ghosting',
-                                                              'fastMONAI/vision_augmentation.py'),
-         'fastMONAI.vision_augmentation._do_rand_motion': ( 'vision_augment.html#_do_rand_motion',
-                                                            'fastMONAI/vision_augmentation.py'),
-         'fastMONAI.vision_augmentation._do_rand_noise': ( 'vision_augment.html#_do_rand_noise',
-                                                           'fastMONAI/vision_augmentation.py'),
-         'fastMONAI.vision_augmentation._do_rand_spike': ( 'vision_augment.html#_do_rand_spike',
-                                                           'fastMONAI/vision_augmentation.py'),
-         'fastMONAI.vision_augmentation._do_z_normalization': ( 'vision_augment.html#_do_z_normalization',
-                                                                'fastMONAI/vision_augmentation.py'),
          'fastMONAI.vision_augmentation.do_pad_or_crop': ( 'vision_augment.html#do_pad_or_crop',
                                                            'fastMONAI/vision_augmentation.py')},
  'fastMONAI.vision_core': { 'fastMONAI.vision_core.MedBase': ('vision_core.html#medbase', 'fastMONAI/vision_core.py'),
@@ -0,0 +1,122 @@
+ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/08_dataset_info.ipynb.
+
+ # %% auto 0
+ __all__ = ['MedDataset', 'get_class_weights']
+
+ # %% ../nbs/08_dataset_info.ipynb 2
+ from .vision_core import *
+
+ from sklearn.utils.class_weight import compute_class_weight
+ from concurrent.futures import ThreadPoolExecutor
+ import pandas as pd
+ import numpy as np
+ import torch
+ import glob
+
+ # %% ../nbs/08_dataset_info.ipynb 4
+ class MedDataset:
+     """A class to extract and present information about the dataset."""
+
+     def __init__(self, path=None, postfix: str = '', img_list: list = None,
+                  reorder: bool = False, dtype: (MedImage, MedMask) = MedImage,
+                  max_workers: int = 1):
+         """Constructs a MedDataset object.
+
+         Args:
+             path (str, optional): Path to the image folder.
+             postfix (str, optional): Specify the file type if there are different file types in the folder.
+             img_list (List[str], optional): Alternatively, pass in a list of image paths.
+             reorder (bool, optional): Whether to reorder the data to be closest to canonical (RAS+) orientation.
+             dtype (Union[MedImage, MedMask], optional): Load the data as this datatype. Default is MedImage.
+             max_workers (int, optional): The number of worker threads. Default is 1.
+         """
+
+         self.path = path
+         self.postfix = postfix
+         self.img_list = img_list
+         self.reorder = reorder
+         self.dtype = dtype
+         self.max_workers = max_workers
+         self.df = self._create_data_frame()
+
+     def _create_data_frame(self):
+         """Private method that returns a dataframe with information about the dataset."""
+
+         if self.path:
+             self.img_list = glob.glob(f'{self.path}/*{self.postfix}*')
+             if not self.img_list: print('Could not find images. Check the image path.')
+
+         with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
+             data_info_dict = list(executor.map(self._get_data_info, self.img_list))
+
+         df = pd.DataFrame(data_info_dict)
+
+         if df.orientation.nunique() > 1:
+             print('The volumes in this dataset have different orientations. '
+                   'It is recommended to pass reorder=True when creating a MedDataset object for this dataset.')
+
+         return df
+
+     def summary(self):
+         """Summary DataFrame of the dataset, with an example path for each group of similar data."""
+
+         columns = ['dim_0', 'dim_1', 'dim_2', 'voxel_0', 'voxel_1', 'voxel_2', 'orientation']
+
+         return self.df.groupby(columns, as_index=False).agg(
+             example_path=('path', 'min'), total=('path', 'size')
+         ).sort_values('total', ascending=False)
+
+     def suggestion(self):
+         """The voxel spacing that appears most often in dim_0, dim_1 and dim_2, and whether the data should be reoriented."""
+
+         resample = [self.df.voxel_0.mode()[0], self.df.voxel_1.mode()[0], self.df.voxel_2.mode()[0]]
+         return resample, self.reorder
+
+     def _get_data_info(self, fn: str):
+         """Private method to collect information about an image file."""
+         _, o, _ = med_img_reader(fn, dtype=self.dtype, reorder=self.reorder, only_tensor=False)
+
+         info_dict = {'path': fn, 'dim_0': o.shape[1], 'dim_1': o.shape[2], 'dim_2': o.shape[3],
+                      'voxel_0': round(o.spacing[0], 4), 'voxel_1': round(o.spacing[1], 4), 'voxel_2': round(o.spacing[2], 4),
+                      'orientation': f'{"".join(o.orientation)}+'}
+
+         if self.dtype is MedMask:
+             mask_labels_dict = o.count_labels()
+             mask_labels_dict = {f'voxel_count_{int(key)}': val for key, val in mask_labels_dict.items()}
+             info_dict.update(mask_labels_dict)
+
+         return info_dict
+
+     def get_largest_img_size(self, resample: list = None) -> list:
+         """Get the largest image size in the dataset."""
+
+         dims = None
+
+         if resample is not None:
+             org_voxels = self.df[["voxel_0", "voxel_1", 'voxel_2']].values
+             org_dims = self.df[["dim_0", "dim_1", 'dim_2']].values
+
+             ratio = org_voxels / resample
+             new_dims = (org_dims * ratio).T
+             dims = [new_dims[0].max().round(), new_dims[1].max().round(), new_dims[2].max().round()]
+
+         else:
+             dims = [self.df.dim_0.max(), self.df.dim_1.max(), self.df.dim_2.max()]
+
+         return dims
+
+ # %% ../nbs/08_dataset_info.ipynb 5
+ def get_class_weights(labels: (np.array, list), class_weight: str = 'balanced') -> torch.Tensor:
+     """Calculates and returns the class weights.
+
+     Args:
+         labels: An array or list of class labels for each instance in the dataset.
+         class_weight: The weighting scheme. Defaults to 'balanced'.
+
+     Returns:
+         A tensor of class weights.
+     """
+
+     class_weights = compute_class_weight(class_weight=class_weight, classes=np.unique(labels), y=labels)
+
+     return torch.Tensor(class_weights)
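The new `fastMONAI/dataset_info.py` module above is easiest to follow end to end. A minimal usage sketch, assuming a folder of NIfTI volumes at the hypothetical path `../data/IXI/T1_images`:

```python
from fastMONAI.dataset_info import MedDataset, get_class_weights

# Inspect a folder of volumes (hypothetical path).
med_dataset = MedDataset(path='../data/IXI/T1_images', max_workers=4)
med_dataset.summary()                         # shapes, spacings, and orientations, grouped
resample, reorder = med_dataset.suggestion()  # most common voxel spacing + reorder flag

# Balanced class weights for an imbalanced label list.
weights = get_class_weights([0, 0, 0, 1, 1])  # tensor([0.8333, 1.2500])
```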
@@ -0,0 +1,300 @@
+ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/09_external_data.ipynb.
+
+ # %% auto 0
+ __all__ = ['MURLs', 'download_ixi_data', 'download_ixi_tiny', 'download_spine_test_data', 'download_example_spine_data',
+            'download_medmnist3d_dataset', 'download_example_endometrial_cancer_data']
+
+ # %% ../nbs/09_external_data.ipynb 1
+ from pathlib import Path
+ from glob import glob
+ from numpy import load
+ import pandas as pd
+ from monai.apps import download_url, download_and_extract
+ from torchio.datasets.ixi import IXITiny
+ from torchio import ScalarImage
+ import multiprocessing as mp
+ from functools import partial
+
+ # %% ../nbs/09_external_data.ipynb 3
+ class MURLs():
+     """A class with external medical dataset URLs."""
+
+     IXI_DATA = 'http://biomedic.doc.ic.ac.uk/brain-development/downloads/IXI/IXI-T1.tar'
+     IXI_DEMOGRAPHIC_INFORMATION = 'http://biomedic.doc.ic.ac.uk/brain-development/downloads/IXI/IXI.xls'
+     CHENGWEN_CHU_SPINE_DATA = 'https://drive.google.com/uc?id=1rbm9-KKAexpNm2mC9FsSbfnS8VJaF3Kn&confirm=t'
+     EXAMPLE_SPINE_DATA = 'https://drive.google.com/uc?id=1Ms3Q6MYQrQUA_PKZbJ2t2NeYFQ5jloMh'
+     MEDMNIST_DICT = {'OrganMNIST3D': 'https://zenodo.org/record/6496656/files/organmnist3d.npz?download=1',
+                      'NoduleMNIST3D': 'https://zenodo.org/record/6496656/files/nodulemnist3d.npz?download=1',
+                      'AdrenalMNIST3D': 'https://zenodo.org/record/6496656/files/adrenalmnist3d.npz?download=1',
+                      'FractureMNIST3D': 'https://zenodo.org/record/6496656/files/fracturemnist3d.npz?download=1',
+                      'VesselMNIST3D': 'https://zenodo.org/record/6496656/files/vesselmnist3d.npz?download=1',
+                      'SynapseMNIST3D': 'https://zenodo.org/record/6496656/files/synapsemnist3d.npz?download=1'}
+     EXAMPLE_EC_DATA = 'https://drive.google.com/uc?id=1cjOBhkdRsoX3unxHiL377R5j8ottN4An'
+
+ # %% ../nbs/09_external_data.ipynb 4
+ def _process_ixi_xls(xls_path: (str, Path), img_path: Path) -> pd.DataFrame:
+     """Private function to process the demographic information for the IXI dataset.
+
+     Args:
+         xls_path: File path to the xls file with the demographic information.
+         img_path: Folder path to the images.
+
+     Returns:
+         A processed dataframe with image paths and demographic information.
+
+     Raises:
+         ValueError: If xls_path or img_path does not exist.
+     """
+
+     print('Preprocessing ' + str(xls_path))
+
+     df = pd.read_excel(xls_path)
+
+     duplicate_subject_ids = df[df.duplicated(['IXI_ID'], keep=False)].IXI_ID.unique()
+
+     for subject_id in duplicate_subject_ids:
+         age = df.loc[df.IXI_ID == subject_id].AGE.nunique()
+         if age != 1: df = df.loc[df.IXI_ID != subject_id]  # Remove duplicates with two different age values
+
+     df = df.drop_duplicates(subset='IXI_ID', keep='first').reset_index(drop=True)
+
+     df['subject_id'] = ['IXI' + str(subject_id).zfill(3) for subject_id in df.IXI_ID.values]
+     df = df.rename(columns={'SEX_ID (1=m, 2=f)': 'gender'})
+     df['age_at_scan'] = df.AGE.round(2)
+     df = df.replace({'gender': {1: 'M', 2: 'F'}})
+
+     img_list = list(img_path.glob('*.nii.gz'))
+     for path in img_list:
+         subject_id = path.parts[-1].split('-')[0]
+         df.loc[df.subject_id == subject_id, 't1_path'] = str(path)
+
+     df = df.dropna()
+     df = df[['t1_path', 'subject_id', 'gender', 'age_at_scan']]
+
+     return df
+
+ # %% ../nbs/09_external_data.ipynb 6
+ def download_ixi_data(path: (str, Path) = '../data') -> Path:
+     """Download T1 scans and demographic information from the IXI dataset.
+
+     Args:
+         path: Path to the directory where the data will be stored. Defaults to '../data'.
+
+     Returns:
+         The path to the directory containing the images and the dataset CSV file.
+     """
+
+     path = Path(path) / 'IXI'
+     img_path = path / 'T1_images'
+
+     # Check whether the image data is already present in img_path:
+     is_extracted = False
+     try:
+         if len(list(img_path.iterdir())) >= 581:  # 581 imgs in the IXI dataset
+             is_extracted = True
+             print(f"Images already downloaded and extracted to {img_path}")
+     except FileNotFoundError:
+         is_extracted = False
+
+     if not is_extracted:
+         download_and_extract(url=MURLs.IXI_DATA, filepath=path / 'IXI-T1.tar', output_dir=img_path)
+         (path / 'IXI-T1.tar').unlink()
+
+     download_url(url=MURLs.IXI_DEMOGRAPHIC_INFORMATION, filepath=path / 'IXI.xls')
+
+     processed_df = _process_ixi_xls(xls_path=path / 'IXI.xls', img_path=img_path)
+     processed_df.to_csv(path / 'dataset.csv', index=False)
+
+     return path
+
+ # %% ../nbs/09_external_data.ipynb 8
+ def download_ixi_tiny(path: (str, Path) = '../data') -> Path:
+     """Download the tiny version of the IXI dataset provided by TorchIO.
+
+     Args:
+         path: The directory where the data will be
+             stored. If not provided, defaults to '../data'.
+
+     Returns:
+         The path to the directory where the data is stored.
+     """
+
+     path = Path(path) / 'IXITiny'
+
+     IXITiny(root=str(path), download=True)
+     download_url(url=MURLs.IXI_DEMOGRAPHIC_INFORMATION, filepath=path / 'IXI.xls')
+
+     processed_df = _process_ixi_xls(xls_path=path / 'IXI.xls', img_path=path / 'image')
+     processed_df['labels'] = processed_df['t1_path'].str.replace('image', 'label')
+
+     processed_df.to_csv(path / 'dataset.csv', index=False)
+
+     return path
+
+ # %% ../nbs/09_external_data.ipynb 10
+ def _create_spine_df(dir: Path) -> pd.DataFrame:
+     """Create a pandas DataFrame containing information about the spine images.
+
+     Args:
+         dir: Directory path where the data (image and segmentation
+             mask files) is stored.
+
+     Returns:
+         A DataFrame containing the paths to the image files and their
+         corresponding mask files, the subject IDs, and a flag indicating that
+         these are test data.
+     """
+
+     img_list = glob(str(dir / 'img/*.nii.gz'))
+     mask_list = [str(fn).replace('img', 'seg') for fn in img_list]
+     subject_id_list = [fn.split('_')[-1].split('.')[0] for fn in mask_list]
+
+     test_data = {
+         't2_img_path': img_list,
+         't2_mask_path': mask_list,
+         'subject_id': subject_id_list,
+         'is_test': True,
+     }
+
+     return pd.DataFrame(test_data)
+
+ # %% ../nbs/09_external_data.ipynb 11
+ def download_spine_test_data(path: (str, Path) = '../data') -> pd.DataFrame:
+     """Downloads T2w scans from the study 'Fully Automatic Localization and
+     Segmentation of 3D Vertebral Bodies from CT/MR Images via a Learning-Based
+     Method' by Chu et al.
+
+     Args:
+         path: Directory where the downloaded data
+             will be stored and extracted. Defaults to '../data'.
+
+     Returns:
+         Processed dataframe containing image paths, mask paths, and subject IDs.
+     """
+
+     study = 'chengwen_chu_2015'
+
+     download_and_extract(
+         url=MURLs.CHENGWEN_CHU_SPINE_DATA,
+         filepath=f'{study}.zip',
+         output_dir=path
+     )
+     Path(f'{study}.zip').unlink()
+
+     return _create_spine_df(Path(path) / study)
+
+ # %% ../nbs/09_external_data.ipynb 12
+ def download_example_spine_data(path: (str, Path) = '../data') -> Path:
+     """Downloads an example T2w scan and its corresponding predicted mask.
+
+     Args:
+         path: Directory where the downloaded data
+             will be stored and extracted. Defaults to '../data'.
+
+     Returns:
+         Path to the directory where the example data has been extracted.
+     """
+
+     study = 'example_data'
+
+     download_and_extract(
+         url=MURLs.EXAMPLE_SPINE_DATA,
+         filepath='example_data.zip',
+         output_dir=path
+     )
+     Path('example_data.zip').unlink()
+
+     return Path(path) / study
+
+ # %% ../nbs/09_external_data.ipynb 14
+ def _process_medmnist_img(path, idx_arr):
+     """Save a tensor as a NIfTI file and return its path."""
+
+     idx, arr = idx_arr
+     img = ScalarImage(tensor=arr[None, :])
+     fn = path / f'{idx}_nodule.nii.gz'
+     img.save(fn)
+     return str(fn)
+
+ # %% ../nbs/09_external_data.ipynb 15
+ def _df_sort_and_add_columns(df, label_list, is_val):
+     """Sort the dataframe by img_idx and add the labels and an is_val (validation data) column."""
+
+     df = df.sort_values(by='img_idx').reset_index(drop=True)
+     df['labels'], df['is_val'] = label_list, is_val
+     #df = df.replace({"labels": {0:'b', 1:'m'}})
+     df = df.drop('img_idx', axis=1)
+
+     return df
+
+ # %% ../nbs/09_external_data.ipynb 16
+ def _create_nodule_df(pool, output_dir, imgs, labels, is_val=False):
+     """Create a dataframe for MedMNIST data."""
+
+     img_path_list = pool.map(partial(_process_medmnist_img, output_dir), enumerate(imgs))
+     img_idx = [float(Path(fn).parts[-1].split('_')[0]) for fn in img_path_list]
+
+     df = pd.DataFrame(list(zip(img_path_list, img_idx)), columns=['img_path', 'img_idx'])
+     return _df_sort_and_add_columns(df, labels, is_val)
+
+ # %% ../nbs/09_external_data.ipynb 17
+ def download_medmnist3d_dataset(study: str, path: (str, Path) = '../data',
+                                 max_workers: int = 1):
+     """Downloads and processes a particular MedMNIST3D dataset.
+
+     Args:
+         study: MedMNIST dataset ('OrganMNIST3D', 'NoduleMNIST3D',
+             'AdrenalMNIST3D', 'FractureMNIST3D', 'VesselMNIST3D', 'SynapseMNIST3D').
+         path: Directory to store and extract the downloaded data. Defaults to '../data'.
+         max_workers: Maximum number of worker processes for data processing.
+             Defaults to 1.
+
+     Returns:
+         Two pandas DataFrames. The first DataFrame combines training and validation
+         data, and the second DataFrame contains the testing data.
+     """
+     path = Path(path) / study
+     dataset_file_path = path / f'{study}.npz'
+
+     try:
+         #todo: check if dataset is downloaded
+         download_url(url=MURLs.MEDMNIST_DICT[study], filepath=dataset_file_path)
+     except Exception:
+         raise ValueError(f"Dataset '{study}' does not exist.")
+
+     data = load(dataset_file_path)
+     keys = ['train_images', 'val_images', 'test_images']
+
+     for key in keys:
+         (path / key).mkdir(exist_ok=True)
+
+     train_imgs = data[keys[0]]
+     val_imgs = data[keys[1]]
+     test_imgs = data[keys[2]]
+
+     with mp.Pool(processes=max_workers) as pool:
+         train_df = _create_nodule_df(pool, path / keys[0], train_imgs,
+                                      data['train_labels'])
+         val_df = _create_nodule_df(pool, path / keys[1], val_imgs,
+                                    data['val_labels'], is_val=True)
+         test_df = _create_nodule_df(pool, path / keys[2], test_imgs,
+                                     data['test_labels'])
+
+     train_val_df = pd.concat([train_df, val_df], ignore_index=True)
+
+     dataset_file_path.unlink()
+
+     return train_val_df, test_df
+
+ # %% ../nbs/09_external_data.ipynb 19
+ def download_example_endometrial_cancer_data(path: (str, Path) = '../data') -> Path:
+     study = 'ec'
+
+     download_and_extract(
+         url=MURLs.EXAMPLE_EC_DATA,
+         filepath='ec.zip',
+         output_dir=path
+     )
+     Path('ec.zip').unlink()
+
+     return Path(path) / study
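A brief usage sketch of the new MedMNIST3D downloader; the dataset name must be a key of `MURLs.MEDMNIST_DICT`, while the target folder and worker count here are arbitrary choices:

```python
from fastMONAI.external_data import download_medmnist3d_dataset

# Download NoduleMNIST3D, save each volume as NIfTI, and build the dataframes.
train_val_df, test_df = download_medmnist3d_dataset(
    study='NoduleMNIST3D', path='../data', max_workers=4)

print(list(train_val_df.columns))  # ['img_path', 'labels', 'is_val']
```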
@@ -9,12 +9,8 @@ import torch
  from pathlib import Path
 
  # %% ../nbs/07_utils.ipynb 3
- def store_variables(pkl_fn:(str, Path),
-                     size:list,
-                     reorder:bool,
-                     resample:(int,list),
-                     ) -> None:
-     '''Save variable values in a pickle file.'''
+ def store_variables(pkl_fn: (str, Path), size: list, reorder: bool, resample: (int, list)):
+     """Save variable values in a pickle file."""
 
      var_vals = [size, reorder, resample]
 
@@ -22,19 +18,21 @@ def store_variables(pkl_fn:(str, Path),
      pickle.dump(var_vals, f)
 
  # %% ../nbs/07_utils.ipynb 4
- def load_variables(pkl_fn  # Filename of the pickle file
-                    ):
-     '''Load stored variable values from a pickle file.
+ def load_variables(pkl_fn: (str, Path)):
+     """Loads stored variable values from a pickle file.
 
-     Returns: A list of variable values.
-     '''
+     Args:
+         pkl_fn: File path of the pickle file to be loaded.
 
+     Returns:
+         The deserialized value of the pickled data.
+     """
      with open(pkl_fn, 'rb') as f:
          return pickle.load(f)
 
  # %% ../nbs/07_utils.ipynb 5
  def print_colab_gpu_info():
-     '''Check if we have a GPU attached to the runtime.'''
+     """Check if we have a GPU attached to the runtime."""
 
      colab_gpu_msg =(f"{'#'*80}\n"
                      "Remember to attach a GPU to your Colab Runtime:"