fastMONAI 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
fastMONAI/__init__.py CHANGED
@@ -1 +1 @@
- __version__ = "0.3.1"
+ __version__ = "0.3.3"
fastMONAI/_modidx.py CHANGED
@@ -29,16 +29,18 @@ d = { 'settings': { 'branch': 'master',
      'fastMONAI/external_data.py'),
  'fastMONAI.external_data._process_ixi_xls': ( 'external_data.html#_process_ixi_xls',
      'fastMONAI/external_data.py'),
- 'fastMONAI.external_data._process_nodule_img': ( 'external_data.html#_process_nodule_img',
-     'fastMONAI/external_data.py'),
- 'fastMONAI.external_data.download_NoduleMNIST3D': ( 'external_data.html#download_nodulemnist3d',
-     'fastMONAI/external_data.py'),
+ 'fastMONAI.external_data._process_medmnist_img': ( 'external_data.html#_process_medmnist_img',
+     'fastMONAI/external_data.py'),
+ 'fastMONAI.external_data.download_example_endometrial_cancer_data': ( 'external_data.html#download_example_endometrial_cancer_data',
+     'fastMONAI/external_data.py'),
  'fastMONAI.external_data.download_example_spine_data': ( 'external_data.html#download_example_spine_data',
      'fastMONAI/external_data.py'),
  'fastMONAI.external_data.download_ixi_data': ( 'external_data.html#download_ixi_data',
      'fastMONAI/external_data.py'),
  'fastMONAI.external_data.download_ixi_tiny': ( 'external_data.html#download_ixi_tiny',
      'fastMONAI/external_data.py'),
+ 'fastMONAI.external_data.download_medmnist3d_dataset': ( 'external_data.html#download_medmnist3d_dataset',
+     'fastMONAI/external_data.py'),
  'fastMONAI.external_data.download_spine_test_data': ( 'external_data.html#download_spine_test_data',
      'fastMONAI/external_data.py')},
  'fastMONAI.research_utils': { 'fastMONAI.research_utils.pred_postprocess': ( 'research_utils.html#pred_postprocess',
@@ -131,22 +133,6 @@ d = { 'settings': { 'branch': 'master',
      'fastMONAI/vision_augmentation.py'),
  'fastMONAI.vision_augmentation.ZNormalization.encodes': ( 'vision_augment.html#znormalization.encodes',
      'fastMONAI/vision_augmentation.py'),
- 'fastMONAI.vision_augmentation._do_rand_biasfield': ( 'vision_augment.html#_do_rand_biasfield',
-     'fastMONAI/vision_augmentation.py'),
- 'fastMONAI.vision_augmentation._do_rand_blur': ( 'vision_augment.html#_do_rand_blur',
-     'fastMONAI/vision_augmentation.py'),
- 'fastMONAI.vision_augmentation._do_rand_gamma': ( 'vision_augment.html#_do_rand_gamma',
-     'fastMONAI/vision_augmentation.py'),
- 'fastMONAI.vision_augmentation._do_rand_ghosting': ( 'vision_augment.html#_do_rand_ghosting',
-     'fastMONAI/vision_augmentation.py'),
- 'fastMONAI.vision_augmentation._do_rand_motion': ( 'vision_augment.html#_do_rand_motion',
-     'fastMONAI/vision_augmentation.py'),
- 'fastMONAI.vision_augmentation._do_rand_noise': ( 'vision_augment.html#_do_rand_noise',
-     'fastMONAI/vision_augmentation.py'),
- 'fastMONAI.vision_augmentation._do_rand_spike': ( 'vision_augment.html#_do_rand_spike',
-     'fastMONAI/vision_augmentation.py'),
- 'fastMONAI.vision_augmentation._do_z_normalization': ( 'vision_augment.html#_do_z_normalization',
-     'fastMONAI/vision_augmentation.py'),
  'fastMONAI.vision_augmentation.do_pad_or_crop': ( 'vision_augment.html#do_pad_or_crop',
      'fastMONAI/vision_augmentation.py')},
  'fastMONAI.vision_core': { 'fastMONAI.vision_core.MedBase': ('vision_core.html#medbase', 'fastMONAI/vision_core.py'),
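
In plain terms, the index diff above records the 0.3.3 API changes: in `fastMONAI.external_data`, `_process_nodule_img` becomes `_process_medmnist_img`, `download_NoduleMNIST3D` becomes `download_medmnist3d_dataset`, and `download_example_endometrial_cancer_data` is new; in `fastMONAI.vision_augmentation`, the private `_do_rand_*` and `_do_z_normalization` helpers are removed from the index.
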
fastMONAI/dataset_info.py CHANGED
@@ -14,18 +14,23 @@ import torch
  import glob
 
  # %% ../nbs/08_dataset_info.ipynb 4
- class MedDataset():
-     '''A class to extract and present information about the dataset.'''
-
-     def __init__(self, path=None, # Path to the image folder
-                  postfix:str='', # Specify the file type if there are different files in the folder
-                  img_list:list=None, # Alternatively pass in a list with image paths
-                  reorder:bool=False, # Whether to reorder the data to be closest to canonical (RAS+) orientation
-                  dtype:(MedImage, MedMask)=MedImage, # Load data as datatype
-                  max_workers:int=1 # The number of worker threads
-                  ):
-         '''Constructs all the necessary attributes for the MedDataset object.'''
+ class MedDataset:
+     """A class to extract and present information about the dataset."""
 
+     def __init__(self, path=None, postfix: str = '', img_list: list = None,
+                  reorder: bool = False, dtype: (MedImage, MedMask) = MedImage,
+                  max_workers: int = 1):
+         """Constructs MedDataset object.
+
+         Args:
+             path (str, optional): Path to the image folder.
+             postfix (str, optional): Specify the file type if there are different files in the folder.
+             img_list (List[str], optional): Alternatively, pass in a list with image paths.
+             reorder (bool, optional): Whether to reorder the data to be closest to canonical (RAS+) orientation.
+             dtype (Union[MedImage, MedMask], optional): Load data as datatype. Default is MedImage.
+             max_workers (int, optional): The number of worker threads. Default is 1.
+         """
+
          self.path = path
          self.postfix = postfix
         self.img_list = img_list
@@ -35,48 +40,43 @@ class MedDataset():
          self.df = self._create_data_frame()
 
      def _create_data_frame(self):
-         '''Private method that returns a dataframe with information about the dataset
-
-         Returns:
-             DataFrame: A DataFrame with information about the dataset.
-         '''
+         """Private method that returns a dataframe with information about the dataset."""
 
          if self.path:
              self.img_list = glob.glob(f'{self.path}/*{self.postfix}*')
              if not self.img_list: print('Could not find images. Check the image path')
-
+
          with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
              data_info_dict = list(executor.map(self._get_data_info, self.img_list))
-
+
          df = pd.DataFrame(data_info_dict)
-         if df.orientation.nunique() > 1: print('The volumes in this dataset have different orientations. Recommended to pass in the argument reorder=True when creating a MedDataset object for this dataset')
+
+         if df.orientation.nunique() > 1:
+             print('The volumes in this dataset have different orientations. '
+                   'Recommended to pass in the argument reorder=True when creating a MedDataset object for this dataset')
+
          return df
 
      def summary(self):
-         '''Summary DataFrame of the dataset with example path for similar data.'''
-
+         """Summary DataFrame of the dataset with example path for similar data."""
+
          columns = ['dim_0', 'dim_1', 'dim_2', 'voxel_0', 'voxel_1', 'voxel_2', 'orientation']
-         return self.df.groupby(columns,as_index=False).agg(example_path=('path', 'min'), total=('path', 'size')).sort_values('total', ascending=False)
+
+         return self.df.groupby(columns, as_index=False).agg(
+             example_path=('path', 'min'), total=('path', 'size')
+         ).sort_values('total', ascending=False)
 
      def suggestion(self):
-         '''Voxel value that appears most often in dim_0, dim_1 and dim_2, and wheter the data should be reoriented.'''
+         """Voxel value that appears most often in dim_0, dim_1 and dim_2, and whether the data should be reoriented."""
+
          resample = [self.df.voxel_0.mode()[0], self.df.voxel_1.mode()[0], self.df.voxel_2.mode()[0]]
-
          return resample, self.reorder
 
-     def _get_data_info(self, fn:str):
-         '''Private method to collect information about an image file.
+     def _get_data_info(self, fn: str):
+         """Private method to collect information about an image file."""
+         _, o, _ = med_img_reader(fn, dtype=self.dtype, reorder=self.reorder, only_tensor=False)
 
-         Args:
-             fn: Image file path.
-
-         Returns:
-             dict: A dictionary with information about the image file
-         '''
-
-         _,o,_ = med_img_reader(fn, dtype=self.dtype, reorder=self.reorder, only_tensor=False)
-
-         info_dict = {'path': fn, 'dim_0': o.shape[1], 'dim_1': o.shape[2], 'dim_2' :o.shape[3],
+         info_dict = {'path': fn, 'dim_0': o.shape[1], 'dim_1': o.shape[2], 'dim_2': o.shape[3],
                       'voxel_0': round(o.spacing[0], 4), 'voxel_1': round(o.spacing[1], 4), 'voxel_2': round(o.spacing[2], 4),
                       'orientation': f'{"".join(o.orientation)}+'}
 
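
A minimal usage sketch for the refactored class (import path taken from the file header above; the data folder and worker count are illustrative):

    from fastMONAI.dataset_info import MedDataset

    # Inspect a folder of NIfTI volumes (illustrative path)
    dataset = MedDataset(path='../data/IXI/T1_images', reorder=True, max_workers=4)

    print(dataset.summary())                  # one row per (shape, spacing, orientation) group
    resample, reorder = dataset.suggestion()  # most frequent voxel spacings + reorder flag
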
@@ -87,28 +87,36 @@ class MedDataset():
 
          return info_dict
 
-     def get_largest_img_size(self,
-                              resample:list=None # A list with voxel spacing [dim_0, dim_1, dim_2]
-                              ) -> list:
-         '''Get the largest image size in the dataset.'''
-         dims = None
+     def get_largest_img_size(self, resample: list = None) -> list:
+         """Get the largest image size in the dataset."""
 
-         if resample is not None:
-
+         dims = None
+
+         if resample is not None:
              org_voxels = self.df[["voxel_0", "voxel_1", 'voxel_2']].values
              org_dims = self.df[["dim_0", "dim_1", 'dim_2']].values
-
+
              ratio = org_voxels/resample
              new_dims = (org_dims * ratio).T
              dims = [new_dims[0].max().round(), new_dims[1].max().round(), new_dims[2].max().round()]
-
-         else: dims = [df.dim_0.max(), df.dim_1.max(), df.dim_2.max()]
-
+
+         else:
+             dims = [df.dim_0.max(), df.dim_1.max(), df.dim_2.max()]
+
          return dims
 
  # %% ../nbs/08_dataset_info.ipynb 5
- def get_class_weights(train_labels:(np.array, list), class_weight='balanced'):
-     '''calculate class weights.'''
+ def get_class_weights(labels: (np.array, list), class_weight: str = 'balanced') -> torch.Tensor:
+     """Calculates and returns the class weights.
+
+     Args:
+         labels: An array or list of class labels for each instance in the dataset.
+         class_weight: Defaults to 'balanced'.
+
+     Returns:
+         A tensor of class weights.
+     """
+
+     class_weights = compute_class_weight(class_weight=class_weight, classes=np.unique(labels), y=labels)
 
-     class_weights = compute_class_weight(class_weight=class_weight, classes=np.unique(train_labels), y=train_labels)
      return torch.Tensor(class_weights)
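
`get_class_weights` wraps scikit-learn's `compute_class_weight` and returns a `torch.Tensor`, so the result can be passed straight to a weighted loss. A small sketch with made-up labels:

    import torch
    from fastMONAI.dataset_info import get_class_weights

    labels = ['b', 'b', 'b', 'm']        # imbalanced toy labels
    weights = get_class_weights(labels)  # 'balanced' -> tensor([0.6667, 2.0000])

    loss_func = torch.nn.CrossEntropyLoss(weight=weights)
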
fastMONAI/external_data.py CHANGED
@@ -2,9 +2,9 @@
 
  # %% auto 0
  __all__ = ['MURLs', 'download_ixi_data', 'download_ixi_tiny', 'download_spine_test_data', 'download_example_spine_data',
-            'download_NoduleMNIST3D']
+            'download_medmnist3d_dataset', 'download_example_endometrial_cancer_data']
 
- # %% ../nbs/09_external_data.ipynb 2
+ # %% ../nbs/09_external_data.ipynb 1
  from pathlib import Path
  from glob import glob
  from numpy import load
@@ -15,27 +15,36 @@ from torchio import ScalarImage
  import multiprocessing as mp
  from functools import partial
 
- # %% ../nbs/09_external_data.ipynb 4
+ # %% ../nbs/09_external_data.ipynb 3
  class MURLs():
-     '''A class with external medical dataset URLs.'''
+     """A class with external medical dataset URLs."""
 
      IXI_DATA = 'http://biomedic.doc.ic.ac.uk/brain-development/downloads/IXI/IXI-T1.tar'
      IXI_DEMOGRAPHIC_INFORMATION = 'http://biomedic.doc.ic.ac.uk/brain-development/downloads/IXI/IXI.xls'
      CHENGWEN_CHU_SPINE_DATA = 'https://drive.google.com/uc?id=1rbm9-KKAexpNm2mC9FsSbfnS8VJaF3Kn&confirm=t'
      EXAMPLE_SPINE_DATA = 'https://drive.google.com/uc?id=1Ms3Q6MYQrQUA_PKZbJ2t2NeYFQ5jloMh'
-     NODULE_MNIST_DATA = 'https://zenodo.org/record/6496656/files/nodulemnist3d.npz?download=1'
+     MEDMNIST_DICT = {'OrganMNIST3D': 'https://zenodo.org/record/6496656/files/organmnist3d.npz?download=1',
+                      'NoduleMNIST3D': 'https://zenodo.org/record/6496656/files/nodulemnist3d.npz?download=1',
+                      'AdrenalMNIST3D': 'https://zenodo.org/record/6496656/files/adrenalmnist3d.npz?download=1',
+                      'FractureMNIST3D': 'https://zenodo.org/record/6496656/files/fracturemnist3d.npz?download=1',
+                      'VesselMNIST3D': 'https://zenodo.org/record/6496656/files/vesselmnist3d.npz?download=1',
+                      'SynapseMNIST3D': 'https://zenodo.org/record/6496656/files/synapsemnist3d.npz?download=1'}
+     EXAMPLE_EC_DATA = 'https://drive.google.com/uc?id=1cjOBhkdRsoX3unxHiL377R5j8ottN4An'
 
- # %% ../nbs/09_external_data.ipynb 5
- def _process_ixi_xls(xls_path:(str, Path), img_path: Path):
-     '''Private method to process the demographic information for the IXI dataset.
+ # %% ../nbs/09_external_data.ipynb 4
+ def _process_ixi_xls(xls_path: (str, Path), img_path: Path) -> pd.DataFrame:
+     """Private method to process the demographic information for the IXI dataset.
 
      Args:
          xls_path: File path to the xls file with the demographic information.
-         img_path: Folder path to the images
+         img_path: Folder path to the images.
 
      Returns:
-         DataFrame: A processed dataframe with image path and demographic information.
-     '''
+         A processed dataframe with image path and demographic information.
+
+     Raises:
+         ValueError: If xls_path or img_path do not exist.
+     """
 
      print('Preprocessing ' + str(xls_path))
 
@@ -45,14 +54,14 @@ def _process_ixi_xls(xls_path:(str, Path), img_path: Path):
 
      for subject_id in duplicate_subject_ids:
          age = df.loc[df.IXI_ID == subject_id].AGE.nunique()
-         if age != 1: df = df.loc[df.IXI_ID != subject_id] #Remove duplicates with two different age values
+         if age != 1: df = df.loc[df.IXI_ID != subject_id] # Remove duplicates with two different age values
 
      df = df.drop_duplicates(subset='IXI_ID', keep='first').reset_index(drop=True)
 
      df['subject_id'] = ['IXI' + str(subject_id).zfill(3) for subject_id in df.IXI_ID.values]
      df = df.rename(columns={'SEX_ID (1=m, 2=f)': 'gender'})
      df['age_at_scan'] = df.AGE.round(2)
-     df = df.replace({'gender': {1:'M', 2:'F'}})
+     df = df.replace({'gender': {1: 'M', 2: 'F'}})
 
      img_list = list(img_path.glob('*.nii.gz'))
      for path in img_list:
@@ -61,50 +70,58 @@ def _process_ixi_xls(xls_path:(str, Path), img_path: Path):
 
      df = df.dropna()
      df = df[['t1_path', 'subject_id', 'gender', 'age_at_scan']]
+
      return df
 
- # %% ../nbs/09_external_data.ipynb 7
- def download_ixi_data(path:(str, Path)='../data' # Path to the directory where the data will be stored
-                       ):
-     '''Download T1 scans and demographic information from the IXI dataset, then process the demographic
-     information for each subject and save the information as a CSV file.
-     Returns path to the stored CSV file.
-     '''
-     path = Path(path)/'IXI'
-     img_path = path/'T1_images'
+ # %% ../nbs/09_external_data.ipynb 6
+ def download_ixi_data(path: (str, Path) = '../data') -> Path:
+     """Download T1 scans and demographic information from the IXI dataset.
+
+     Args:
+         path: Path to the directory where the data will be stored. Defaults to '../data'.
+
+     Returns:
+         The path to the stored CSV file.
+     """
+
+     path = Path(path) / 'IXI'
+     img_path = path / 'T1_images'
 
      # Check whether image data already present in img_path:
-     is_extracted=False
+     is_extracted = False
      try:
-         if len(list(img_path.iterdir())) >= 581: # 581 imgs in the IXI dataset
-             is_extracted=True
+         if len(list(img_path.iterdir())) >= 581:  # 581 imgs in the IXI dataset
+             is_extracted = True
          print(f"Images already downloaded and extracted to {img_path}")
      except:
-         is_extracted=False
-
-     # Download and extract images
-     if not is_extracted:
-         download_and_extract(url=MURLs.IXI_DATA, filepath=path/'IXI-T1.tar', output_dir=img_path)
-         (path/'IXI-T1.tar').unlink()
+         is_extracted = False
 
+     if not is_extracted:
+         download_and_extract(url=MURLs.IXI_DATA, filepath=path / 'IXI-T1.tar', output_dir=img_path)
+         (path / 'IXI-T1.tar').unlink()
 
-     # Download demographic info
-     download_url(url=MURLs.IXI_DEMOGRAPHIC_INFORMATION, filepath=path/'IXI.xls')
+     download_url(url=MURLs.IXI_DEMOGRAPHIC_INFORMATION, filepath=path / 'IXI.xls')
 
-     processed_df = _process_ixi_xls(xls_path=path/'IXI.xls', img_path=img_path)
-     processed_df.to_csv(path/'dataset.csv',index=False)
+     processed_df = _process_ixi_xls(xls_path=path / 'IXI.xls', img_path=img_path)
+     processed_df.to_csv(path / 'dataset.csv', index=False)
 
      return path
 
- # %% ../nbs/09_external_data.ipynb 9
- def download_ixi_tiny(path:(str, Path)='../data'):
-     ''' Download tiny version of IXI provided by TorchIO, containing 566 T1 brain MR scans and their corresponding brain segmentations.'''
+ # %% ../nbs/09_external_data.ipynb 8
+ def download_ixi_tiny(path: (str, Path) = '../data') -> Path:
+     """Download the tiny version of the IXI dataset provided by TorchIO.
+
+     Args:
+         path: The directory where the data will be
+             stored. If not provided, defaults to '../data'.
+
+     Returns:
+         The path to the directory where the data is stored.
+     """
 
-     path = Path(path)/'IXITiny'
+     path = Path(path) / 'IXITiny'
 
-     #Download MR scans and segmentation masks
      IXITiny(root=str(path), download=True)
-     # Download demographic info
      download_url(url=MURLs.IXI_DEMOGRAPHIC_INFORMATION, filepath=path/'IXI.xls')
 
      processed_df = _process_ixi_xls(xls_path=path/'IXI.xls', img_path=path/'image')
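
Both IXI downloaders are called the same way as in 0.3.1; a minimal sketch (paths are illustrative, and the full T1 archive is several gigabytes):

    import pandas as pd
    from fastMONAI.external_data import download_ixi_data, download_ixi_tiny

    data_path = download_ixi_data(path='../data')  # returns the folder holding dataset.csv
    df = pd.read_csv(data_path / 'dataset.csv')    # t1_path, subject_id, gender, age_at_scan

    tiny_path = download_ixi_tiny(path='../data')  # small TorchIO version for quick experiments
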
@@ -115,96 +132,169 @@ def download_ixi_tiny(path:(str, Path)='../data'):
      return path
 
  # %% ../nbs/09_external_data.ipynb 10
- def _create_spine_df(test_dir:Path):
-     # Get a list of the image files in the 'img' directory
-     img_list = glob(str(test_dir/'img/*.nii.gz'))
+ def _create_spine_df(dir: Path) -> pd.DataFrame:
+     """Create a pandas DataFrame containing information about spinal images.
 
-     # Create a list of the corresponding mask files in the 'seg' directory
-     mask_list = [str(fn).replace('img', 'seg') for fn in img_list]
+     Args:
+         dir: Directory path where data (image and segmentation
+             mask files) are stored.
 
-     # Create a list of the subject IDs for each image file
+     Returns:
+         A DataFrame containing the paths to the image files and their
+         corresponding mask files, the subject IDs, and a flag indicating that
+         these are test data.
+     """
+
+     img_list = glob(str(dir / 'img/*.nii.gz'))
+     mask_list = [str(fn).replace('img', 'seg') for fn in img_list]
      subject_id_list = [fn.split('_')[-1].split('.')[0] for fn in mask_list]
 
-     # Create a dictionary containing the test data
-     test_data = {'t2_img_path':img_list, 't2_mask_path':mask_list, 'subject_id':subject_id_list, 'is_test':True}
+     test_data = {
+         't2_img_path': img_list,
+         't2_mask_path': mask_list,
+         'subject_id': subject_id_list,
+         'is_test': True,
+     }
 
-     # Create a DataFrame from the example data dictionary
      return pd.DataFrame(test_data)
 
- # %% ../nbs/09_external_data.ipynb 12
- def download_spine_test_data(path:(str, Path)='../data'):
+ # %% ../nbs/09_external_data.ipynb 11
+ def download_spine_test_data(path: (str, Path) = '../data') -> pd.DataFrame:
+     """Downloads T2w scans from the study 'Fully Automatic Localization and
+     Segmentation of 3D Vertebral Bodies from CT/MR Images via a Learning-Based
+     Method' by Chu et. al.
+
+     Args:
+         path: Directory where the downloaded data
+             will be stored and extracted. Defaults to '../data'.
+
+     Returns:
+         Processed dataframe containing image paths, label paths, and subject IDs.
+     """
 
-     ''' Download T2w scans from 'Fully Automatic Localization and Segmentation of 3D Vertebral Bodies from CT/MR Images via a Learning-Based Method' study by Chu et. al.
-     Returns a processed dataframe with image path, label path and subject IDs.
-     '''
      study = 'chengwen_chu_2015'
 
-     download_and_extract(url=MURLs.CHENGWEN_CHU_SPINE_DATA, filepath=f'{study}.zip', output_dir=path)
+     download_and_extract(
+         url=MURLs.CHENGWEN_CHU_SPINE_DATA,
+         filepath=f'{study}.zip',
+         output_dir=path
+     )
      Path(f'{study}.zip').unlink()
 
-     return _create_spine_df(Path(path)/study)
+     return _create_spine_df(Path(path) / study)
 
- # %% ../nbs/09_external_data.ipynb 13
- def download_example_spine_data(path:(str, Path)='../data'):
+ # %% ../nbs/09_external_data.ipynb 12
+ def download_example_spine_data(path: (str, Path) = '../data') -> Path:
+     """Downloads example T2w scan and corresponding predicted mask.
+
+     Args:
+         path: Directory where the downloaded data
+             will be stored and extracted. Defaults to '../data'.
+
+     Returns:
+         Path to the directory where the example data has been extracted.
+     """
 
-     '''Download example T2w scan and predicted mask.'''
      study = 'example_data'
 
-     download_and_extract(url=MURLs.EXAMPLE_SPINE_DATA, filepath='example_data.zip', output_dir=path);
+     download_and_extract(
+         url=MURLs.EXAMPLE_SPINE_DATA,
+         filepath='example_data.zip',
+         output_dir=path
+     )
      Path('example_data.zip').unlink()
 
-     return Path(path/study)
+     return Path(path) / study
 
- # %% ../nbs/09_external_data.ipynb 15
- def _process_nodule_img(path, idx_arr):
-     '''Save tensor as NIfTI.'''
+ # %% ../nbs/09_external_data.ipynb 14
+ def _process_medmnist_img(path, idx_arr):
+     """Save tensor as NIfTI."""
+
      idx, arr = idx_arr
      img = ScalarImage(tensor=arr[None, :])
      fn = path/f'{idx}_nodule.nii.gz'
      img.save(fn)
      return str(fn)
 
- # %% ../nbs/09_external_data.ipynb 16
+ # %% ../nbs/09_external_data.ipynb 15
  def _df_sort_and_add_columns(df, label_list, is_val):
-     '''Sort the dataframe based on img_idx and add labels and if it is validation data column'''
+     """Sort the dataframe based on img_idx and add labels and if it is validation data column."""
+
      df = df.sort_values(by='img_idx').reset_index(drop=True)
      df['labels'], df['is_val'] = label_list, is_val
-     df = df.replace({"labels": {0:'b', 1:'m'}})
+     #df = df.replace({"labels": {0:'b', 1:'m'}})
      df = df.drop('img_idx', axis=1)
 
      return df
 
- # %% ../nbs/09_external_data.ipynb 17
+ # %% ../nbs/09_external_data.ipynb 16
  def _create_nodule_df(pool, output_dir, imgs, labels, is_val=False):
-     '''Create dataframe for NoduleMNIST3D data.'''
-     img_path_list = pool.map(partial(_process_nodule_img, output_dir), enumerate(imgs))
+     """Create dataframe for MedMNIST data."""
+
+     img_path_list = pool.map(partial(_process_medmnist_img, output_dir), enumerate(imgs))
      img_idx = [float(Path(fn).parts[-1].split('_')[0]) for fn in img_path_list]
 
      df = pd.DataFrame(list(zip(img_path_list, img_idx)), columns=['img_path','img_idx'])
      return _df_sort_and_add_columns(df, labels, is_val)
 
- # %% ../nbs/09_external_data.ipynb 18
- def download_NoduleMNIST3D(path:(str, Path)='../data', max_workers=1):
-
-     '''Download ....'''
-     study = 'NoduleMNIST3D'
-     path = Path(path)/study
-
-     download_url(url=MURLs.NODULE_MNIST_DATA, filepath=path/f'{study}.npz');
-     data = load(path/f'{study}.npz')
-     key_fn = ['train_images', 'val_images', 'test_images']
-     for fn in key_fn: (path/fn).mkdir(exist_ok=True)
-
-
-     train_imgs, val_imgs, test_imgs = data[key_fn[0]], data[key_fn[1]], data[key_fn[2]]
+ # %% ../nbs/09_external_data.ipynb 17
+ def download_medmnist3d_dataset(study: str, path: (str, Path) = '../data',
+                                 max_workers: int = 1):
+     """Downloads and processes a particular MedMNIST3D dataset.
 
+     Args:
+         study: MedMNIST dataset ('OrganMNIST3D', 'NoduleMNIST3D',
+             'AdrenalMNIST3D', 'FractureMNIST3D', 'VesselMNIST3D', 'SynapseMNIST3D')
+         path: Directory to store and extract downloaded data. Defaults to '../data'.
+         max_workers: Maximum number of worker processes for data processing.
+             Defaults to 1.
+
+     Returns:
+         Two pandas DataFrames. The first DataFrame combines training and validation
+         data, and the second DataFrame contains the testing data.
+     """
+     path = Path(path) / study
+     dataset_file_path = path / f'{study}.npz'
+
+     try:
+         #todo: check if dataset is downloaded
+         download_url(url=MURLs.MEDMNIST_DICT[study], filepath=dataset_file_path)
+     except:
+         raise ValueError(f"Dataset '{study}' does not exist.")
+
+     data = load(dataset_file_path)
+     keys = ['train_images', 'val_images', 'test_images']
+
+     for key in keys:
+         (path / key).mkdir(exist_ok=True)
+
+     train_imgs = data[keys[0]]
+     val_imgs = data[keys[1]]
+     test_imgs = data[keys[2]]
 
      with mp.Pool(processes=max_workers) as pool:
-
-         train_df = _create_nodule_df(pool, path/key_fn[0], train_imgs, data['train_labels'])
-         val_df = _create_nodule_df(pool, path/key_fn[1], val_imgs, data['val_labels'], is_val=True)
-         test_df = _create_nodule_df(pool, path/key_fn[2], test_imgs, data['test_labels'])
-
+         train_df = _create_nodule_df(pool, path / keys[0], train_imgs,
+                                      data['train_labels'])
+         val_df = _create_nodule_df(pool, path / keys[1], val_imgs,
+                                    data['val_labels'], is_val=True)
+         test_df = _create_nodule_df(pool, path / keys[2], test_imgs,
+                                     data['test_labels'])
+
      train_val_df = pd.concat([train_df, val_df], ignore_index=True)
-
+
+     dataset_file_path.unlink()
+
      return train_val_df, test_df
+
+ # %% ../nbs/09_external_data.ipynb 19
+ def download_example_endometrial_cancer_data(path: (str, Path) = '../data') -> Path:
+     study = 'ec'
+
+     download_and_extract(
+         url=MURLs.EXAMPLE_EC_DATA,
+         filepath='ec.zip',
+         output_dir=path
+     )
+     Path('ec.zip').unlink()
+
+     return Path(path) / study
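
The renamed downloader takes the dataset name as its first argument and raises ValueError for unknown names (see the try/except above). A usage sketch for the two new public functions (paths and worker count are illustrative):

    from fastMONAI.external_data import (download_medmnist3d_dataset,
                                         download_example_endometrial_cancer_data)

    # `study` must be one of the MURLs.MEDMNIST_DICT keys
    train_val_df, test_df = download_medmnist3d_dataset(
        study='NoduleMNIST3D', path='../data', max_workers=2)
    print(train_val_df.is_val.value_counts())  # train vs. validation rows

    ec_path = download_example_endometrial_cancer_data(path='../data')  # -> '../data/ec'
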
fastMONAI/utils.py CHANGED
@@ -9,12 +9,8 @@ import torch
  from pathlib import Path
 
  # %% ../nbs/07_utils.ipynb 3
- def store_variables(pkl_fn:(str, Path),
-                     size:list,
-                     reorder:bool,
-                     resample:(int,list),
-                     ) -> None:
-     '''Save variable values in a pickle file.'''
+ def store_variables(pkl_fn: (str, Path), size: list, reorder: bool, resample: (int, list)):
+     """Save variable values in a pickle file."""
 
      var_vals = [size, reorder, resample]
 
@@ -22,19 +18,21 @@ def store_variables(pkl_fn:(str, Path),
          pickle.dump(var_vals, f)
 
  # %% ../nbs/07_utils.ipynb 4
- def load_variables(pkl_fn # Filename of the pickle file
-                    ):
-     '''Load stored variable values from a pickle file.
+ def load_variables(pkl_fn: (str, Path)):
+     """Loads stored variable values from a pickle file.
 
-     Returns: A list of variable values.
-     '''
+     Args:
+         pkl_fn: File path of the pickle file to be loaded.
 
+     Returns:
+         The deserialized value of the pickled data.
+     """
      with open(pkl_fn, 'rb') as f:
          return pickle.load(f)
 
  # %% ../nbs/07_utils.ipynb 5
  def print_colab_gpu_info():
-     '''Check if we have a GPU attached to the runtime.'''
+     """Check if we have a GPU attached to the runtime."""
 
      colab_gpu_msg =(f"{'#'*80}\n"
                      "Remember to attach a GPU to your Colab Runtime:"