mediml 0.9.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. MEDiml/MEDscan.py +1696 -0
  2. MEDiml/__init__.py +21 -0
  3. MEDiml/biomarkers/BatchExtractor.py +806 -0
  4. MEDiml/biomarkers/BatchExtractorTexturalFilters.py +840 -0
  5. MEDiml/biomarkers/__init__.py +16 -0
  6. MEDiml/biomarkers/diagnostics.py +125 -0
  7. MEDiml/biomarkers/get_oriented_bound_box.py +158 -0
  8. MEDiml/biomarkers/glcm.py +1602 -0
  9. MEDiml/biomarkers/gldzm.py +523 -0
  10. MEDiml/biomarkers/glrlm.py +1315 -0
  11. MEDiml/biomarkers/glszm.py +555 -0
  12. MEDiml/biomarkers/int_vol_hist.py +527 -0
  13. MEDiml/biomarkers/intensity_histogram.py +615 -0
  14. MEDiml/biomarkers/local_intensity.py +89 -0
  15. MEDiml/biomarkers/morph.py +1756 -0
  16. MEDiml/biomarkers/ngldm.py +780 -0
  17. MEDiml/biomarkers/ngtdm.py +414 -0
  18. MEDiml/biomarkers/stats.py +373 -0
  19. MEDiml/biomarkers/utils.py +389 -0
  20. MEDiml/filters/TexturalFilter.py +299 -0
  21. MEDiml/filters/__init__.py +9 -0
  22. MEDiml/filters/apply_filter.py +134 -0
  23. MEDiml/filters/gabor.py +215 -0
  24. MEDiml/filters/laws.py +283 -0
  25. MEDiml/filters/log.py +147 -0
  26. MEDiml/filters/mean.py +121 -0
  27. MEDiml/filters/textural_filters_kernels.py +1738 -0
  28. MEDiml/filters/utils.py +107 -0
  29. MEDiml/filters/wavelet.py +237 -0
  30. MEDiml/learning/DataCleaner.py +198 -0
  31. MEDiml/learning/DesignExperiment.py +480 -0
  32. MEDiml/learning/FSR.py +667 -0
  33. MEDiml/learning/Normalization.py +112 -0
  34. MEDiml/learning/RadiomicsLearner.py +714 -0
  35. MEDiml/learning/Results.py +2237 -0
  36. MEDiml/learning/Stats.py +694 -0
  37. MEDiml/learning/__init__.py +10 -0
  38. MEDiml/learning/cleaning_utils.py +107 -0
  39. MEDiml/learning/ml_utils.py +1015 -0
  40. MEDiml/processing/__init__.py +6 -0
  41. MEDiml/processing/compute_suv_map.py +121 -0
  42. MEDiml/processing/discretisation.py +149 -0
  43. MEDiml/processing/interpolation.py +275 -0
  44. MEDiml/processing/resegmentation.py +66 -0
  45. MEDiml/processing/segmentation.py +912 -0
  46. MEDiml/utils/__init__.py +25 -0
  47. MEDiml/utils/batch_patients.py +45 -0
  48. MEDiml/utils/create_radiomics_table.py +131 -0
  49. MEDiml/utils/data_frame_export.py +42 -0
  50. MEDiml/utils/find_process_names.py +16 -0
  51. MEDiml/utils/get_file_paths.py +34 -0
  52. MEDiml/utils/get_full_rad_names.py +21 -0
  53. MEDiml/utils/get_institutions_from_ids.py +16 -0
  54. MEDiml/utils/get_patient_id_from_scan_name.py +22 -0
  55. MEDiml/utils/get_patient_names.py +26 -0
  56. MEDiml/utils/get_radiomic_names.py +27 -0
  57. MEDiml/utils/get_scan_name_from_rad_name.py +22 -0
  58. MEDiml/utils/image_reader_SITK.py +37 -0
  59. MEDiml/utils/image_volume_obj.py +22 -0
  60. MEDiml/utils/imref.py +340 -0
  61. MEDiml/utils/initialize_features_names.py +62 -0
  62. MEDiml/utils/inpolygon.py +159 -0
  63. MEDiml/utils/interp3.py +43 -0
  64. MEDiml/utils/json_utils.py +78 -0
  65. MEDiml/utils/mode.py +31 -0
  66. MEDiml/utils/parse_contour_string.py +58 -0
  67. MEDiml/utils/save_MEDscan.py +30 -0
  68. MEDiml/utils/strfind.py +32 -0
  69. MEDiml/utils/textureTools.py +188 -0
  70. MEDiml/utils/texture_features_names.py +115 -0
  71. MEDiml/utils/write_radiomics_csv.py +47 -0
  72. MEDiml/wrangling/DataManager.py +1724 -0
  73. MEDiml/wrangling/ProcessDICOM.py +512 -0
  74. MEDiml/wrangling/__init__.py +3 -0
  75. mediml-0.9.9.dist-info/LICENSE.md +674 -0
  76. mediml-0.9.9.dist-info/METADATA +232 -0
  77. mediml-0.9.9.dist-info/RECORD +78 -0
  78. mediml-0.9.9.dist-info/WHEEL +4 -0
@@ -0,0 +1,25 @@
1
+ from . import *
2
+ from .batch_patients import *
3
+ from .create_radiomics_table import *
4
+ from .data_frame_export import *
5
+ from .find_process_names import *
6
+ from .get_file_paths import *
7
+ from .get_full_rad_names import *
8
+ from .get_institutions_from_ids import *
9
+ from .get_patient_id_from_scan_name import *
10
+ from .get_patient_names import *
11
+ from .get_radiomic_names import *
12
+ from .get_scan_name_from_rad_name import *
13
+ from .image_reader_SITK import *
14
+ from .image_volume_obj import *
15
+ from .imref import *
16
+ from .initialize_features_names import *
17
+ from .inpolygon import *
18
+ from .interp3 import *
19
+ from .json_utils import *
20
+ from .mode import *
21
+ from .parse_contour_string import *
22
+ from .save_MEDscan import *
23
+ from .strfind import *
24
+ from .textureTools import *
25
+ from .write_radiomics_csv import *
@@ -0,0 +1,45 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+ import numpy as np
5
+
6
+
7
def batch_patients(n_patient: int,
                   n_batch: int) -> list:
    """Randomly split patient indexes into batches of near-equal size.

    The indexes ``0 .. n_patient - 1`` are shuffled, then dealt into
    ``n_batch`` consecutive groups. Batch sizes differ by at most one and the
    smaller batches come first.

    Args:
        n_patient (int): Number of patients.
        n_batch (int): Number of batches, usually less than or equal to the
            number of cores on your machine.

    Returns:
        list: ``n_batch`` lists of patient indexes which together contain
            every index of ``range(n_patient)`` exactly once. An empty list is
            returned when ``n_batch`` is 0.
    """
    # Shuffle first so that every batch receives a random sample of patients.
    patient_vect = np.random.permutation(n_patient)
    if not n_batch:
        return []

    # ``n_large`` batches receive one extra patient; they are placed last.
    base_size, n_large = divmod(n_patient, n_batch)
    sizes = [base_size] * (n_batch - n_large) + [base_size + 1] * n_large

    patients = []
    start = 0
    for size in sizes:
        patients.append(patient_vect[start:start + size].tolist())
        start += size

    return patients
@@ -0,0 +1,131 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+ import logging
5
+ import random
6
+ from json import load
7
+ from pathlib import Path
8
+ from typing import Dict, List, Union
9
+
10
+ import numpy as np
11
+ import pandas as pd
12
+
13
+ from ..utils.get_patient_id_from_scan_name import get_patient_id_from_scan_name
14
+ from ..utils.initialize_features_names import initialize_features_names
15
+
16
+
17
def _lookup_feature(rad_struct, keys):
    """Follow ``keys`` through nested containers; NaN if absent or not a scalar."""
    val = rad_struct
    try:
        for key in keys:
            val = val[key]
    except (KeyError, IndexError, TypeError):
        return np.nan
    # Strings and lists are not usable as a single numeric table entry.
    if isinstance(val, (str, list)):
        return np.nan
    return val


def create_radiomics_table(radiomics_files_paths: List, image_space: str, log_file: Union[str, Path]) -> Dict:
    """
    Creates a dictionary holding a radiomics table and its description.

    Every JSON file is loaded once. The feature names are initialized from a
    randomly chosen file that provides ``image_space``, then one column
    ``radVar<k>`` is filled per (family, parameter, feature) combination.
    Values that are missing, or that are strings or lists, are stored as NaN.

    Args:
        radiomics_files_paths(List): List of paths to the radiomics JSON files.
        image_space(str): String of the image space that contains the extracted features
        log_file(Union[str, Path]): Path to logging file. Logging is left unconfigured when falsy.

    Returns:
        Dict: Dictionary containing the extracted radiomics ('Table', a DataFrame indexed by
            patient ID) and other info under 'Properties' (patient IDs, feature names...)

    Raises:
        ValueError: If no file yields any feature name for ``image_space``
            (this includes an empty ``radiomics_files_paths``).
    """
    if log_file:
        # Setting up logging settings
        logging.basicConfig(filename=log_file, level=logging.DEBUG)

    # LOAD ALL RADIOMICS STRUCTURES
    n_files = len(radiomics_files_paths)
    rad_structs = []
    patientID = []
    for file_path in radiomics_files_paths:
        with open(file_path, "r") as fp:
            rad_structs.append(load(fp))
        # Path() lets plain string paths work as well as Path objects.
        patientID.append(get_patient_id_from_scan_name(Path(file_path).stem))

    # INITIALIZE FEATURE NAMES
    logging.info(f"\nnFiles: {n_files}")
    non_text_cell = []
    text_cell = []
    # Visit the files in random order, each at most once, so the search always ends.
    for candidate in random.sample(range(n_files), n_files):
        try:
            # IMAGE SPACE STRUCTURE --> .morph, .locInt, ..., .texture
            image_space_struct = rad_structs[candidate][image_space]
            non_text_cell, text_cell = initialize_features_names(image_space_struct)
        except Exception as err:
            # The helper is defined elsewhere; any failure just means "try another file".
            logging.debug("Could not initialize feature names from %s: %s",
                          radiomics_files_paths[candidate], err)
            continue
        if len(non_text_cell) != 0 or len(text_cell) != 0:
            break
    else:
        raise ValueError(
            f"No radiomics file provides feature names for image space '{image_space}'.")

    # CREATE TABLE DATA
    features_name_dict = {}
    name_entries = []
    count_var = 0

    # Texture families are nested one level deeper, under the 'texture' key.
    groups = ((non_text_cell, (image_space,)),
              (text_cell, (image_space, 'texture')))
    for cells, key_prefix in groups:
        if len(cells) == 0:
            # Nothing was initialized for this group.
            continue
        families, feature_lists, param_lists = cells[0], cells[1], cells[2]
        for im_type, family in enumerate(families):
            for param in param_lists[im_type]:
                for feat in feature_lists[im_type]:
                    count_var += 1
                    feature_name = 'radVar' + str(count_var)
                    real_name_feature = family + '__' + feat + '__' + param
                    name_entries.append(feature_name + ':' + real_name_feature)
                    keys = key_prefix + (family, param, feat)
                    features_name_dict[feature_name] = [
                        _lookup_feature(rad_struct, keys) for rad_struct in rad_structs
                    ]

    # Same layout as before: '||name:realName||name:realName||'
    str_names = '||' + ''.join(entry + '||' for entry in name_entries)

    radiomics_table_dict = {
        'Table': pd.DataFrame(features_name_dict, index=patientID),
        'Properties': {'UserData': str_names,
                       'RowNames': patientID,
                       'DimensionNames': ['PatientID', 'Variables'],
                       'VariableNames': list(features_name_dict)
                       }}

    return radiomics_table_dict
@@ -0,0 +1,42 @@
1
+ import os.path
2
+ from isort import file
3
+ import pandas as pd
4
+
5
def export_table(file_name: str,
                 data: object):
    """Export a pandas table to a file whose format is chosen from its extension.

    Args:
        file_name (str): Path of the output file. The extension selects the
            format and is matched case-insensitively: ".csv" (";"-separated),
            ".xls"/".xlsx", ".tex", ".html" or ".json".
        data (object): The data to export, a pandas DataFrame or Series.

    Returns:
        None

    Raises:
        TypeError: If ``data`` is neither a DataFrame nor a Series.
        ValueError: If the file extension is not supported.
    """
    # NOTE: the annotation no longer depends on the file-level
    # ``from isort import file`` import, which this function does not need.
    if not isinstance(data, (pd.DataFrame, pd.Series)):
        raise TypeError(f"The exported data should be a pandas DataFrame or Series. Found: {type(data)}")

    # Find the extension; compare in lower case so "table.CSV" is accepted too.
    ext = os.path.splitext(file_name)[1]
    ext_key = ext.lower()

    # The index is written for a Series only, never for a DataFrame.
    write_index = not isinstance(data, pd.DataFrame)

    if ext_key == ".csv":
        data.to_csv(path_or_buf=file_name, sep=";", index=write_index)
    elif ext_key in (".xls", ".xlsx"):
        data.to_excel(excel_writer=file_name, index=write_index)
    elif ext_key == ".tex":
        with open(file=file_name, mode="w") as f:
            data.to_latex(buf=f, index=write_index)
    elif ext_key == ".html":
        with open(file=file_name, mode="w") as f:
            data.to_html(buf=f, index=write_index)
    elif ext_key == ".json":
        data.to_json(path_or_buf=file_name)
    else:
        raise ValueError(f"File extension not supported for export of table data. Recognised extensions are: \".csv\", \".xls\", \".xlsx\", \".tex\", \".html\" and \".json\". Found: {ext}")
@@ -0,0 +1,16 @@
1
+ from inspect import stack, getmodule
2
+ from typing import List
3
+
4
def get_process_names() -> List:
    """Collect the names of the modules found on the current call stack.

    Returns:
        List: ``"none"`` followed by the ``__name__`` of the module of each
            stack frame, innermost frame first. Frames whose module cannot be
            resolved are skipped.
    """
    # stack() is evaluated here, in this function's own frame.
    frame_modules = (getmodule(frame_info[0]) for frame_info in stack())
    return ["none"] + [module.__name__ for module in frame_modules if module is not None]
@@ -0,0 +1,34 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+ from importlib.resources import path
5
+ from pathlib import Path
6
+ from typing import List, Union
7
+
8
+
9
def get_file_paths(path_to_parent_folder: Union[str, Path], wildcard: str=None) -> List[Path]:
    """Recursively lists the files under a folder that match a wildcard.

    Note:
        Sub-directories are searched as well; directories themselves are
        never returned.

    Args:
        path_to_parent_folder (Union[str, Path]): Full path of the folder to search.
        wildcard (str, optional): Glob pattern selecting the files to keep.
            Every file is kept when omitted.
            - Ex : '*.dcm*', to look for dicom files.

    Returns:
        List: Full paths of the matching files found under the parent folder.
    """
    pattern = '*' if wildcard is None else wildcard
    root = Path(path_to_parent_folder)

    # rglob also yields directories, only regular files are of interest.
    return [entry for entry in list(root.rglob(pattern)) if entry.is_file()]
@@ -0,0 +1,21 @@
1
+ from typing import List
2
+
3
+ import numpy as np
4
+
5
+
6
def get_full_rad_names(str_user_data: str, rad_var_ids: List):
    """
    Maps sequential radiomics variable IDs back to their full real names.

    Args:
        str_user_data: string of '||'-separated ``<id>:<full name>`` entries
        rad_var_ids: sequential variable IDs; the characters after the sixth
            one are read as the entry number (can get it by doing
            table.column.values)

    Returns:
        ndarray: Full radiomic names, in the order of ``rad_var_ids``.
    """
    ids = list(rad_var_ids)
    # Only touch the user data when there is at least one ID to resolve.
    entries = str_user_data.split('||') if ids else []
    names = [entries[int(var_id[6:])].split(':')[1] for var_id in ids]

    # Appending to an empty array keeps the result type of the incremental build.
    return np.append(np.array([]), names)
@@ -0,0 +1,16 @@
1
+ import pandas as pd
2
+
3
+
4
def get_institutions_from_ids(patient_ids):
    """
    Extracts the institution strings from patient IDs.

    The institution is the second '-'-separated field of an ID.

    Args:
        patient_ids (Any): Patient ID (string, list/tuple of strings or pandas Series). Ex: 'Cervix-CEM-010'.

    Returns:
        Union[str, pd.Series]: The institution of a single ID when a string is given ('CEM' for
            the example above); otherwise a Series with the institution of each entry.

    Raises:
        IndexError: If a single string ID contains no '-'.
    """
    # A plain string has no ``.str`` accessor, handle it on its own.
    if isinstance(patient_ids, str):
        return patient_ids.split('-')[1]
    if isinstance(patient_ids, (list, tuple)):
        patient_ids = pd.Series(patient_ids)
    return patient_ids.str.rsplit('-', expand=True)[1]
@@ -0,0 +1,22 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+
5
def get_patient_id_from_scan_name(rad_name: str) -> str:
    """
    Finds the patient id from the given string

    The patient id is everything that precedes the first ``'__'``.

    Args:
        rad_name(str): Name of a scan or a radiomics structure

    Returns:
        str: patient id. The whole name is returned when it contains no ``'__'``.

    Example:
        >>> get_patient_id_from_scan_name('STS-McGill-001__T1(tumourAndEdema).MRscan')
        'STS-McGill-001'
    """
    # partition() leaves the name untouched when the separator is missing,
    # where slicing on find() == -1 would silently drop the last character.
    patient_id, _, _ = rad_name.partition('__')

    return patient_id
@@ -0,0 +1,26 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+
5
+ from typing import List
6
+
7
+ import numpy as np
8
+
9
+
10
def get_patient_names(roi_names: np.ndarray) -> List[str]:
    """Builds the scan file name of every entry of the CSV data.

    Args:
        roi_names (ndarray): Array with CSV data organized as follows
            [[patient_id], [imaging_scan_name], [imaging_modality]]

    Returns:
        list[str]: One ``<patient_id>__<scan_name>.<modality>.npy`` name per entry.
    """
    def scan_file_name(idx):
        # <patient_id>__<imaging_scan_name>.<imaging_modality>.npy
        return roi_names[0][idx] + '__' + roi_names[1][idx] + \
            '.' + roi_names[2][idx] + '.npy'

    return [scan_file_name(idx) for idx in range(np.size(roi_names[0]))]
@@ -0,0 +1,27 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+
5
+ from typing import Dict
6
+ import numpy as np
7
+
8
+
9
def get_radiomic_names(roi_names: np.array,
                       roi_type: str) -> list:
    """Generates radiomics names using ``roi_names`` and ``roi_type``.

    Args:
        roi_names (np.array): 2D array of the ROI names with one row per scan;
            columns 0, 1 and 2 are used as the three name parts that surround
            the ROI type.
        roi_type(str): string of the ROI.

    Returns:
        list: One ``<col0>__<col1>(<roi_type>).<col2>.npy`` name per row.
    """
    roi_names = np.asarray(roi_names)
    # The row count comes from the shape: np.size() returns a plain int.
    n_names = np.shape(roi_names)[0]

    return [
        f"{roi_names[n, 0]}__{roi_names[n, 1]}({roi_type}).{roi_names[n, 2]}.npy"
        for n in range(n_names)
    ]
@@ -0,0 +1,22 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+
5
def get_scan_name_from_rad_name(rad_name: str) -> str:
    """Finds the imaging scan name from the radiomics structure name

    The scan name is the text between the first ``'__'`` and the next ``'('``.

    Args:
        rad_name (str): radiomics structure name.

    Returns:
        str: String of the imaging scan name. When ``'__'`` is absent the name
            is read from the start; when ``'('`` is absent it runs to the end.

    Example:
        >>> get_scan_name_from_rad_name('STS-McGill-001__T1(tumourAndEdema).MRscan')
        'T1'
    """
    # partition() avoids slicing with the -1 that find() returns on a miss,
    # which used to cut characters off the result.
    _, separator, remainder = rad_name.partition('__')
    if not separator:
        remainder = rad_name
    scan_name, _, _ = remainder.partition('(')

    return scan_name
@@ -0,0 +1,37 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+
5
+ from pathlib import Path
6
+ from typing import Dict, Union
7
+ import SimpleITK as sitk
8
+ import numpy as np
9
+
10
+
11
def image_reader_SITK(path: Path,
                      option: str=None) -> Union[np.ndarray, Dict, None]:
    """Return the image in a numpy array or a dictionary with the header of the image.

    Args:
        path (path): path of the file
        option (str): name of the option, either 'image' (default) or 'header'

    Returns:
        Union[np.ndarray, Dict, None]: the transposed image array for 'image',
            a dictionary of the image meta-data for 'header', or None (after
            printing a message) for any other option.
    """
    # SimpleITK takes file names as strings; converting up front lets a
    # pathlib.Path be used regardless of the installed SimpleITK version.
    file_name = str(path)

    if option is None or option == 'image':
        # return the image in a numpy array
        return np.transpose(sitk.GetArrayFromImage(sitk.ReadImage(file_name)))

    if option == 'header':
        # Return a dictionary with the header of the image.
        # Only the image information is read, not the pixel data.
        reader = sitk.ImageFileReader()
        reader.SetFileName(file_name)
        reader.ReadImageInformation()
        return {key: reader.GetMetaData(key) for key in reader.GetMetaDataKeys()}

    print("Argument option should be the string 'image' or 'header'")
    return None
@@ -0,0 +1,22 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+
5
class image_volume_obj:
    """Pairs imaging data with its spatial referencing object.

    Args:
        data (ndarray, optional): 3D array of imaging data.
        spatial_ref (imref3d, optional): The corresponding imref3d object
            (same functionality of MATLAB imref3d class).

    Attributes:
        data (ndarray): 3D array of imaging data.
        spatialRef (imref3d): The corresponding imref3d object, as given
            through ``spatial_ref``.

    """

    def __init__(self, data=None, spatial_ref=None) -> None:
        # Both values are stored as given, without copy or validation.
        self.data, self.spatialRef = data, spatial_ref