PyPI - mediml - Versions diffs - 0.9.9__py3-none-any.whl - Mend

mediml 0.9.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (78) hide show

MEDiml/MEDscan.py +1696 -0
MEDiml/__init__.py +21 -0
MEDiml/biomarkers/BatchExtractor.py +806 -0
MEDiml/biomarkers/BatchExtractorTexturalFilters.py +840 -0
MEDiml/biomarkers/__init__.py +16 -0
MEDiml/biomarkers/diagnostics.py +125 -0
MEDiml/biomarkers/get_oriented_bound_box.py +158 -0
MEDiml/biomarkers/glcm.py +1602 -0
MEDiml/biomarkers/gldzm.py +523 -0
MEDiml/biomarkers/glrlm.py +1315 -0
MEDiml/biomarkers/glszm.py +555 -0
MEDiml/biomarkers/int_vol_hist.py +527 -0
MEDiml/biomarkers/intensity_histogram.py +615 -0
MEDiml/biomarkers/local_intensity.py +89 -0
MEDiml/biomarkers/morph.py +1756 -0
MEDiml/biomarkers/ngldm.py +780 -0
MEDiml/biomarkers/ngtdm.py +414 -0
MEDiml/biomarkers/stats.py +373 -0
MEDiml/biomarkers/utils.py +389 -0
MEDiml/filters/TexturalFilter.py +299 -0
MEDiml/filters/__init__.py +9 -0
MEDiml/filters/apply_filter.py +134 -0
MEDiml/filters/gabor.py +215 -0
MEDiml/filters/laws.py +283 -0
MEDiml/filters/log.py +147 -0
MEDiml/filters/mean.py +121 -0
MEDiml/filters/textural_filters_kernels.py +1738 -0
MEDiml/filters/utils.py +107 -0
MEDiml/filters/wavelet.py +237 -0
MEDiml/learning/DataCleaner.py +198 -0
MEDiml/learning/DesignExperiment.py +480 -0
MEDiml/learning/FSR.py +667 -0
MEDiml/learning/Normalization.py +112 -0
MEDiml/learning/RadiomicsLearner.py +714 -0
MEDiml/learning/Results.py +2237 -0
MEDiml/learning/Stats.py +694 -0
MEDiml/learning/__init__.py +10 -0
MEDiml/learning/cleaning_utils.py +107 -0
MEDiml/learning/ml_utils.py +1015 -0
MEDiml/processing/__init__.py +6 -0
MEDiml/processing/compute_suv_map.py +121 -0
MEDiml/processing/discretisation.py +149 -0
MEDiml/processing/interpolation.py +275 -0
MEDiml/processing/resegmentation.py +66 -0
MEDiml/processing/segmentation.py +912 -0
MEDiml/utils/__init__.py +25 -0
MEDiml/utils/batch_patients.py +45 -0
MEDiml/utils/create_radiomics_table.py +131 -0
MEDiml/utils/data_frame_export.py +42 -0
MEDiml/utils/find_process_names.py +16 -0
MEDiml/utils/get_file_paths.py +34 -0
MEDiml/utils/get_full_rad_names.py +21 -0
MEDiml/utils/get_institutions_from_ids.py +16 -0
MEDiml/utils/get_patient_id_from_scan_name.py +22 -0
MEDiml/utils/get_patient_names.py +26 -0
MEDiml/utils/get_radiomic_names.py +27 -0
MEDiml/utils/get_scan_name_from_rad_name.py +22 -0
MEDiml/utils/image_reader_SITK.py +37 -0
MEDiml/utils/image_volume_obj.py +22 -0
MEDiml/utils/imref.py +340 -0
MEDiml/utils/initialize_features_names.py +62 -0
MEDiml/utils/inpolygon.py +159 -0
MEDiml/utils/interp3.py +43 -0
MEDiml/utils/json_utils.py +78 -0
MEDiml/utils/mode.py +31 -0
MEDiml/utils/parse_contour_string.py +58 -0
MEDiml/utils/save_MEDscan.py +30 -0
MEDiml/utils/strfind.py +32 -0
MEDiml/utils/textureTools.py +188 -0
MEDiml/utils/texture_features_names.py +115 -0
MEDiml/utils/write_radiomics_csv.py +47 -0
MEDiml/wrangling/DataManager.py +1724 -0
MEDiml/wrangling/ProcessDICOM.py +512 -0
MEDiml/wrangling/__init__.py +3 -0
mediml-0.9.9.dist-info/LICENSE.md +674 -0
mediml-0.9.9.dist-info/METADATA +232 -0
mediml-0.9.9.dist-info/RECORD +78 -0
mediml-0.9.9.dist-info/WHEEL +4 -0

MEDiml/utils/__init__.py ADDED Viewed

@@ -0,0 +1,25 @@
+from . import *
+from .batch_patients import *
+from .create_radiomics_table import *
+from .data_frame_export import *
+from .find_process_names import *
+from .get_file_paths import *
+from .get_full_rad_names import *
+from .get_institutions_from_ids import *
+from .get_patient_id_from_scan_name import *
+from .get_patient_names import *
+from .get_radiomic_names import *
+from .get_scan_name_from_rad_name import *
+from .image_reader_SITK import *
+from .image_volume_obj import *
+from .imref import *
+from .initialize_features_names import *
+from .inpolygon import *
+from .interp3 import *
+from .json_utils import *
+from .mode import *
+from .parse_contour_string import *
+from .save_MEDscan import *
+from .strfind import *
+from .textureTools import *
+from .write_radiomics_csv import *

MEDiml/utils/batch_patients.py ADDED Viewed

@@ -0,0 +1,45 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+import numpy as np
+def batch_patients(n_patient: int,
+                   n_batch: int) -> np.ndarray:
+    """Replaces volume intensities outside the ROI with NaN.
+    Args:
+        n_patient (int): Number of patient.
+        n_batch (int): Number of batch, usually less or equal to the cores number on your machine.
+    Returns:
+        ndarray: List of indexes with size n_batch and max value n_patient.
+    """
+    # FIND THE NUMBER OF PATIENTS IN EACH BATCH
+    patients = [0] * n_batch  # np.zeros(n_batch, dtype=int)
+    patient_vect = np.random.permutation(n_patient)  # To randomize stuff a bit.
+    if n_batch:
+        n_p = n_patient / n_batch
+        n_sup = np.ceil(n_p).astype(int)
+        n_inf = np.floor(n_p).astype(int)
+        if n_sup != n_inf:
+            n_sub_inf = n_batch - 1
+            n_sub_sup = 1
+            total = n_sub_inf*n_inf + n_sub_sup*n_sup
+            while total != n_patient:
+                n_sub_inf = n_sub_inf - 1
+                n_sub_sup = n_sub_sup + 1
+                total = n_sub_inf*n_inf + n_sub_sup*n_sup
+            n_p = np.hstack((np.tile(n_inf, (1, n_sub_inf))[
+                           0], np.tile(n_sup, (1, n_sub_sup))[0]))
+        else:  # The number of patients in all batches will be the same
+            n_p = np.tile(n_sup, (1, n_batch))[0]
+        start = 0
+        for i in range(0, n_batch):
+            patients[i] = patient_vect[start:(start+n_p[i])].tolist()
+            start += n_p[i]
+    return patients

MEDiml/utils/create_radiomics_table.py ADDED Viewed

@@ -0,0 +1,131 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+import logging
+import random
+from json import load
+from pathlib import Path
+from typing import Dict, List, Union
+import numpy as np
+import pandas as pd
+from ..utils.get_patient_id_from_scan_name import get_patient_id_from_scan_name
+from ..utils.initialize_features_names import initialize_features_names
+def create_radiomics_table(radiomics_files_paths: List, image_space: str, log_file: Union[str, Path]) -> Dict:
+    """
+    Creates a dictionary with a csv and other information
+    Args:
+        radiomics_files_paths(List): List of paths to the radiomics JSON files.
+        image_space(str): String of the image space that contains the extracted features
+        log_file(Union[str, Path]): Path to logging file.
+    Returns:
+        Dict: Dictionary containing the extracted radiomics and other info (patientID, feature names...)
+    """
+    if log_file:
+        # Setting up logging settings
+        logging.basicConfig(filename=log_file, level=logging.DEBUG)
+    # INITIALIZATIONS OF RADIOMICS STRUCTURES
+    n_files = len(radiomics_files_paths)
+    patientID = [0] * n_files
+    rad_structs = [0] * n_files
+    file_open = [False] * n_files
+    for f in range(n_files):
+        with open(radiomics_files_paths[f], "r") as fp:
+            radStruct = load(fp)
+        rad_structs[f] = radStruct
+        file_open[f] = True
+        patientID[f] = get_patient_id_from_scan_name(radiomics_files_paths[f].stem)
+    # INITIALIZE FEATURE NAMES
+    logging.info(f"\nnFiles: {n_files}")
+    non_text_cell = []
+    text_cell = []
+    while len(non_text_cell) == 0 and len(text_cell) == 0:
+        try:
+            rand_patient = np.floor(n_files * random.uniform(0, 1)).astype(int)
+            with open(radiomics_files_paths[rand_patient], "r") as fp:
+                radiomics_struct = load(fp)
+            # IMAGE SPACE STRUCTURE --> .morph, .locInt, ...,  .texture
+            image_space_struct = radiomics_struct[image_space]
+            non_text_cell, text_cell = initialize_features_names(image_space_struct)
+        except:
+            pass
+    # CREATE TABLE DATA
+    features_name_dict = {}
+    str_table = ''
+    str_names = '||'
+    count_var = 0
+    # Non-texture features
+    for im_type in range(len(non_text_cell[0])):
+        for param in range(len(non_text_cell[2][im_type])):
+            for feat in range(len(non_text_cell[1][im_type])):
+                count_var = count_var + 1
+                feature_name = 'radVar' + str(count_var)
+                features_name_dict.update({feature_name: [0] * n_files})
+                real_name_feature = non_text_cell[0][im_type] + '__' + \
+                    non_text_cell[1][im_type][feat] + '__' + \
+                    non_text_cell[2][im_type][param]
+                str_table = str_table + feature_name + ','
+                str_names = str_names + feature_name + ':' + real_name_feature + '||'
+                for f in range(n_files):
+                    if file_open[f]:
+                        try:
+                            val = rad_structs[f][image_space][
+                                non_text_cell[0][im_type]][
+                                non_text_cell[2][im_type][param]][
+                                non_text_cell[1][im_type][feat]]
+                        except:
+                            val = np.NaN
+                        if type(val) in [str, list]:
+                            val = np.NaN
+                    else:
+                        val = np.NaN
+                    features_name_dict[feature_name][f] = val
+    # Texture features
+    for im_type in range(len(text_cell[0])):
+        for param in range(len(text_cell[2][im_type])):
+            for feat in range(len(text_cell[1][im_type])):
+                count_var = count_var + 1
+                feature_name = 'radVar' + str(count_var)
+                features_name_dict.update({feature_name: [0] * n_files})
+                real_name_feature = text_cell[0][im_type] + '__' + \
+                    text_cell[1][im_type][feat] + '__' + \
+                    text_cell[2][im_type][param]
+                str_table = str_table + feature_name + ','
+                str_names = str_names + feature_name + ':' + real_name_feature + '||'
+                for f in range(n_files):
+                    if file_open[f]:
+                        try:
+                            val = rad_structs[f][image_space]['texture'][
+                                text_cell[0][im_type]][
+                                text_cell[2][im_type][param]][
+                                text_cell[1][im_type][feat]]
+                        except:
+                            val = np.NaN
+                        if type(val) in [str, list]:
+                            val = np.NaN
+                    else:
+                        val = np.NaN
+                    features_name_dict[feature_name][f] = val
+    radiomics_table_dict = {
+        'Table': pd.DataFrame(features_name_dict, index=patientID),
+        'Properties': {'UserData': str_names,
+                       'RowNames': patientID,
+                       'DimensionNames': ['PatientID', 'Variables'],
+                       'VariableNames': [key for key in features_name_dict.keys()]
+                       }}
+    return radiomics_table_dict

MEDiml/utils/data_frame_export.py ADDED Viewed

@@ -0,0 +1,42 @@
+import os.path
+from isort import file
+import pandas as pd
+def export_table(file_name: file,
+                 data: object):
+    """Export table
+    Args:
+        file_name (file): name of the file
+        data (object): the data
+    Returns:
+        None
+    """
+    if not isinstance(data, (pd.DataFrame, pd.Series)):
+        raise TypeError(f"The exported data should be a pandas DataFrame or Series. Found: {type(data)}")
+    # Find the extension
+    ext = os.path.splitext(file_name)[1]
+    # Set an index switch based on type of input
+    if isinstance(data, pd.DataFrame):
+        write_index = False
+    else:
+        write_index = True
+    if ext == ".csv":
+        data.to_csv(path_or_buf=file_name, sep=";", index=write_index)
+    elif ext in [".xls", ".xlsx"]:
+        data.to_excel(excel_writer=file_name, index=write_index)
+    elif ext in [".tex"]:
+        with open(file=file_name, mode="w") as f:
+            data.to_latex(buf=f, index=write_index)
+    elif ext in [".html"]:
+        with open(file=file_name, mode="w") as f:
+            data.to_html(buf=f, index=write_index)
+    elif ext in [".json"]:
+        data.to_json(path_or_buf=file_name)
+    else:
+        raise ValueError(f"File extension not supported for export of table data. Recognised extensions are: \".csv\", \".xls\", \".xlsx\", \".tex\", \".html\" and \".json\". Found: {ext}")

MEDiml/utils/find_process_names.py ADDED Viewed

@@ -0,0 +1,16 @@
+from inspect import stack, getmodule
+from typing import List
+def get_process_names() -> List:
+    """Get process names
+    Returns:
+        List: process names
+    """
+    module_names = ["none"]
+    for stack_entry in stack():
+        current_module = getmodule(stack_entry[0])
+        if current_module is not None:
+            module_names += [current_module.__name__]
+    return module_names

MEDiml/utils/get_file_paths.py ADDED Viewed

@@ -0,0 +1,34 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+from importlib.resources import path
+from pathlib import Path
+from typing import List, Union
+def get_file_paths(path_to_parent_folder: Union[str, Path], wildcard: str=None) -> List[Path]:
+    """Finds all files in the given path that matches the pattern/wildcard.
+    Note:
+        The search is done recursively in all subdirectories.
+    Args:
+        path_to_parent_folder (Union[str, Path]): Full path to where the files are located.
+        wildcard (str, optional): String specifying which type of files
+        to locate in the parent folder.
+            - Ex : '*.dcm*', to look for dicom files.
+    Returns:
+        List: List of full paths to files with the specific wildcard located \
+            in the given path to parent folder.
+    """
+    if wildcard is None:
+        wildcard = '*'
+    # Getting the list of all files full path in file_paths
+    path_to_parent_folder = Path(path_to_parent_folder)
+    file_paths_list = list(path_to_parent_folder.rglob(wildcard))
+    # for the name only put file.name
+    file_paths = [file for file in file_paths_list if file.is_file()]
+    return file_paths

MEDiml/utils/get_full_rad_names.py ADDED Viewed

@@ -0,0 +1,21 @@
+from typing import List
+import numpy as np
+def get_full_rad_names(str_user_data: str, rad_var_ids: List):
+    """
+    Returns the full real names of the radiomics variables (sequential names are not very informative)
+    Args:
+        str_user_data: string containing the full rad names
+        rad_var_ids: can get it by doing table.column.values
+    Returns:
+        List: List of full radiomic names.
+    """
+    full_rad_names = np.array([])
+    for rad_var in rad_var_ids:
+        ind_var = int(rad_var[6:])
+        full_rad_names = np.append(full_rad_names, str_user_data.split('||')[ind_var].split(':')[1])
+    return full_rad_names

MEDiml/utils/get_institutions_from_ids.py ADDED Viewed

@@ -0,0 +1,16 @@
+import pandas as pd
+def get_institutions_from_ids(patient_ids):
+    """
+    Extracts the institution strings from a cell of patient IDs.
+    Args:
+        patient_ids (Any): Patient ID (string, list of strings or pandas Series). Ex: 'Cervix-CEM-010'.
+    Returns:
+        str: Categorical vector, specifying the institution of each patient_id entry in "patient_ids". Ex: 'CEM'.
+    """
+    if isinstance(patient_ids, list):
+        patient_ids = pd.Series(patient_ids)
+    return patient_ids.str.rsplit('-', expand=True)[1]

MEDiml/utils/get_patient_id_from_scan_name.py ADDED Viewed

@@ -0,0 +1,22 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+def get_patient_id_from_scan_name(rad_name: str) -> str:
+    """
+    Finds the patient id from the given string
+    Args:
+        rad_name(str): Name of a scan or a radiomics structure
+    Returns:
+        str: patient id
+    Example:
+        >>> get_patient_id_from_scan_name('STS-McGill-001__T1(tumourAndEdema).MRscan')
+        STS-McGill-001
+    """
+    ind_double_under = rad_name.find('__')
+    patientID = rad_name[:ind_double_under]
+    return patientID

MEDiml/utils/get_patient_names.py ADDED Viewed

@@ -0,0 +1,26 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+from typing import List
+import numpy as np
+def get_patient_names(roi_names: np.ndarray) -> List[str]:
+    """Generates all file names for scans using CSV data.
+    Args:
+        roi_names (ndarray): Array with CSV data organized as follows
+            [[patient_id], [imaging_scan_name], [imagning_modality]]
+    Returns:
+        list[str]: List of scans files name.
+    """
+    n_names = np.size(roi_names[0])
+    patient_names = [0] * n_names
+    for n in range(0, n_names):
+        patient_names[n] = roi_names[0][n]+'__'+roi_names[1][n] + \
+            '.'+roi_names[2][n]+'.npy'
+    return patient_names

MEDiml/utils/get_radiomic_names.py ADDED Viewed

@@ -0,0 +1,27 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+from typing import Dict
+import numpy as np
+def get_radiomic_names(roi_names: np.array,
+                       roi_type: str) -> Dict:
+    """Generates radiomics names using ``roi_names`` and ``roi_types``.
+    Args:
+        roi_names (np.array): array of the ROI names.
+        roi_type(str): string of the ROI.
+    Returns:
+        dict: dict with the radiomic names
+    """
+    n_names = np.size(roi_names)[0]
+    radiomic_names = [0] * n_names
+    for n in range(0, n_names):
+        radiomic_names[n] = roi_names[n, 0]+'__'+roi_names[n, 1] + \
+            '('+roi_type+').'+roi_names[n, 2]+'.npy'
+    return radiomic_names

MEDiml/utils/get_scan_name_from_rad_name.py ADDED Viewed

@@ -0,0 +1,22 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+def get_scan_name_from_rad_name(rad_name: str) -> str:
+    """Finds the imaging scan name from thr radiomics structure name
+    Args:
+        rad_name (str): radiomics structure name.
+    Returns:
+        str: String of the imaging scan name
+    Example:
+        >>> get_scan_name_from_rad_name('STS-McGill-001__T1(tumourAndEdema).MRscan')
+        'T1'
+    """
+    ind_double_under = rad_name.find('__')
+    ind_open_par = rad_name.find('(')
+    scan_name = rad_name[ind_double_under + 2:ind_open_par]
+    return scan_name

MEDiml/utils/image_reader_SITK.py ADDED Viewed

@@ -0,0 +1,37 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+from pathlib import Path
+from typing import Dict, Union
+import SimpleITK as sitk
+import numpy as np
+def image_reader_SITK(path: Path,
+                      option: str=None) -> Union[Dict, None]:
+    """Return the image in a numpy array or a dictionary with the header of the image.
+    Args:
+        path (path): path of the file
+        option (str): name of the option, either 'image' or 'header'
+    Returns:
+        Union[Dict, None]: dictionary with the header of the image
+    """
+    if option is None or option == 'image':
+        # return the image in a numpy array
+        return np.transpose(sitk.GetArrayFromImage(sitk.ReadImage(path)))
+    elif option == 'header':
+        # Return a dictionary with the header of the image.
+        reader = sitk.ImageFileReader()
+        reader.SetFileName(path)
+        # reader.LoadPrivateTagsOn()
+        reader.ReadImageInformation()
+        dic_im_header = {}
+        for key in reader.GetMetaDataKeys():
+            dic_im_header.update({key: reader.GetMetaData(key)})
+        return dic_im_header
+    else:
+        print("Argument option should be the string 'image' or 'header'")
+        return None

MEDiml/utils/image_volume_obj.py ADDED Viewed

@@ -0,0 +1,22 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+class image_volume_obj:
+    """Used to organize Imaging data and their corresponding imref3d object.
+    Args:
+        data (ndarray, optional): 3D array of imaging data.
+        spatialRef (imref3d, optional): The corresponding imref3d object
+            (same functionality of MATLAB imref3d class).
+    Attributes:
+        data (ndarray): 3D array of imaging data.
+        spatialRef (imref3d): The corresponding imref3d object
+            (same functionality of MATLAB imref3d class).
+    """
+    def __init__(self, data=None, spatial_ref=None) -> None:
+        self.data = data
+        self.spatialRef = spatial_ref