mediml 0.9.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- MEDiml/MEDscan.py +1696 -0
- MEDiml/__init__.py +21 -0
- MEDiml/biomarkers/BatchExtractor.py +806 -0
- MEDiml/biomarkers/BatchExtractorTexturalFilters.py +840 -0
- MEDiml/biomarkers/__init__.py +16 -0
- MEDiml/biomarkers/diagnostics.py +125 -0
- MEDiml/biomarkers/get_oriented_bound_box.py +158 -0
- MEDiml/biomarkers/glcm.py +1602 -0
- MEDiml/biomarkers/gldzm.py +523 -0
- MEDiml/biomarkers/glrlm.py +1315 -0
- MEDiml/biomarkers/glszm.py +555 -0
- MEDiml/biomarkers/int_vol_hist.py +527 -0
- MEDiml/biomarkers/intensity_histogram.py +615 -0
- MEDiml/biomarkers/local_intensity.py +89 -0
- MEDiml/biomarkers/morph.py +1756 -0
- MEDiml/biomarkers/ngldm.py +780 -0
- MEDiml/biomarkers/ngtdm.py +414 -0
- MEDiml/biomarkers/stats.py +373 -0
- MEDiml/biomarkers/utils.py +389 -0
- MEDiml/filters/TexturalFilter.py +299 -0
- MEDiml/filters/__init__.py +9 -0
- MEDiml/filters/apply_filter.py +134 -0
- MEDiml/filters/gabor.py +215 -0
- MEDiml/filters/laws.py +283 -0
- MEDiml/filters/log.py +147 -0
- MEDiml/filters/mean.py +121 -0
- MEDiml/filters/textural_filters_kernels.py +1738 -0
- MEDiml/filters/utils.py +107 -0
- MEDiml/filters/wavelet.py +237 -0
- MEDiml/learning/DataCleaner.py +198 -0
- MEDiml/learning/DesignExperiment.py +480 -0
- MEDiml/learning/FSR.py +667 -0
- MEDiml/learning/Normalization.py +112 -0
- MEDiml/learning/RadiomicsLearner.py +714 -0
- MEDiml/learning/Results.py +2237 -0
- MEDiml/learning/Stats.py +694 -0
- MEDiml/learning/__init__.py +10 -0
- MEDiml/learning/cleaning_utils.py +107 -0
- MEDiml/learning/ml_utils.py +1015 -0
- MEDiml/processing/__init__.py +6 -0
- MEDiml/processing/compute_suv_map.py +121 -0
- MEDiml/processing/discretisation.py +149 -0
- MEDiml/processing/interpolation.py +275 -0
- MEDiml/processing/resegmentation.py +66 -0
- MEDiml/processing/segmentation.py +912 -0
- MEDiml/utils/__init__.py +25 -0
- MEDiml/utils/batch_patients.py +45 -0
- MEDiml/utils/create_radiomics_table.py +131 -0
- MEDiml/utils/data_frame_export.py +42 -0
- MEDiml/utils/find_process_names.py +16 -0
- MEDiml/utils/get_file_paths.py +34 -0
- MEDiml/utils/get_full_rad_names.py +21 -0
- MEDiml/utils/get_institutions_from_ids.py +16 -0
- MEDiml/utils/get_patient_id_from_scan_name.py +22 -0
- MEDiml/utils/get_patient_names.py +26 -0
- MEDiml/utils/get_radiomic_names.py +27 -0
- MEDiml/utils/get_scan_name_from_rad_name.py +22 -0
- MEDiml/utils/image_reader_SITK.py +37 -0
- MEDiml/utils/image_volume_obj.py +22 -0
- MEDiml/utils/imref.py +340 -0
- MEDiml/utils/initialize_features_names.py +62 -0
- MEDiml/utils/inpolygon.py +159 -0
- MEDiml/utils/interp3.py +43 -0
- MEDiml/utils/json_utils.py +78 -0
- MEDiml/utils/mode.py +31 -0
- MEDiml/utils/parse_contour_string.py +58 -0
- MEDiml/utils/save_MEDscan.py +30 -0
- MEDiml/utils/strfind.py +32 -0
- MEDiml/utils/textureTools.py +188 -0
- MEDiml/utils/texture_features_names.py +115 -0
- MEDiml/utils/write_radiomics_csv.py +47 -0
- MEDiml/wrangling/DataManager.py +1724 -0
- MEDiml/wrangling/ProcessDICOM.py +512 -0
- MEDiml/wrangling/__init__.py +3 -0
- mediml-0.9.9.dist-info/LICENSE.md +674 -0
- mediml-0.9.9.dist-info/METADATA +232 -0
- mediml-0.9.9.dist-info/RECORD +78 -0
- mediml-0.9.9.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import random
|
|
2
|
+
from typing import List
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
import pandas as pd
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def check_min_n_per_cat(
        variable_table: pd.DataFrame,
        var_names: List[str],
        min_n_per_cat: float,
        type: str) -> pd.DataFrame:
    """
    Marks as missing the categories that have too few observations.

    This Function is different from matlab, it takes the whole variable_table
    and the name of the var_of_type to fit the way pandas works.

    Each variable is processed independently: a category is discarded from a
    column only when its count in that same column is below ``min_n_per_cat``.

    Args:
        variable_table (pd.DataFrame): Table of variables. The processed
            columns are reassigned on this object.
        var_names (list): List of variable names (columns) to check.
        min_n_per_cat (float): Minimum number of observations per category.
        type (str): Type of variable. ``'hcategorical'`` replaces discarded
            categories with ``np.nan``, ``'icategorical'`` replaces them with
            an empty string. Any other value leaves the table untouched.

    Returns:
        pd.DataFrame: ``variable_table``, where categories under
        ``min_n_per_cat`` have been replaced by the missing marker.
    """
    # Marker written in place of a discarded category, depending on the type.
    if type == 'hcategorical':
        missing_value = np.nan
    elif type == 'icategorical':
        missing_value = ''
    else:
        return variable_table

    for name in var_names:
        column = variable_table[name]
        # value_counts() ignores NaN, so missing entries are never a category.
        counts = column.value_counts()
        rare_cats = counts[counts < min_n_per_cat].index
        flag_rare = column.isin(rare_cats)
        if not flag_rare.any():
            continue
        # mask() returns a new Series: it must be assigned back to take effect.
        variable_table[name] = column.mask(flag_rare, missing_value)

    return variable_table
|
|
40
|
+
|
|
41
|
+
def check_max_percent_cat(variable_table, var_names, max_percent_cat) -> pd.Series:
    """
    Flags the variables in which a single category holds too many observations.

    This Function is different from matlab, it takes the whole variable_table
    and the name of the var_of_type to fit the way pandas works.

    Args:
        variable_table (pd.DataFrame): Table of variables.
        var_names (list): Variable names (columns) to check. Any iterable of
            names is accepted (list, numpy array, pandas Index).
        max_percent_cat (float): Maximum fraction of the table rows that one
            category may hold. It is compared against ``count / n_rows``.

    Returns:
        pd.Series: Boolean series with one entry per name in ``var_names``,
        in the same order and indexed from 0. ``True`` means that at least one
        category of that variable is over ``max_percent_cat``.
    """
    n_observation = variable_table.shape[0]

    flag_var_out = []
    for name in var_names:
        # value_counts() ignores NaN, so missing entries are never a category.
        counts = variable_table[name].value_counts()
        over_limit = (counts / n_observation) > max_percent_cat
        flag_var_out.append(bool(over_limit.any()))

    return pd.Series(flag_var_out, dtype=bool)
|
|
66
|
+
|
|
67
|
+
def one_hot_encode_table(variable_table: pd.DataFrame) -> pd.DataFrame:
    """
    Converts a table of categorical variables into a table of one-hot encoded variables.

    Every variable listed under
    ``variable_table.Properties['userData']['variables']['icategorical']`` is
    replaced by 0/1 columns named ``<variable>__<category>``, inserted at the
    position of the source column in sorted category order. A variable with
    exactly two categories is encoded with a single column (the first sorted
    category); missing values are never treated as a category.

    The ``variables`` dictionary is updated in place: ``one_hot`` maps each
    encoded variable to its new column names, ``icategorical`` is emptied and
    the new column names are appended to ``hcategorical``.

    Args:
        variable_table (pd.DataFrame): Table of variables, carrying a
            ``Properties`` attribute as described above. Its columns are not
            modified.

    Returns:
        variable_table (pd.DataFrame): Table of variables with one-hot encoded
        variables. It is the input object itself when there is nothing to
        encode, otherwise a new table carrying the same ``Properties`` object.
    """

    # INITIALIZATION
    # Keep a handle on the metadata: pandas does not carry ad-hoc attributes
    # such as ``Properties`` over to the new frames returned by ``drop``.
    properties = variable_table.Properties
    variables = properties['userData']['variables']
    var_icat = variables['icategorical']
    if len(var_icat) == 0:
        return variable_table

    # ONE-HOT ENCODING
    one_hot = {}
    all_encoded = []
    for var_name in var_icat:
        source = variable_table[var_name]
        categories = np.asarray(list(source.dropna().unique()))  # get rid of nan
        categories.sort()
        if categories.size == 2:
            # A binary variable is fully described by a single indicator.
            categories = categories[:1]

        position = variable_table.columns.get_loc(var_name)
        # drop() returns a new frame, so the caller's table is never altered
        # by the insertions below.
        variable_table = variable_table.drop(var_name, axis=1)

        name_encoded = []
        for offset, cat in enumerate(categories):
            new_name = f"{var_name}__{cat}"
            data_to_add = (source == cat).astype(int)
            variable_table.insert(loc=position + offset, column=new_name, value=data_to_add)
            name_encoded.append(new_name)
        one_hot[var_name] = name_encoded
        all_encoded.extend(name_encoded)

    # UPDATING THE VARIABLE TYPES
    variables["one_hot"] = one_hot
    variables["icategorical"] = np.array([])
    # NOTE(review): existing hcategorical names are kept because their columns
    # are still in the table; the previous code overwrote the list - confirm.
    previous_hcat = variables.get("hcategorical")
    previous_hcat = [] if previous_hcat is None else list(previous_hcat)
    variables["hcategorical"] = np.array(previous_hcat + all_encoded)

    # Re-attach the metadata to the new frame. object.__setattr__ is used to
    # avoid the pandas warning about creating columns through attributes.
    object.__setattr__(variable_table, "Properties", properties)
    return variable_table
|