pico-ml 2.0.0 (pico_ml-2.0.0-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pico/__init__.py +3 -0
- pico/__main__.py +3 -0
- pico/cli/__init__.py +2 -0
- pico/cli/main.py +117 -0
- pico/conf/SupportedCV.py +17 -0
- pico/conf/SupportedModels.py +73 -0
- pico/conf/algo_sklearn.json +51 -0
- pico/conf/parameters.py +14 -0
- pico/domain/ClassificationDesign.py +107 -0
- pico/domain/Controller.py +397 -0
- pico/domain/DataMatrix.py +147 -0
- pico/domain/ExperimentDTO.py +17 -0
- pico/domain/MetaData.py +229 -0
- pico/domain/MetaboExperiment.py +696 -0
- pico/domain/MetaboModel.py +53 -0
- pico/domain/ModelFactory.py +45 -0
- pico/domain/Results.py +602 -0
- pico/domain/SplitGroup.py +202 -0
- pico/domain/__init__.py +9 -0
- pico/domain/dumps/metadata/.gitkeep +0 -0
- pico/domain/dumps/splits/.gitkeep +0 -0
- pico/service/DataFormat.py +180 -0
- pico/service/ExperimentDesign.py +30 -0
- pico/service/LoggerConfig.py +150 -0
- pico/service/Plots.py +472 -0
- pico/service/RunMLalgo.py +93 -0
- pico/service/SamplesPairing.py +390 -0
- pico/service/Utils.py +497 -0
- pico/service/__init__.py +7 -0
- pico/ui/__init__.py +1 -0
- pico/ui/app.py +145 -0
- pico/ui/assets/000_Stylesheet.css +464 -0
- pico/ui/assets/DecisionTree.png +0 -0
- pico/ui/assets/Figure_home_wider.png +0 -0
- pico/ui/assets/favicon.ico +0 -0
- pico/ui/assets/help_icon.png +0 -0
- pico/ui/assets/help_icon.svg +15 -0
- pico/ui/assets/update_figure_steps_MeDIC_4.svg +1 -0
- pico/ui/tabs/AggregatedResultsTab.py +394 -0
- pico/ui/tabs/InfoTab.py +440 -0
- pico/ui/tabs/InterpretTab.py +21 -0
- pico/ui/tabs/MLTab.py +487 -0
- pico/ui/tabs/MetaTab.py +23 -0
- pico/ui/tabs/ResultsTab.py +1062 -0
- pico/ui/tabs/SplitsTab.py +1227 -0
- pico/ui/tabs/__init__.py +6 -0
- pico/ui/tabs/utils.py +101 -0
- pico_ml-2.0.0.dist-info/METADATA +86 -0
- pico_ml-2.0.0.dist-info/RECORD +52 -0
- pico_ml-2.0.0.dist-info/WHEEL +4 -0
- pico_ml-2.0.0.dist-info/entry_points.txt +2 -0
- pico_ml-2.0.0.dist-info/licenses/LICENSE +437 -0
pico/service/Utils.py
ADDED
@@ -0,0 +1,497 @@
import base64
import hashlib
import importlib
import os
import pickle
from typing import Union
import re

import pickle as pkl
from typing import List, Dict, Tuple

import numpy
import numpy as np
import pandas as pd
import sklearn

PACKAGE_ROOT_PATH = os.sep.join(os.path.dirname(__file__).split(os.sep)[:-1])
DUMP_PATH = os.path.join(PACKAGE_ROOT_PATH, "domain", "dumps")
DUMP_EXPE_PATH = os.path.join(DUMP_PATH, "save.mtxp")

DEFAULT_IMPORTANCE_ATTRIBUTE = "feature_importances_"


def dump_metabo_expe(obj, expe_file_path: str = DUMP_EXPE_PATH):
    with open(expe_file_path, "w+b") as expe_file:
        pkl.dump(obj, expe_file)


def get_dumped_metabo_experiment_path() -> str:
    return DUMP_EXPE_PATH


def get_metabo_experiment_path(expe_filename: str = "save", is_date: bool = True) -> str:
    """Get the metabo experiment path.
    - optionally add the date (if is_date is True).
    - add the ".mtxp" extension if expe_filename doesn't have one.

    Args:
        expe_filename (str, optional): filename to use as template. Defaults to "save".
        is_date (bool, optional): True to add the date and time in the filename. Defaults to True.

    Returns:
        str: The full path to the filename
    """
    _, name_ext = os.path.splitext(expe_filename)
    if not name_ext:
        expe_filename = expe_filename + ".mtxp"

    saves_dir = get_pico_subdir("saves")

    new_filename = insert_datetime(expe_filename) if is_date else expe_filename
    return os.path.join(saves_dir, new_filename)


def get_pico_subdir(subdir_name: str) -> str:
    """Return the path to the provided subdirectory of the main '~/pico_files/' pico directory.

    Args:
        subdir_name (str): subdir to get (and create if it doesn't exist)

    Returns:
        str: the subdir full path
    """
    home_dir_path: str = os.path.expanduser("~")
    subdir_path: str = os.path.join(home_dir_path, "pico_files", subdir_name)

    if not os.path.exists(subdir_path):
        os.makedirs(subdir_path)

    if not os.path.exists(subdir_path):
        subdir_path = os.getcwd()

    return subdir_path


from datetime import datetime

def insert_datetime(expe_filename: str) -> str:
    current_datetime = datetime.now().strftime("_%Y%m%d_%H%M%S")
    name_root, name_ext = os.path.splitext(expe_filename)
    return name_root + current_datetime + name_ext


def load_metabo_expe(path):
    # Check the file that is actually opened below (the given path), not the default dump path.
    if os.path.isfile(path):
        with open(path, "rb") as expe_file:
            return pkl.load(expe_file)
    else:
        return None

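# Illustrative usage of the save helpers above (example values only; the dict stands
# in for the real experiment object, and the date suffix varies):
#     save_path = get_metabo_experiment_path("my_experiment")
#     # -> ~/pico_files/saves/my_experiment_20240101_120000.mtxp
#     dump_metabo_expe({"demo": True}, save_path)
#     restored = load_metabo_expe(save_path)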
def retrieve_data_from_sample_name(names_list, dataframe):
    """
    :param names_list: list of sample names
    :param dataframe: a dataframe with one row per sample, indexed by sample name
    :return: list of data
    """
    print("retrieving data from name")
    data_list = []
    for n in names_list:
        d = dataframe.loc[n, :]
        data_list.append(d.tolist())
    # print("data list : {}".format(data_list[0]))
    print("data from name retrieved")
    # print("data 2nd element : {}".format(data_list[1]))
    return data_list


def list_filler(liste):
    """
    Fill the empty values (NA or "") of a list with the last non-empty value, from left to right.
    If the first value is empty, it is left as is.
    :param liste: list to fill
    :return: new list filled
    """
    l = []
    current = ""
    for idx, j in enumerate(liste):
        if j != "":
            current = j
        l.append(current)
    return l


def read_Progenesis_compounds_table(fileName, with_raw=True):
    datatable = pd.read_csv(fileName, header=2, index_col=0)
    header = pd.read_csv(fileName, nrows=1, index_col=0)
    start_normalized = header.columns.tolist().index("Normalised abundance")

    labels_array = np.array(header.iloc[0].tolist())
    possible_labels = labels_array[labels_array != "nan"]

    if with_raw:
        start_raw = header.columns.tolist().index("Raw abundance")
        sample_names = datatable.iloc[:, start_normalized:start_raw].columns
        possible_labels = possible_labels[0: int(len(possible_labels) / 2)]
    else:
        sample_names = datatable.iloc[:, start_normalized:].columns

    labels = [""] * len(sample_names)
    start_label = possible_labels[0]
    labels_array = labels_array.tolist()
    for next_labels in possible_labels[1:]:
        index_s = labels_array.index(start_label) - start_normalized
        index_e = labels_array.index(next_labels) - start_normalized
        labels[index_s:index_e] = [start_label] * (index_e - index_s)
        start_label = next_labels
    labels[index_e:] = [start_label] * (len(labels) - index_e)

    labels_dict = {sample_names[i]: j for i, j in enumerate(labels)}

    if with_raw:
        datatable_compoundsInfo = datatable.iloc[:, 0:start_normalized]
        datatable_normalized = datatable.iloc[:, start_normalized:start_raw]
        datatable_raw = datatable.iloc[:, start_raw:]
        datatable_raw.columns = [
            i.rstrip(".1") for i in datatable_raw.columns
        ]  # Fix the column names

        datatable_normalized = datatable_normalized.T
        datatable_raw = datatable_raw.T
        datatable_compoundsInfo = datatable_compoundsInfo.T
        datatable_normalized.rename(columns={"Compound": "Sample"})
        datatable_raw.rename(columns={"Compound": "Sample"})
        return (
            datatable_compoundsInfo,
            datatable_normalized,
            datatable_raw,
            labels,
            sample_names,
        )
    else:
        datatable_compoundsInfo = datatable.iloc[:, 0:start_normalized]
        datatable_normalized = datatable.iloc[:, start_normalized:]
        datatable_normalized = datatable_normalized.T
        datatable_compoundsInfo = datatable_compoundsInfo.T
        datatable_normalized.rename(columns={"Compound": "Sample"})
        return datatable_compoundsInfo, datatable_normalized, labels, sample_names


def filter_sample_based_on_labels(data, labels, labels_to_keep):
    """
    function not used
    """
    labels_filter = np.array([i in labels_to_keep for i in labels])
    d = data.iloc[labels_filter]
    l = np.array(labels)[labels_filter]
    return d, l


def get_group_to_class(classes):
    """
    function not used
    """
    group_to_class = {}
    for class_name in classes:
        for subgroup in classes[class_name]:
            group_to_class[subgroup] = class_name
    return group_to_class


def reverse_dict(dictionnary: dict) -> dict:
    """
    Create a reverse dict to easily retrieve the label associated with a target.
    example
    input dict is in shape {label1 : [target1, target2], label2 : [target3, target4]}
    output dict would be {target1 : label1, target2 : label1, target3 : label2, target4 : label2}
    """
    reversed_dict = {}
    for key, value in dictionnary.items():
        if type(value) is list:
            for val in value:
                reversed_dict[val] = key
        else:
            reversed_dict[value] = key
    return reversed_dict


def load_classes_from_targets(classes_design: dict, targets: Tuple[str]) -> List[str]:
    """
    Create a list of targets to be predicted according to the class design, a kind of "translation" of labels.
    !!! There is some confusion between class/label/target; it results from their general use as synonyms in
    the literature. We tried to establish a distinction because we need 3 different terms to name 3 slightly
    different things. However, even for us it got mixed up during development. Hence the sometimes confusing
    naming of variables. Always refer to the documentation for the proper meaning. !!!
    example
    Argument 'targets' is (class3, class1, class4, class4, class2, class1)
    classes_design would be something like {label1 : [class1, class2], label2 : [class3, class4]}
    reverse_classes_design would be {class1 : label1, class2 : label1, class3 : label2, class4 : label2}
    'classes' that is returned would be [label2, label1, label2, label2, label1, label1]
    """
    reverse_classes_design = reverse_dict(classes_design)
    classes = []
    for target in targets:
        if target not in reverse_classes_design:
            raise ValueError("Target {} not found in classes_design".format(target))
        classes.append(reverse_classes_design[target])
    if len(classes) != len(targets):
        raise ValueError("Some targets were not found in classes_design")
    return classes

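# Illustrative call of the two helpers above (made-up label/target names):
#     classes_design = {"responder": ["R1", "R2"], "non_responder": ["NR1"]}
#     reverse_dict(classes_design)
#     # -> {"R1": "responder", "R2": "responder", "NR1": "non_responder"}
#     load_classes_from_targets(classes_design, ("R2", "NR1", "R1"))
#     # -> ["responder", "non_responder", "responder"]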
# TODO: need to support multi-classification
def get_binary(list_to_convert: List[str], classes: List[str]) -> List[int]:
    return [classes.index(value) for value in list_to_convert]


def compute_hash(data: str) -> str:
    """
    Compute a hash for a data string
    """
    return hashlib.sha256(data.encode("utf-8")).hexdigest()


def is_save_safe(saved_metabo_experiment_dto) -> bool:
    return (
        saved_metabo_experiment_dto.metadata.is_data_the_same()
        and saved_metabo_experiment_dto.data_matrix.is_data_the_same()
    )


def format_list_for_checklist(list_to_format: List[str]) -> List[Dict[str, str]]:
    return [{"label": value, "value": value} for value in list_to_format]


def check_if_column_exist(datatable: pd.DataFrame, column_name: str) -> bool:
    return column_name in datatable.columns


def decode_pickle_from_base64(encoded_object: str):
    return pickle.loads(base64.b64decode(encoded_object.split(",")[1]))


def are_files_corresponding_to_dto(
    data: str, metadata: str, metabo_experiment_dto
) -> bool:
    return is_data_the_same(data, metabo_experiment_dto) and is_metadata_the_same(
        metadata, metabo_experiment_dto
    )


def reset_file(file_path: str):
    """
    Reset (truncate) the content of a file.
    """
    open(file_path, "w+b").close()


# TODO : function to probably delete
def restore_ids_and_targets_from_pairing_groups(filtered_samples: List[str], dataframe: pd.DataFrame, id_column: str,
                                                paired_column: str, target_column: str, classes_design: dict,) -> Tuple[List[str], List[str]]:

    pairing_values = dataframe.loc[dataframe[id_column].isin(filtered_samples)][paired_column].tolist()
    ids = dataframe[dataframe[paired_column].isin(pairing_values)][id_column].tolist()
    targets = dataframe.loc[dataframe[id_column].isin(ids)][target_column].tolist()
    duo = list(zip(ids, targets))
    restored_ids = []
    restored_targets = []
    for d in duo:
        if d[1] in np.concatenate(list(classes_design.values())):
            restored_ids.append(d[0])
            restored_targets.append(d[1])
    return restored_ids, load_classes_from_targets(classes_design, restored_targets)


def convert_str_to_list_of_lists(str_to_convert: str) -> List[List[Union[str, float, int]]]:
    first_level = []
    for find in re.findall(r'\[((([\w\'".]+,? ?)+))\] ?,?', str_to_convert):
        tmp = find[0].split(',')
        second_level = []
        for element in tmp:
            element = element.strip()
            try:
                element = int(element)
            except ValueError:
                # only fall back to float when the value is not an integer
                try:
                    element = float(element)
                except ValueError:
                    pass
            second_level.append(element)

        first_level.append(second_level)
    return first_level


def is_data_the_same(data: str, metabo_experiment_dto) -> bool:
    return metabo_experiment_dto.data_matrix.get_hash() == compute_hash(data)


def is_metadata_the_same(metadata: str, metabo_experiment_dto) -> bool:
    return metabo_experiment_dto.metadata.get_hash() == compute_hash(metadata)


def get_model_from_import(imports_list: list, model_name: str) -> sklearn:
    """
    Import a "custom" model from sklearn
    """
    last_import = importlib.import_module("." + imports_list[0], package="sklearn")

    for next_import in imports_list[1:]:
        last_import = getattr(last_import, next_import)

    model = getattr(last_import, model_name)
    return model

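# Illustrative use of the dynamic import above (assumes scikit-learn is installed;
# the chosen estimator is arbitrary):
#     Model = get_model_from_import(["ensemble"], "RandomForestClassifier")
#     clf = Model(n_estimators=50, random_state=42)  # same class as sklearn.ensemble.RandomForestClassifier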
def get_model_parameters(model) -> List[Tuple[str, str]]:
    parameters = vars(model()).keys()
    parameters = [(parameter, _get_type(parameter, model())) for parameter in parameters if parameter not in ["self", "random_state"]]
    return parameters


def _get_type(attribute: str, owner_instance) -> str:
    return str(type(getattr(owner_instance, attribute)).__name__)


def _parameter_is_relevant(parameter: str, model: object) -> bool:
    return parameter not in get_model_parameters(model) and parameter.endswith("_") and not parameter.startswith("_")


def _parameter_is_collection(parameter: str, trained_model: sklearn) -> bool:
    try:
        attribute = getattr(trained_model, parameter)
    except AttributeError:
        print("Attribute {} not found in model".format(parameter))
        return False
    if type(attribute) in (list, tuple, set, dict, pd.DataFrame, pd.Series, numpy.ndarray):
        if len(attribute) == 3:
            return True
        try:
            # TODO: manual test on the web app
            if len(attribute[0]) == 3 and len(attribute) == 1:
                return True
        except (KeyError, TypeError, IndexError):
            pass
    return False


def get_model_parameters_after_training(model: sklearn) -> List[Tuple[str, str]]:
    trained_model = model()
    trained_model.fit([[1, 2, 3], [4, 5, 6]], [1, 2])

    attributes = dir(trained_model)
    parameters = []
    for attribute in attributes:
        if _parameter_is_collection(attribute, trained_model) and \
                _parameter_is_relevant(attribute, model):
            parameters.append((attribute, _get_type(attribute, trained_model)))
    if DEFAULT_IMPORTANCE_ATTRIBUTE in attributes:
        default_tuple = (DEFAULT_IMPORTANCE_ATTRIBUTE, _get_type(DEFAULT_IMPORTANCE_ATTRIBUTE, trained_model))
        parameters.remove(default_tuple)
        parameters.insert(0, default_tuple)
    return parameters

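# Illustrative sketch of the probe-fit above: fitting on the tiny 3-feature dataset
# exposes per-feature attributes of length 3, which is how importance-like attributes
# are detected (exact output depends on the scikit-learn version):
#     from sklearn.tree import DecisionTreeClassifier
#     get_model_parameters_after_training(DecisionTreeClassifier)
#     # -> [("feature_importances_", "ndarray"), ...]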
def transform_params_to_cross_validation_dict(params: List[Tuple[str, str]], param_types: dict) -> dict:
    cross_validation_params = {}
    error = []
    for param, value in params:
        values = re.split(r" *, *", value)
        param_type = param_types[param]
        if param_type == "float":
            try:
                cross_validation_params[param] = [float(val) for val in values]
            except ValueError:
                error.append(f"{param} must be decimal numbers (with '.')")
        elif param_type == "int":
            try:
                cross_validation_params[param] = [int(val) for val in values]
            except ValueError:
                error.append(f"{param} must be integers")
        else:
            try:
                values = int(value)
            except ValueError:
                try:
                    values = float(value)
                except ValueError:
                    pass
            try:
                values = [int(val) for val in values]
            except ValueError:
                try:
                    values = [float(val) for val in values]
                except ValueError:
                    pass

            cross_validation_params[param] = values
    if error:
        raise ValueError(error)
    return cross_validation_params

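# Illustrative conversion (made-up hyperparameter grid):
#     params = [("max_depth", "2, 5, 10"), ("criterion", "gini, entropy")]
#     param_types = {"max_depth": "int", "criterion": "str"}
#     transform_params_to_cross_validation_dict(params, param_types)
#     # -> {"max_depth": [2, 5, 10], "criterion": ["gini", "entropy"]}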
def get_closest_integer_steps(slider_size):
    max_number_of_steps = 5

    if slider_size <= 0:
        return []
    step = slider_size // max_number_of_steps
    steps = [step * i for i in range(0, max_number_of_steps + 1)]
    if slider_size % max_number_of_steps != 0:
        # steps.pop(-1)
        steps.append(slider_size)
    return [int(i) for i in steps if i <= slider_size]

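# Worked examples for the slider marks above:
#     get_closest_integer_steps(23)  # 23 // 5 == 4 -> [0, 4, 8, 12, 16, 20, 23]
#     get_closest_integer_steps(25)  # exact multiple of 5 -> [0, 5, 10, 15, 20, 25]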
def remove_random_samples_from_class(X: pd.Series, y: List[str], balance_correction: int,
                                     classes_repartition: dict, seed: int = 42) -> Tuple[pd.Series, list]:
    """
    Adjust the proportion between classes to make the repartition more balanced, or as balanced as possible.
    The amount of adjustment (balance_correction) is supposed to be provided by the user.
    """

    samples_ids_and_targets = pd.DataFrame({"id": X, "final_classes": y})
    balance_correction = balance_correction / 100

    if len(classes_repartition) > 2:
        raise ValueError("Balance correction is not supported for multiclassification")

    class_A_name, class_B_name = tuple(classes_repartition.keys())
    total_number_of_samples = classes_repartition[class_A_name] + classes_repartition[class_B_name]
    class_A_repartition = classes_repartition[class_A_name] / total_number_of_samples
    class_B_repartition = classes_repartition[class_B_name] / total_number_of_samples

    # Naming trick to ensure that the "A" class is always the one with the higher number of examples
    if class_B_repartition > class_A_repartition:
        class_A_name, class_B_name = class_B_name, class_A_name
        class_A_repartition, class_B_repartition = class_B_repartition, class_A_repartition

    class_A_lines = samples_ids_and_targets[samples_ids_and_targets["final_classes"] == class_A_name]

    class_A_number_of_samples = len(class_A_lines)
    class_B_number_of_samples = len(samples_ids_and_targets[samples_ids_and_targets["final_classes"] == class_B_name])

    # --- PROOF OF THE FORMULA ---
    # new_proportion_A = trimmed_classe_A_samples / (trimmed_classe_A_samples + class_B_samples)
    # proportion_A - correction = trimmed_classe_A_samples / (trimmed_classe_A_samples + class_B_samples)
    # (proportion_A - correction) * (trimmed_classe_A_samples + class_B_samples) = trimmed_classe_A_samples
    # proportion_A * trimmed_classe_A_samples - correction * trimmed_classe_A_samples + proportion_A * class_B_samples - correction * class_B_samples = trimmed_classe_A_samples
    # proportion_A * class_B_samples - correction * class_B_samples = trimmed_classe_A_samples - proportion_A * trimmed_classe_A_samples + correction * trimmed_classe_A_samples
    # proportion_A * class_B_samples - correction * class_B_samples = trimmed_classe_A_samples * (1 - proportion_A + correction)
    # trimmed_classe_A_samples = class_B_samples * (proportion_A - correction) / (1 - proportion_A + correction)

    new_class_A_number_of_samples = class_B_number_of_samples * (class_A_repartition - balance_correction) / \
                                    (1 - class_A_repartition + balance_correction)
    # Considering class A is always bigger than or equal to class B, to get a more balanced repartition,
    # "new_class_A_number_of_samples" should be a smaller number than "class_A_number_of_samples"
    number_of_samples_to_remove = class_A_number_of_samples - new_class_A_number_of_samples

    np.random.seed(seed)
    ids_to_remove = np.random.choice(class_A_lines.index, int(number_of_samples_to_remove), replace=False)
    samples_ids_and_targets = samples_ids_and_targets.drop(ids_to_remove)

    return samples_ids_and_targets["id"], samples_ids_and_targets["final_classes"].tolist()
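# Numeric check of the trimming formula above (made-up counts): with 80 class-A and
# 20 class-B samples, proportion_A = 0.8; a requested correction of 10 % (balance_correction = 10,
# i.e. 0.1 after scaling) gives trimmed_A = 20 * (0.8 - 0.1) / (1 - 0.8 + 0.1) ~ 46.7,
# so about 33 class-A samples are removed at random and the remaining class-A share
# is roughly 47 / 67 ~ 0.7, as intended.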
pico/service/__init__.py
ADDED
pico/ui/__init__.py
ADDED
@@ -0,0 +1 @@
from .app import app
pico/ui/app.py
ADDED
@@ -0,0 +1,145 @@
import matplotlib
matplotlib.use('agg')

from dash import html, dcc
from dash.dependencies import Input, Output
import dash
import dash_bootstrap_components as dbc
from flask import request, jsonify
import os
import signal

from .tabs import *
from ..domain import Controller

from pico.service import set_log_filename, init_logger
import threading

# Code for the logging
if threading.current_thread() is threading.main_thread():
    logger = set_log_filename()
    logger.info(f"Starting PICO")
else:
    logger = init_logger()
    logger.debug(f"New thread '{threading.current_thread().name}')")

# Launch dash app
app = dash.Dash(
    __name__,
    external_stylesheets=[dbc.themes.LUX],
    meta_tags=[{"name": "viewport", "content": "width=device-width"}],
)
app.title = "PICO"
server = app.server
# app.scripts.config.serve_locally = False
app.css.config.serve_locally = False
app.config.suppress_callback_exceptions = True
app.css.append_css({"external_url": "https://codepen.io/chriddyp/pen/bWLwgP.css"})

metabo_controller = Controller()
infoTab = InfoTab(app, metabo_controller)
splitsTab = SplitsTab(app, metabo_controller)
mLTab = MLTab(app, metabo_controller)
resultsTab = ResultsTab(app, metabo_controller)
resultsAggregatedTab = AggregatedResultsTab(app, metabo_controller)
interpretTab = InterpretTab(app, metabo_controller)

app.layout = html.Div(
    id="page",
    children=[
        html.Div(id="dataCache", children=[], style={"display": "none"}),
        html.Div(
            id="title_container",
            className="row",
            style={"display": "flex", "justify-content": "space-between", "align-items": "center"},
            children=[
                html.Div(
                    children=[
                        html.H1(id="title", children="PICO"),
                        html.Div(
                            children=[
                                html.P(
                                    "Pipeline for Interpretable", style={"color": "white", 'text-transform': 'uppercase', "margin-bottom": "0"}
                                ),
                                html.P(
                                    "Classification of Omics",
                                    style={"color": "white", 'text-transform': 'uppercase', "margin-bottom": "0"},
                                ),
                            ],
                            id="acronym",
                            style={"display": "flex", "justify-content": "center"},
                        ),
                    ],
                    id="title_bg",
                ),
                html.Div(  # Parent container of the button
                    children=[
                        html.Button("X", id="close-button", style={
                            "color": "white", "background-color": "darkred",
                            "border": "none", "border-radius": "50%", "width": "30px", "height": "30px",
                            "font-size": "20px", "line-height": "20px", "text-align": "center", "padding": "0",
                            "cursor": "pointer", "margin": "20px", "margin-top": "0px", "flex-shrink": "0"})
                    ],
                    style={"display": "flex", "justify-content": "flex-end", "align-items": "center", "flex": "1"}
                ),
                html.Div(id="message_close", style={"color": "white", "background-color": "darkred", "font-size": "24px", "display": "none"}),
            ],
        ),
        dcc.Location(id='url', refresh=True),
        html.Div(id='clientside-container', style={"display": "none"}),
        html.Div(
            id="main-content",
            children=[
                dbc.Tabs(
                    id="custom_big_tabs",
                    active_tab="tab-0",
                    className="global_tabs_container",
                    children=[
                        infoTab.getLayout(),
                        splitsTab.getLayout(),
                        mLTab.getLayout(),
                        # dbc.Tab(label="Splits", disabled=True),
                        # dbc.Tab(label="Machine Learning", disabled=True),
                        resultsTab.getLayout(),
                        resultsAggregatedTab.getLayout()
                        # interpretTab.getLayout()
                    ],
                )
            ],
        ),
    ],
)


app.clientside_callback(
    """
    function(n_clicks) {
        if (n_clicks > 0) {
            if (confirm("Do you want to close PICO application server?")) {
                window.close();
                fetch('/shutdown', {method: 'POST'})
                    .then(response => response.json())
                    .then(data => console.log(data));
                document.getElementById('message_close').innerHTML = 'Server stopped. Please close this web page.';
                document.getElementById('message_close').style.display = 'block';
            }
        }
    }
    """,
    Output('clientside-container', 'children'),
    Input('close-button', 'n_clicks')
)

@server.route('/shutdown', methods=['POST'])
def shutdown():
    try:
        logger.info("Shutting down PICO server...")
        shutdown_func = request.environ.get('werkzeug.server.shutdown')
        if shutdown_func:
            shutdown_func()
        else:
            os.kill(os.getpid(), signal.SIGINT)
        return jsonify({'message': 'PICO server is shutting down...'})
    except Exception as e:
        logger.error(f"Error during shutdown: {e}")
        return jsonify({'error': str(e)}), 500
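For local experimentation, a minimal way to serve this app (a sketch only; the packaged entry point in pico/cli/main.py is not shown in this diff, so the host, port and debug settings are assumptions, and older Dash versions use app.run_server instead of app.run):

from pico.ui.app import app

if __name__ == "__main__":
    # On recent Werkzeug versions the 'werkzeug.server.shutdown' hook used by the
    # /shutdown route above is no longer provided, so the SIGINT fallback applies.
    app.run(host="127.0.0.1", port=8050, debug=False)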