pico-ml 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pico/__init__.py +3 -0
- pico/__main__.py +3 -0
- pico/cli/__init__.py +2 -0
- pico/cli/main.py +117 -0
- pico/conf/SupportedCV.py +17 -0
- pico/conf/SupportedModels.py +73 -0
- pico/conf/algo_sklearn.json +51 -0
- pico/conf/parameters.py +14 -0
- pico/domain/ClassificationDesign.py +107 -0
- pico/domain/Controller.py +397 -0
- pico/domain/DataMatrix.py +147 -0
- pico/domain/ExperimentDTO.py +17 -0
- pico/domain/MetaData.py +229 -0
- pico/domain/MetaboExperiment.py +696 -0
- pico/domain/MetaboModel.py +53 -0
- pico/domain/ModelFactory.py +45 -0
- pico/domain/Results.py +602 -0
- pico/domain/SplitGroup.py +202 -0
- pico/domain/__init__.py +9 -0
- pico/domain/dumps/metadata/.gitkeep +0 -0
- pico/domain/dumps/splits/.gitkeep +0 -0
- pico/service/DataFormat.py +180 -0
- pico/service/ExperimentDesign.py +30 -0
- pico/service/LoggerConfig.py +150 -0
- pico/service/Plots.py +472 -0
- pico/service/RunMLalgo.py +93 -0
- pico/service/SamplesPairing.py +390 -0
- pico/service/Utils.py +497 -0
- pico/service/__init__.py +7 -0
- pico/ui/__init__.py +1 -0
- pico/ui/app.py +145 -0
- pico/ui/assets/000_Stylesheet.css +464 -0
- pico/ui/assets/DecisionTree.png +0 -0
- pico/ui/assets/Figure_home_wider.png +0 -0
- pico/ui/assets/favicon.ico +0 -0
- pico/ui/assets/help_icon.png +0 -0
- pico/ui/assets/help_icon.svg +15 -0
- pico/ui/assets/update_figure_steps_MeDIC_4.svg +1 -0
- pico/ui/tabs/AggregatedResultsTab.py +394 -0
- pico/ui/tabs/InfoTab.py +440 -0
- pico/ui/tabs/InterpretTab.py +21 -0
- pico/ui/tabs/MLTab.py +487 -0
- pico/ui/tabs/MetaTab.py +23 -0
- pico/ui/tabs/ResultsTab.py +1062 -0
- pico/ui/tabs/SplitsTab.py +1227 -0
- pico/ui/tabs/__init__.py +6 -0
- pico/ui/tabs/utils.py +101 -0
- pico_ml-2.0.0.dist-info/METADATA +86 -0
- pico_ml-2.0.0.dist-info/RECORD +52 -0
- pico_ml-2.0.0.dist-info/WHEEL +4 -0
- pico_ml-2.0.0.dist-info/entry_points.txt +2 -0
- pico_ml-2.0.0.dist-info/licenses/LICENSE +437 -0
pico/__init__.py
ADDED
pico/__main__.py
ADDED
pico/cli/__init__.py
ADDED
pico/cli/main.py
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
import subprocess
|
|
2
|
+
import sys
|
|
3
|
+
import time
|
|
4
|
+
import webbrowser
|
|
5
|
+
import threading
|
|
6
|
+
|
|
7
|
+
from typing import Annotated, Optional
|
|
8
|
+
|
|
9
|
+
import typer
|
|
10
|
+
|
|
11
|
+
from . import console
|
|
12
|
+
from .. import __version__
|
|
13
|
+
from .. import app as pico_app
|
|
14
|
+
|
|
15
|
+
# Use print from `rich` library
|
|
16
|
+
print = console.print
|
|
17
|
+
|
|
18
|
+
app = typer.Typer()
|
|
19
|
+
|
|
20
|
+
PYPI_PROJECT_NAME = "pico-ml"
|
|
21
|
+
|
|
22
|
+
@app.callback(invoke_without_command=True, no_args_is_help=True)
def main(
    version: bool = typer.Option(False, "--version", "-v", help="PICO CLI version")
):
    """
    PICO CLI!
    """
    # The docstring above is left untouched on purpose: Typer displays it as the CLI help text.
    if version:
        typer.echo(__version__)
        # `typer.Exit` is an exception class: merely instantiating it does nothing.
        # It has to be raised to stop here, otherwise a sub-command given after
        # `--version` would still be executed.
        raise typer.Exit()
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@app.command(name="ui")
def ui(
    port: Annotated[Optional[int], typer.Option("--port", "-p", help="Specify the port of the app")] = 5000,
    verbose: Annotated[Optional[str], typer.Option("--verbose", "-v", help="Specifiy verbosity")] = "debug"
):
    """Launch the web application"""
    # The docstring above is kept verbatim: Typer displays it as the command help text.
    # `verbose` is only echoed here; nothing else in this function consumes it.
    print("verbose", verbose)
    page_url = f"http://127.0.0.1:{port}"

    def startWebPage(url, wait_time: int = 0):
        """Sleep for `wait_time` seconds, then open `url` in the default web browser."""
        time.sleep(wait_time)
        webbrowser.open(url)

    # The server call below blocks, so the browser is opened from a helper thread
    # after a 5 second delay. The thread is a daemon so that it can never keep the
    # process alive (or open a browser on a dead URL) when the server fails to
    # start or is stopped before the delay has elapsed.
    start_web_page_thread = threading.Thread(
        target=startWebPage,
        args=(page_url, 5),
        daemon=True,
    )
    start_web_page_thread.start()
    # NOTE(review): the server is bound to every interface (0.0.0.0) while the
    # browser is pointed at the loopback address; confirm that exposing the app
    # on the network is intended.
    pico_app.run(debug=False, host='0.0.0.0', port=port)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@app.command()
def update():
    """Update the pico project to the latest version"""

    # See https://stackoverflow.com/a/50255019 for installation of a python package
    pip_show = subprocess.run(
        [sys.executable, "-m", "pip", "show", PYPI_PROJECT_NAME],
        check=True,
        capture_output=True,
    )
    pip_infos = pip_show.stdout.decode('latin-1').split("\n")

    # When `pip show` reports an editable project location there is nothing to upgrade.
    editable_identifier_str = "Editable project location: "
    if any(editable_identifier_str in info_line for info_line in pip_infos):
        print(
            "[cyan]pico[/cyan] is installed in editable mode, it should already be up to date."
        )
        print(f"Current version: {__version__}")
        return

    # Choose the upgrade command: pipx when the interpreter path mentions it,
    # otherwise pip run with the current Python interpreter.
    managed_by_pipx = "pipx" in sys.executable
    if managed_by_pipx:
        print("[cyan]pico[/cyan] is managed by [cyan]pipx[/cyan]")
        update_function = ["pipx", "upgrade", PYPI_PROJECT_NAME]
    else:
        update_function = [sys.executable, "-m", "pip", "install", "-U", PYPI_PROJECT_NAME]

    # Show the exact command and ask for confirmation (aborts when declined).
    print("The following command will update [cyan]pico[/cyan]:")
    print(" ".join(update_function), end="\n\n")
    typer.confirm("Do you want to proceed?", abort=True)

    if managed_by_pipx:
        # pipx is verbose enough
        subprocess.run(update_function, check=True)
        return

    subprocess.run(update_function, check=True, capture_output=True)
    # Ask the freshly installed package for its version in a new process.
    version_probe = subprocess.run(
        [sys.executable, "-m", "pico", "--version"], capture_output=True
    )
    new_version = version_probe.stdout.decode().strip()
    print(f"[cyan]pico[/cyan] updated to {new_version}")
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
# This is exposed for documentation purposes
|
|
117
|
+
typer_click_object = typer.main.get_command(app)
|
pico/conf/SupportedCV.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
|
|
2
|
+
|
|
3
|
+
# Supported hyper-parameter search strategies, keyed by name.
# Each entry stores the search class under "constructor" and a list of
# parameter descriptors (name / value / type / constant) under "params".
CV_ALGORITHMS = dict(
    GridSearchCV=dict(
        constructor=GridSearchCV,
        params=[],
    ),
    RandomizedSearchCV=dict(
        constructor=RandomizedSearchCV,
        params=[
            dict(name="n_iter", value=10, type="int", constant=False),
        ],
    ),
)
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from pyscm.scm import SetCoveringMachineClassifier
|
|
3
|
+
from randomscm.randomscm import RandomScmClassifier
|
|
4
|
+
from sklearn.ensemble import RandomForestClassifier
|
|
5
|
+
from sklearn.tree import DecisionTreeClassifier
|
|
6
|
+
|
|
7
|
+
def _model_entry(function, random_search, grid_search):
    """Assemble one LEARN_CONFIG entry from an estimator class and its two search spaces."""
    return {
        "function": function,
        "ParamGrid": {
            "RandomSearch": random_search,
            "GridSearch": grid_search,
        },
        "importance_attribute": "feature_importances_",
    }


# Learning configuration per supported model name: the estimator class, the
# hyper-parameter space for each search strategy, and the name of the attribute
# holding the feature importances.
LEARN_CONFIG = {
    "DecisionTree": _model_entry(
        DecisionTreeClassifier,
        random_search={
            "max_depth": np.arange(2, 9, step=1, dtype=int),
            "min_samples_split": np.arange(2, 20, step=2, dtype=int),
            "max_features": ["sqrt", "log2"],
        },
        grid_search={
            "max_depth": [2, 3, 4, 5, 6, 7],
            "min_samples_split": [2, 4, 6, 8, 10],
            "max_features": ["sqrt", "log2"],
            "class_weight": ["balanced"],
        },
    ),
    "RandomForest": _model_entry(
        RandomForestClassifier,
        random_search={
            "n_estimators": np.arange(5, 300, step=10, dtype=int),
            "max_depth": np.arange(2, 6, step=1, dtype=int),
            "min_samples_split": np.arange(2, 20, step=2, dtype=int),
        },
        grid_search={
            "n_estimators": [10, 30, 50, 70, 100, 200],
            "max_depth": [2, 3, 4, 5],
            "min_samples_split": [2, 4, 6, 8, 10],
        },
    ),
    "SCM": _model_entry(
        SetCoveringMachineClassifier,
        random_search={
            "p": np.logspace(-2, 2, base=10, num=30),
            "max_rules": np.arange(1, 6, 1, dtype=int),
            "model_type": ["conjunction", "disjunction"],
        },
        grid_search={
            "p": np.logspace(-2, 2, base=10, num=7),
            "max_rules": [1, 2, 3, 4, 5],
            "model_type": ["conjunction", "disjunction"],
        },
    ),
    "RandomSCM": _model_entry(
        RandomScmClassifier,
        random_search={
            "p": np.logspace(-2, 2, base=10, num=30),
            "n_estimators": np.arange(5, 200, step=10, dtype=int),
            "model_type": ["conjunction", "disjunction"],
        },
        grid_search={
            "p": np.logspace(-2, 2, base=10, num=5),
            "n_estimators": [5, 10, 30, 50, 70, 100, 200],
            "model_type": ["conjunction", "disjunction"],
        },
    ),
}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
{
|
|
2
|
+
"DecisionTree": {
|
|
3
|
+
"function": "DecisionTreeClassifier",
|
|
4
|
+
"ParamGrid": {
|
|
5
|
+
"max_depth": [
|
|
6
|
+
1,
|
|
7
|
+
2,
|
|
8
|
+
3,
|
|
9
|
+
4,
|
|
10
|
+
5,
|
|
11
|
+
10
|
|
12
|
+
],
|
|
13
|
+
"min_samples_split": [
|
|
14
|
+
2,
|
|
15
|
+
4,
|
|
16
|
+
6,
|
|
17
|
+
8,
|
|
18
|
+
10
|
|
19
|
+
]
|
|
20
|
+
}
|
|
21
|
+
},
|
|
22
|
+
"RandomForest": {
|
|
23
|
+
"function": "RandomForestClassifier",
|
|
24
|
+
"ParamGrid": {
|
|
25
|
+
"n_estimators": [
|
|
26
|
+
1,
|
|
27
|
+
2,
|
|
28
|
+
4,
|
|
29
|
+
10,
|
|
30
|
+
30,
|
|
31
|
+
70,
|
|
32
|
+
100,
|
|
33
|
+
500,
|
|
34
|
+
1000
|
|
35
|
+
]
|
|
36
|
+
}
|
|
37
|
+
},
|
|
38
|
+
"SVM_L1": {
|
|
39
|
+
"function": "LinearSVC",
|
|
40
|
+
"ParamGrid": {
|
|
41
|
+
"C": "np.logspace(-5, 5, 20)"
|
|
42
|
+
}
|
|
43
|
+
},
|
|
44
|
+
"SCM": {
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
},
|
|
48
|
+
"RandomSCM": {
|
|
49
|
+
|
|
50
|
+
}
|
|
51
|
+
}
|
pico/conf/parameters.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# Feature counts used to compute PCA, UMAP and strip charts
features = [5, 10, 40, 100]

# Default slider marks: position in `features` (as a string) -> feature count (as a string)
default_marks = {
    str(position): str(feature_count)
    for position, feature_count in enumerate(features)
}

# Extra mark, placed right after the last feature count, standing for "all features"
all_mark = {f"{len(features)}": "All"}

# Offset applied when showing the used value
custom_mark_offset = 0.3

# Upper bound on the number of used features to plot (used for strip charts)
max_used_features_to_show = 200
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
from typing import Generator, Tuple, Dict, Union
|
|
2
|
+
|
|
3
|
+
from . import SplitGroup, MetaData
|
|
4
|
+
from .Results import *
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class ClassificationDesign:
    """
    One classification design: a mapping of labels to groups of classes, together with
    its split group, the names of the models selected for it and one Results object
    per selected model.
    """

    def __init__(self, classes_design: dict):
        """
        classes_design: dict with labels as keys and classes as values
        """
        self._classes_design: dict = classes_design  # dict with labels as keys and classes as values
        self._name: str = ""
        self._compute_name()
        self._split_group: Union[SplitGroup, None] = None  # set by set_split_parameter_and_compute_splits
        self._selected_models_name: Union[list, None] = None
        self.design_Results: dict = {}  # model name -> Results, filled by set_selected_models_name
        self._is_done: bool = False
        self._balance_correction: int = 0

    def get_is_done(self) -> bool:
        """
        Return the attribute is_done which is triggered when the learning is done
        """
        return self._is_done

    def set_is_done(self, is_done: bool) -> None:
        """
        Set the attribute is_done
        """
        self._is_done = is_done

    def get_balance_correction(self) -> int:
        """
        Return the balance correction (0 until set otherwise)
        """
        return self._balance_correction

    def set_balance_correction(self, balance_correction: int) -> None:
        """
        Set the balance correction.
        Raises ValueError if balance_correction is negative.
        """
        if balance_correction < 0:
            raise ValueError("Balance correction cannot be negative")
        self._balance_correction = balance_correction

    def set_split_parameter_and_compute_splits(self, train_test_proportion: float, number_of_splits: int,
                                               metadata: "MetaData", pairing_column: str,
                                               uniq_sample_id: list[str],
                                               test_split_seed: Union[int, None] = None) -> None:
        """
        Retrieve the classes repartition which is needed to create an instance of SplitGroup
        Create an instance of SplitGroup in the attribute _split_group
        (The init of SplitGroup triggers the _compute_splits function).
        If test_split_seed is provided, then only this test split seed is computed.
        """
        classes_repartition = metadata.get_classes_repartition_based_on_design(self._classes_design)
        # The balance correction is read at this point: set it before calling this method.
        self._split_group = SplitGroup(metadata, self.get_selected_targets_name(), train_test_proportion,
                                       number_of_splits, self._classes_design, pairing_column, uniq_sample_id,
                                       self._balance_correction, classes_repartition, test_split_seed)

    def get_name(self) -> str:
        """
        Return the short name of the design (labels joined with "_vs_")
        """
        return self._name

    def get_full_name(self) -> str:
        """
        Return the detailed name of the design: each label followed by its classes in
        parentheses, the labels being separated by " versus "
        """
        return " versus ".join(
            f"{label} ({', '.join(classes)})"
            for label, classes in self._classes_design.items()
        )

    def get_classes_design(self) -> dict:
        """
        Return the dict given at construction (labels as keys and classes as values)
        """
        return self._classes_design

    def set_selected_models_name(self, selected_models_name: list) -> None:
        """
        Set the attribute self._selected_models_name
        and fill the attribute self.design_Results with an initialized instance of the class Results
        for each algorithm
        selected_models_name: list of names of models to run on data
        Raises ValueError if the splits parameters have not been set yet.
        """
        if self._split_group is None:
            raise ValueError("Trying to set models before setting splits parameters")
        self._selected_models_name = selected_models_name
        number_of_splits = self._split_group.get_number_of_splits()
        for model_name in self._selected_models_name:
            self.design_Results[model_name] = Results(number_of_splits)

    def get_results(self) -> Dict[str, "Results"]:
        """
        Return the results dict (attribute) corresponding to this instance of Classification Design
        Raises RuntimeError while no model has been selected.
        """
        if not self.design_Results:
            raise RuntimeError("The name of the selected models has to be set before accessing results.")
        return self.design_Results

    def _compute_name(self) -> None:
        """
        The name is made up of label1_vs_label2
        Each label referring to a group of one or more classes from the data
        """
        self._name = "_vs_".join(self._classes_design)

    def get_number_of_splits(self) -> int:
        """
        Retrieve the number of splits from SplitGroup instance
        Raises RuntimeError if the splits parameters have not been set yet.
        """
        # Same guard as all_splits, instead of failing with an AttributeError on None.
        if self._split_group is None:
            raise RuntimeError(
                "Trying to access Splits before setting splits parameters"
            )
        return self._split_group.get_number_of_splits()

    def all_splits(self) -> Generator[Tuple[int, list], None, None]:
        """
        Yield (split_index, split) for every split of the SplitGroup instance, each split
        being loaded on demand.
        Raises RuntimeError (on first iteration) if the splits parameters have not been set yet.
        """
        if self._split_group is None:
            raise RuntimeError(
                "Trying to access Splits before setting splits parameters"
            )
        for split_index in range(self._split_group.get_number_of_splits()):
            yield split_index, self._split_group.load_split_with_index(split_index)

    def get_selected_targets_name(self) -> list:
        """
        Reverse the _classes_design dict with Utils.reverse_dict and return the keys of the
        reversed dict as a list (presumably the classes, each one mapped to its label in the
        reversed dict; confirm against Utils.reverse_dict)
        """
        return list(Utils.reverse_dict(self._classes_design).keys())
|