pico-ml 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. pico/__init__.py +3 -0
  2. pico/__main__.py +3 -0
  3. pico/cli/__init__.py +2 -0
  4. pico/cli/main.py +117 -0
  5. pico/conf/SupportedCV.py +17 -0
  6. pico/conf/SupportedModels.py +73 -0
  7. pico/conf/algo_sklearn.json +51 -0
  8. pico/conf/parameters.py +14 -0
  9. pico/domain/ClassificationDesign.py +107 -0
  10. pico/domain/Controller.py +397 -0
  11. pico/domain/DataMatrix.py +147 -0
  12. pico/domain/ExperimentDTO.py +17 -0
  13. pico/domain/MetaData.py +229 -0
  14. pico/domain/MetaboExperiment.py +696 -0
  15. pico/domain/MetaboModel.py +53 -0
  16. pico/domain/ModelFactory.py +45 -0
  17. pico/domain/Results.py +602 -0
  18. pico/domain/SplitGroup.py +202 -0
  19. pico/domain/__init__.py +9 -0
  20. pico/domain/dumps/metadata/.gitkeep +0 -0
  21. pico/domain/dumps/splits/.gitkeep +0 -0
  22. pico/service/DataFormat.py +180 -0
  23. pico/service/ExperimentDesign.py +30 -0
  24. pico/service/LoggerConfig.py +150 -0
  25. pico/service/Plots.py +472 -0
  26. pico/service/RunMLalgo.py +93 -0
  27. pico/service/SamplesPairing.py +390 -0
  28. pico/service/Utils.py +497 -0
  29. pico/service/__init__.py +7 -0
  30. pico/ui/__init__.py +1 -0
  31. pico/ui/app.py +145 -0
  32. pico/ui/assets/000_Stylesheet.css +464 -0
  33. pico/ui/assets/DecisionTree.png +0 -0
  34. pico/ui/assets/Figure_home_wider.png +0 -0
  35. pico/ui/assets/favicon.ico +0 -0
  36. pico/ui/assets/help_icon.png +0 -0
  37. pico/ui/assets/help_icon.svg +15 -0
  38. pico/ui/assets/update_figure_steps_MeDIC_4.svg +1 -0
  39. pico/ui/tabs/AggregatedResultsTab.py +394 -0
  40. pico/ui/tabs/InfoTab.py +440 -0
  41. pico/ui/tabs/InterpretTab.py +21 -0
  42. pico/ui/tabs/MLTab.py +487 -0
  43. pico/ui/tabs/MetaTab.py +23 -0
  44. pico/ui/tabs/ResultsTab.py +1062 -0
  45. pico/ui/tabs/SplitsTab.py +1227 -0
  46. pico/ui/tabs/__init__.py +6 -0
  47. pico/ui/tabs/utils.py +101 -0
  48. pico_ml-2.0.0.dist-info/METADATA +86 -0
  49. pico_ml-2.0.0.dist-info/RECORD +52 -0
  50. pico_ml-2.0.0.dist-info/WHEEL +4 -0
  51. pico_ml-2.0.0.dist-info/entry_points.txt +2 -0
  52. pico_ml-2.0.0.dist-info/licenses/LICENSE +437 -0
pico/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ from .ui import app
2
+
3
+ __version__ = "2.0.0"
pico/__main__.py ADDED
@@ -0,0 +1,3 @@
1
+ from .cli.main import app
2
+
3
+ app()
pico/cli/__init__.py ADDED
@@ -0,0 +1,2 @@
1
+ from rich.console import Console
2
+ console = Console()
pico/cli/main.py ADDED
@@ -0,0 +1,117 @@
1
+ import subprocess
2
+ import sys
3
+ import time
4
+ import webbrowser
5
+ import threading
6
+
7
+ from typing import Annotated, Optional
8
+
9
+ import typer
10
+
11
+ from . import console
12
+ from .. import __version__
13
+ from .. import app as pico_app
14
+
15
# Use print from `rich` library.
# NOTE: this deliberately shadows the builtin `print` for the whole module so
# that the `[cyan]...[/cyan]` markup used in the messages below goes through
# the shared rich console.
print = console.print

# Root Typer application; the callback and commands below register on it.
app = typer.Typer()

# Distribution name handed to `pip show`, `pip install -U` and `pipx upgrade`.
PYPI_PROJECT_NAME = "pico-ml"
21
+
22
@app.callback(invoke_without_command=True, no_args_is_help=True)
def main(
    version: bool = typer.Option(False, "--version", "-v", help="PICO CLI version")
):
    """
    PICO CLI!
    """
    # NOTE: the docstring above is what Typer shows as the CLI help text, so it
    # is kept short on purpose; implementation notes live in comments.
    #
    # `version` is the top-level `--version` / `-v` flag: when set, the package
    # version is echoed and the CLI stops.
    if version:
        typer.echo(__version__)
        # typer.Exit is an exception class: it has to be raised to stop the
        # CLI. Merely instantiating it is a no-op, which would let a
        # sub-command given after `--version` run anyway.
        raise typer.Exit()
32
+
33
+
34
@app.command(name="ui")
def ui(
    port: Annotated[Optional[int], typer.Option("--port", "-p", help="Specify the port of the app")] = 5000,
    verbose: Annotated[Optional[str], typer.Option("--verbose", "-v", help="Specifiy verbosity")] = "debug"
):
    """Launch the web application"""
    # NOTE: the docstring above doubles as the command's help text in Typer.
    print("verbose", verbose)

    # The server call at the bottom blocks, so the browser is opened from a
    # background timer thread that fires once the server had time to start.
    browser_delay_seconds = 5
    browser_opener = threading.Timer(
        browser_delay_seconds,
        webbrowser.open,
        args=(f"http://127.0.0.1:{port}",),
    )
    browser_opener.start()

    # NOTE(review): the server listens on every interface ('0.0.0.0') while the
    # browser is pointed at the loopback address - confirm that exposing the
    # app on the network is intended.
    pico_app.run(debug=False, host='0.0.0.0', port=port)
54
+
55
+
56
@app.command()
def update():
    """Update the pico project to the latest version"""
    # NOTE: the docstring above doubles as the command's help text in Typer.

    def announce_and_confirm(command):
        # Show the command that is about to run; abort unless the user agrees.
        print("The following command will update [cyan]pico[/cyan]:")
        print(" ".join(command), end="\n\n")
        typer.confirm("Do you want to proceed?", abort=True)

    # See https://stackoverflow.com/a/50255019 for installation of a python package
    show_result = subprocess.run(
        [sys.executable, "-m", "pip", "show", PYPI_PROJECT_NAME],
        check=True,
        capture_output=True,
    )
    pip_info_lines = show_result.stdout.decode('latin-1').split("\n")

    # `pip show` reports an "Editable project location" line for editable
    # installs; in that case no upgrade is attempted.
    if any("Editable project location: " in line for line in pip_info_lines):
        print(
            "[cyan]pico[/cyan] is installed in editable mode, it should already be up to date."
        )
        print(f"Current version: {__version__}")
        return

    # pico was installed with pipx
    if "pipx" in sys.executable:
        print("[cyan]pico[/cyan] is managed by [cyan]pipx[/cyan]")
        pipx_command = ["pipx", "upgrade", PYPI_PROJECT_NAME]
        announce_and_confirm(pipx_command)
        # pipx is verbose enough, so its output is not captured
        subprocess.run(pipx_command, check=True)
        return

    # Update pico with the current Python interpreter
    pip_command = [sys.executable, "-m", "pip", "install", "-U", PYPI_PROJECT_NAME]
    announce_and_confirm(pip_command)
    subprocess.run(pip_command, check=True, capture_output=True)

    # Ask the freshly installed package for its version in a new process, since
    # the running interpreter still has the old one loaded.
    version_probe = subprocess.run(
        [sys.executable, "-m", "pico", "--version"], capture_output=True
    )
    new_version = version_probe.stdout.decode().strip()
    print(f"[cyan]pico[/cyan] updated to {new_version}")
114
+
115
+
116
+ # This is exposed for documentation purposes
117
+ typer_click_object = typer.main.get_command(app)
@@ -0,0 +1,17 @@
1
+ from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
2
+
3
# Registry of the supported hyper-parameter search strategies, keyed by the
# name of the scikit-learn class.
# Each entry holds:
#   "constructor": the search class itself
#   "params":      a list of descriptors (name / value / type / constant) for
#                  the extra settings of that search class
CV_ALGORITHMS = {
    "GridSearchCV": dict(constructor=GridSearchCV, params=[]),
    "RandomizedSearchCV": dict(
        constructor=RandomizedSearchCV,
        params=[
            dict(name="n_iter", value=10, type="int", constant=False),
        ],
    ),
}
@@ -0,0 +1,73 @@
1
+ import numpy as np
2
+ from pyscm.scm import SetCoveringMachineClassifier
3
+ from randomscm.randomscm import RandomScmClassifier
4
+ from sklearn.ensemble import RandomForestClassifier
5
+ from sklearn.tree import DecisionTreeClassifier
6
+
7
# Registry of the supported learning algorithms, keyed by display name.
# Each entry holds:
#   "function":             the classifier class
#   "ParamGrid":            candidate hyper-parameter values, one sub-dict per
#                           search strategy ("RandomSearch" / "GridSearch")
#   "importance_attribute": name of an attribute of the model
#                           (presumably read on the fitted estimator to get
#                           feature importances - confirm against callers)
LEARN_CONFIG = {
    "DecisionTree": {
        "function": DecisionTreeClassifier,
        "ParamGrid": {
            "RandomSearch": {
                "max_depth": np.arange(2, 9, step=1, dtype=int),
                "min_samples_split": np.arange(2, 20, step=2, dtype=int),
                "max_features": ["sqrt", "log2"],
            },
            "GridSearch": {
                "max_depth": [2, 3, 4, 5, 6, 7],
                "min_samples_split": [2, 4, 6, 8, 10],
                "max_features": ["sqrt", "log2"],
                # NOTE(review): only this grid sets class_weight; the
                # RandomSearch grid above does not - confirm this is intended.
                "class_weight": ["balanced"]
            }
        },
        "importance_attribute": "feature_importances_",
    },
    "RandomForest": {
        "function": RandomForestClassifier,
        "ParamGrid": {
            "RandomSearch" : {
                "n_estimators": np.arange(5, 300, step=10, dtype=int),
                "max_depth": np.arange(2, 6, step=1, dtype=int),  # i.e. [2, 3, 4, 5]
                "min_samples_split": np.arange(2, 20, step=2, dtype=int),
            },
            "GridSearch" : {
                "n_estimators": [10, 30, 50, 70, 100, 200],  # removed 5,
                "max_depth": [2, 3, 4, 5],
                "min_samples_split": [2, 4, 6, 8, 10]
            }
        },
        "importance_attribute": "feature_importances_",
    },
    "SCM": {
        "function": SetCoveringMachineClassifier,
        "ParamGrid": {
            "RandomSearch" : {
                "p": np.logspace(-2, 2, base=10, num=30),
                "max_rules": np.arange(1, 6, 1, dtype=int),
                "model_type": ["conjunction", "disjunction"],
            },
            "GridSearch" : {
                "p": np.logspace(-2, 2, base=10, num=7),  # 7 log-spaced values from 0.01 to 100
                "max_rules": [1, 2, 3, 4, 5],
                "model_type": ["conjunction", "disjunction"],
            }
        },
        "importance_attribute": "feature_importances_",
    },
    "RandomSCM": {
        "function": RandomScmClassifier,
        "ParamGrid": {
            "RandomSearch" : {
                "p": np.logspace(-2, 2, base=10, num=30),
                "n_estimators": np.arange(5, 200, step=10, dtype=int),
                "model_type": ["conjunction", "disjunction"],
            },
            "GridSearch" : {
                "p": np.logspace(-2, 2, base=10, num=5),
                "n_estimators": [5, 10, 30, 50, 70, 100, 200],
                "model_type": ["conjunction", "disjunction"],
            },
        },
        "importance_attribute": "feature_importances_",
    },
}
@@ -0,0 +1,51 @@
1
+ {
2
+ "DecisionTree": {
3
+ "function": "DecisionTreeClassifier",
4
+ "ParamGrid": {
5
+ "max_depth": [
6
+ 1,
7
+ 2,
8
+ 3,
9
+ 4,
10
+ 5,
11
+ 10
12
+ ],
13
+ "min_samples_split": [
14
+ 2,
15
+ 4,
16
+ 6,
17
+ 8,
18
+ 10
19
+ ]
20
+ }
21
+ },
22
+ "RandomForest": {
23
+ "function": "RandomForestClassifier",
24
+ "ParamGrid": {
25
+ "n_estimators": [
26
+ 1,
27
+ 2,
28
+ 4,
29
+ 10,
30
+ 30,
31
+ 70,
32
+ 100,
33
+ 500,
34
+ 1000
35
+ ]
36
+ }
37
+ },
38
+ "SVM_L1": {
39
+ "function": "LinearSVC",
40
+ "ParamGrid": {
41
+ "C": "np.logspace(-5, 5, 20)"
42
+ }
43
+ },
44
+ "SCM": {
45
+
46
+
47
+ },
48
+ "RandomSCM": {
49
+
50
+ }
51
+ }
@@ -0,0 +1,14 @@
1
# Feature counts offered when computing PCA, UMAP and strip charts
features = [5, 10, 40, 100]

# Slider marks: position on the slider (as a string) -> feature count label
default_marks = {
    str(position): str(feature_count)
    for position, feature_count in enumerate(features)
}

# Extra mark placed right after the last feature count, standing for "All"
all_mark = {f"{len(features)}": "All"}

# Offset applied when showing the used value
custom_mark_offset = 0.3

# Upper bound on the number of used features plotted (used for strip charts)
max_used_features_to_show = 200
@@ -0,0 +1,107 @@
1
+ from typing import Generator, Tuple, Dict, Union
2
+
3
+ from . import SplitGroup, MetaData
4
+ from .Results import *
5
+
6
+
7
+ class ClassificationDesign:
8
+ def __init__(self, classes_design: dict):
9
+ self._classes_design: dict = classes_design # dict with labels as keys and classes as values
10
+ self._name: str = ""
11
+ self._compute_name()
12
+ self._split_group: Union[SplitGroup, None] = None
13
+ self._selected_models_name: Union[list, None] = None
14
+ self.design_Results: dict = {}
15
+ self._is_done: bool = False
16
+ self._balance_correction: int = 0
17
+
18
+ def get_is_done(self) -> bool:
19
+ """
20
+ Return the attribute is_done which is trigered when the learning is done
21
+ """
22
+ return self._is_done
23
+
24
+ def set_is_done(self, is_done: bool) -> None:
25
+ self._is_done = is_done
26
+
27
+ def get_balance_correction(self) -> int:
28
+ return self._balance_correction
29
+
30
+ def set_balance_correction(self, balance_correction: int) -> None:
31
+ if balance_correction < 0:
32
+ raise ValueError("Balance correction cannot be negative")
33
+ self._balance_correction = balance_correction
34
+
35
+ def set_split_parameter_and_compute_splits(self, train_test_proportion: float, number_of_splits: int,
36
+ metadata: MetaData, pairing_column: str, uniq_sample_id: List[str],
37
+ test_split_seed: int|None=None) -> None:
38
+ """
39
+ Retrieve the classes repartition which is needed to create an instance of SplitGroup
40
+ Create an instance of SplitGroup in the attribute _split_group
41
+ (The init of SplitGroup triggers the _compute_splits function).
42
+ If test_split_seed is provided, then only this test split seed is computed.
43
+ """
44
+ classes_repartition = metadata.get_classes_repartition_based_on_design(self._classes_design)
45
+ self._split_group = SplitGroup(metadata, self.get_selected_targets_name(), train_test_proportion,
46
+ number_of_splits, self._classes_design, pairing_column, uniq_sample_id,
47
+ self._balance_correction, classes_repartition, test_split_seed)
48
+
49
+ def get_name(self) -> str:
50
+ return self._name
51
+
52
+ def get_full_name(self) -> str:
53
+ name = []
54
+ for key, item_list in self._classes_design.items():
55
+ name.append(f"{key} ({', '.join(item_list)})")
56
+ return " versus ".join(name)
57
+
58
+ def get_classes_design(self) -> dict:
59
+ return self._classes_design
60
+
61
+ def set_selected_models_name(self, selected_models_name: list) -> None:
62
+ """
63
+ Set the attribute self._selected_models_name
64
+ and create the attribute self.design_Results with initialized instances of the class Results for each algorithm
65
+ selected_models_name: list of names of models to run on data
66
+ """
67
+ if self._split_group is None:
68
+ raise ValueError("Trying to set models before setting splits parameters")
69
+ self._selected_models_name = selected_models_name
70
+ for n in self._selected_models_name:
71
+ self.design_Results[n] = Results(self._split_group.get_number_of_splits())
72
+
73
+ def get_results(self) -> Dict[str, Results]:
74
+ """
75
+ Return the results dict (attribute) corresponding to this instance of Classification Design
76
+ """
77
+ if self.design_Results == {}:
78
+ raise RuntimeError("The name of the selected models has to be set before accessing results.")
79
+ return self.design_Results
80
+
81
+ def _compute_name(self) -> None:
82
+ """
83
+ The name is made up of label1_vs_label2
84
+ Each label refering to a group of one or more classes from the data
85
+ """
86
+ self._name = "_vs_".join(self._classes_design)
87
+
88
+ def get_number_of_splits(self) -> int:
89
+ """
90
+ Retrieve the number of splits from SplitGroup instance
91
+ """
92
+ return self._split_group.get_number_of_splits()
93
+
94
+ def all_splits(self) -> Generator[Tuple[int, list], None, None]:
95
+ if self._split_group is None:
96
+ raise RuntimeError(
97
+ "Trying to access Splits before setting splits parameters"
98
+ )
99
+ for split_index in range(self._split_group.get_number_of_splits()):
100
+ yield split_index, self._split_group.load_split_with_index(split_index)
101
+
102
+ def get_selected_targets_name(self) -> list:
103
+ """
104
+ get the _classes_design dict in input and reverse it to have the classes as key and their corresponding labels
105
+ as value. It is then easier to retrieve a label for a specific class
106
+ """
107
+ return list(Utils.reverse_dict(self._classes_design).keys())