pico_ml-2.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. pico/__init__.py +3 -0
  2. pico/__main__.py +3 -0
  3. pico/cli/__init__.py +2 -0
  4. pico/cli/main.py +117 -0
  5. pico/conf/SupportedCV.py +17 -0
  6. pico/conf/SupportedModels.py +73 -0
  7. pico/conf/algo_sklearn.json +51 -0
  8. pico/conf/parameters.py +14 -0
  9. pico/domain/ClassificationDesign.py +107 -0
  10. pico/domain/Controller.py +397 -0
  11. pico/domain/DataMatrix.py +147 -0
  12. pico/domain/ExperimentDTO.py +17 -0
  13. pico/domain/MetaData.py +229 -0
  14. pico/domain/MetaboExperiment.py +696 -0
  15. pico/domain/MetaboModel.py +53 -0
  16. pico/domain/ModelFactory.py +45 -0
  17. pico/domain/Results.py +602 -0
  18. pico/domain/SplitGroup.py +202 -0
  19. pico/domain/__init__.py +9 -0
  20. pico/domain/dumps/metadata/.gitkeep +0 -0
  21. pico/domain/dumps/splits/.gitkeep +0 -0
  22. pico/service/DataFormat.py +180 -0
  23. pico/service/ExperimentDesign.py +30 -0
  24. pico/service/LoggerConfig.py +150 -0
  25. pico/service/Plots.py +472 -0
  26. pico/service/RunMLalgo.py +93 -0
  27. pico/service/SamplesPairing.py +390 -0
  28. pico/service/Utils.py +497 -0
  29. pico/service/__init__.py +7 -0
  30. pico/ui/__init__.py +1 -0
  31. pico/ui/app.py +145 -0
  32. pico/ui/assets/000_Stylesheet.css +464 -0
  33. pico/ui/assets/DecisionTree.png +0 -0
  34. pico/ui/assets/Figure_home_wider.png +0 -0
  35. pico/ui/assets/favicon.ico +0 -0
  36. pico/ui/assets/help_icon.png +0 -0
  37. pico/ui/assets/help_icon.svg +15 -0
  38. pico/ui/assets/update_figure_steps_MeDIC_4.svg +1 -0
  39. pico/ui/tabs/AggregatedResultsTab.py +394 -0
  40. pico/ui/tabs/InfoTab.py +440 -0
  41. pico/ui/tabs/InterpretTab.py +21 -0
  42. pico/ui/tabs/MLTab.py +487 -0
  43. pico/ui/tabs/MetaTab.py +23 -0
  44. pico/ui/tabs/ResultsTab.py +1062 -0
  45. pico/ui/tabs/SplitsTab.py +1227 -0
  46. pico/ui/tabs/__init__.py +6 -0
  47. pico/ui/tabs/utils.py +101 -0
  48. pico_ml-2.0.0.dist-info/METADATA +86 -0
  49. pico_ml-2.0.0.dist-info/RECORD +52 -0
  50. pico_ml-2.0.0.dist-info/WHEEL +4 -0
  51. pico_ml-2.0.0.dist-info/entry_points.txt +2 -0
  52. pico_ml-2.0.0.dist-info/licenses/LICENSE +437 -0
@@ -0,0 +1,202 @@
+ from typing import List, Tuple, Union
+
+ import pandas as pd
+ from sklearn.model_selection import train_test_split
+
+ from . import MetaData
+ from ..service import Utils, init_logger
+ import numpy as np
+
+
+ class SplitGroup:
+     def __init__(self, metadata: MetaData, selected_targets: List[str], train_test_proportion: float,
+                  number_of_splits: int, classes_design: dict, pairing_column: str,
+                  uniq_sample_id: List[str], balance_correction: int = 0,
+                  classes_repartition: Union[dict, None] = None,
+                  test_split_seed: Union[int, None] = None):
+         self._logger = init_logger()
+         self._metadata = metadata
+         self._number_of_split = number_of_splits
+         self._classes_design = classes_design
+         self._splits = []
+         self._compute_splits(train_test_proportion, number_of_splits, pairing_column, selected_targets,
+                              uniq_sample_id, balance_correction, classes_repartition, test_split_seed)
+
+     def _compute_splits(self, train_test_proportion: float, number_of_splits: int, pairing_column: str,
+                         selected_targets: List[str], uniq_sample_id: List[str],
+                         balance_correction: int = 0,
+                         classes_repartition: Union[dict, None] = None,
+                         test_split_seed: Union[int, None] = None) -> None:
+         """
+         Create the desired number of splits for the experiment. It handles the train/test split, the class
+         balancing, the pairing of samples, the classes design, etc.
+
+         Args:
+             train_test_proportion (float): Proportion of the dataset to include in the test split.
+             number_of_splits (int): Total number of splits.
+             pairing_column (str): Column to use for the pairing. When empty (""), no pairing is done.
+             selected_targets (List[str]): The classes selected through the interface or automate.py (the names of the
+                 selected classes/targets).
+                 When selected_targets contains targets coming from multiple columns, they are expected to be
+                 separated by "__", e.g. "ali__A", "med__B", etc.
+             uniq_sample_id (List[str]): Sample identifiers present in the data file; metadata rows whose sample id
+                 is not in this list are filtered out.
+             balance_correction (int, optional): Balance correction to adjust the proportion between classes.
+                 Defaults to 0 (no balancing).
+             classes_repartition (Union[dict, None], optional): Number of samples per class, used together with
+                 balance_correction. Defaults to None.
+             test_split_seed (int | None, optional): Split seed number. For test purposes only,
+                 to be used from automate.py to test one specific split. Defaults to None.
+         """
+         self._logger.info("_compute_split function beginning")
+
+         # 1 - filter out the samples having a target not included in the classification design
+         # retrieve the metadata dataframe
+         df_filter = self._metadata.get_metadata()
+         # keep only the rows for which the value in the final_targets column is in selected_targets
+         df_filter = df_filter[df_filter[self._metadata.get_target_column()].isin(selected_targets)]
+         # keep only the rows that correspond to samples in the data file
+         # (handles the case of a metadata file shared by multiple data files: samples in
+         # the metadata having corresponding targets may not be in the provided data file)
+         df_filter = df_filter[df_filter[self._metadata.get_id_column()].isin(uniq_sample_id)]
+
+         # 2 - select only one sample per entity
+         if pairing_column != "":
+             # sort the dataframe by the pairing_column values
+             df_entity = df_filter.sort_values(pairing_column)
+             # group samples by the pairing column and keep only the first row of each group (.nth(0) is more stable
+             # than .first())
+             # Careful: the groupby function changes the index of the dataframe to the column it groups by
+             df_entity = df_entity.groupby(pairing_column).nth(0)
+         else:
+             df_entity = df_filter
+
+         # 2.5 - extract ids and targets, transform targets to labels
+         ids = df_entity[self._metadata.get_id_column()]
+         targets = df_entity[self._metadata.get_target_column()]
+         labels = Utils.load_classes_from_targets(self._classes_design, targets)
+
+         # 3 - proceed with the train-test division on the selected samples
+         if test_split_seed is not None:
+             self._logger.debug(f"Testing split seed #{test_split_seed}")
+             split_indexes: list[int] = [test_split_seed]  # Test only one split seed
+         else:
+             split_indexes = list(range(number_of_splits))  # All split indexes
+
+         for split_index in split_indexes:
+             if pairing_column == "":
+                 X_train, X_test, y_train, y_test = train_test_split(ids, labels, test_size=train_test_proportion,
+                                                                     random_state=split_index, stratify=labels)
+
+             # 4 - retrieve the paired samples corresponding to the ones in the train or test set
+             else:
+                 # random shuffle initialisation for the second shuffle of samples
+                 rng = np.random.default_rng(seed=split_index)
+                 # define the ids column as the index of the dataframe, so it can be extracted with groupby().groups
+                 df = df_filter.set_index(self._metadata.get_id_column())
+                 # groups is a dictionary with the pairing values as keys and the indexes of the corresponding rows as values
+                 groups = df.groupby(pairing_column).groups
+                 # apply the train-test division on the pairing values / the entities
+                 # TODO : careful check if labels is in the right order with the data
+                 X_train_temp, X_test_temp, y_train_temp, y_test_temp = train_test_split(df_entity.index, labels,
+                                                                                         test_size=train_test_proportion,
+                                                                                         random_state=split_index,
+                                                                                         stratify=labels)
+                 # retrieve the ids corresponding to the entities in train
+                 X_train = []
+                 for representative in X_train_temp:
+                     represented_pairing_value = df_filter.loc[representative][pairing_column]
+                     X_train.extend(groups[represented_pairing_value])
+                 # retrieve the targets corresponding to the ids and then convert them to labels
+                 X_train = pd.Series(X_train)
+                 targets = df.loc[X_train][self._metadata.get_target_column()]
+                 y_train = Utils.load_classes_from_targets(self._classes_design, targets)
+
+                 training_data = list(zip(X_train, y_train))
+                 rng.shuffle(training_data)
+                 X_train, y_train = zip(*training_data)
+
+                 # retrieve the ids corresponding to the entities in test
+                 X_test = []
+                 for representative in X_test_temp:
+                     represented_pairing_value = df_filter.loc[representative][pairing_column]
+                     X_test.extend(groups[represented_pairing_value])
+                 # retrieve the targets corresponding to the ids and then convert them to labels
+                 X_test = pd.Series(X_test)
+                 targets = df.loc[X_test][self._metadata.get_target_column()]
+                 y_test = Utils.load_classes_from_targets(self._classes_design, targets)
+
+                 testing_data = list(zip(X_test, y_test))
+                 rng.shuffle(testing_data)
+                 X_test, y_test = zip(*testing_data)
+
+             if balance_correction > 0:
+                 X_train, y_train = Utils.remove_random_samples_from_class(X_train,
+                                                                           y_train,
+                                                                           balance_correction,
+                                                                           classes_repartition)
+             X_train = list(X_train)
+             y_train = list(y_train)
+             X_test = list(X_test)
+             y_test = list(y_test)
+
+             if not self._validate_split(y_train, y_test):
+                 raise RuntimeError(f"_compute_split step #4 aborted for the invalid split #{split_index}.")
+
+             self._splits.append([X_train, X_test, y_train, y_test])
+
+         self._number_of_split = len(self._splits)  # Update the number of splits if some have been removed
+
+         self._logger.info("_compute_split function done")
+
+     def load_split_with_index(self, split_index: int) -> list:
+         return self._splits[split_index]
+
+     def get_number_of_splits(self):
+         """
+         Return the number of splits attribute.
+         """
+         return self._number_of_split
+
+     def filter_sample_with_pairing_group(self, pairing_column: str) -> Tuple[List[str], List[str]]:
+         """
+         Only needs the name of the column used to pair samples together.
+         It retrieves the other information from the attributes (the MetaData object).
+         It then iterates over the whole metadata dataframe to keep only one sample per entity (an entity stands for a
+         biological source, like an individual). Multiple samples can originate from one entity.
+         """
+         metadata_dataframe = self._metadata.get_metadata()
+         id_column = self._metadata.get_id_column()
+         target_column = self._metadata.get_target_column()
+         filtered_id = []
+         filtered_target = []
+         already_selected_value = set()
+         # TODO : might want to change the process to sorting all rows and then picking the first one
+         for index, row in metadata_dataframe.iterrows():
+             if row[pairing_column] not in already_selected_value:
+                 already_selected_value.add(row[pairing_column])
+                 filtered_id.append(row[id_column])
+                 filtered_target.append(row[target_column])
+         return filtered_id, filtered_target
+
+     def get_selected_targets_and_ids(self, selected_targets: List[str], samples_id: List[str],
+                                      targets: List[str]) -> Tuple[Tuple[str], Tuple[str]]:
+         """
+         Filters out the target/id pairs whose target is not in the selected_targets list.
+         """
+         return tuple(zip(*[(target, id) for target, id in zip(targets, samples_id) if target in selected_targets]))
+
+     def _validate_split(self, y_train: list, y_test: list) -> bool:
+         # Train and test validation: they must both contain at least 2 classes.
+         nb_test_classes: int = len(set(y_test))
+         nb_train_classes: int = len(set(y_train))
+
+         if nb_test_classes < 2 or nb_train_classes < 2:
+             error_msg: str = "At least 2 classes must be present in both train and test splits."
+             if nb_test_classes < 2:
+                 error_msg += f" Test set contains only the class '{next(iter(set(y_test)))}'."
+             if nb_train_classes < 2:
+                 error_msg += f" Train set contains only the class '{next(iter(set(y_train)))}'."
+
+             self._logger.error(error_msg)
+             return False
+
+         return True
+
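SplitGroup._compute_splits above splits at the entity level first (one representative sample per pairing value) and then expands each selected entity back to all of its samples, so paired samples never end up on both sides of a split. Below is a minimal, self-contained sketch of that technique using plain pandas and scikit-learn; the column names (sample_id, patient, target) are illustrative only and are not the package's actual metadata schema.

    import pandas as pd
    from sklearn.model_selection import train_test_split

    # Toy metadata: several samples share one pairing value (here "patient").
    metadata = pd.DataFrame({
        "sample_id": ["s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8"],
        "patient":   ["p1", "p1", "p2", "p2", "p3", "p3", "p4", "p4"],
        "target":    ["Cas", "Cas", "Cas", "Cas", "Temoin", "Temoin", "Temoin", "Temoin"],
    })

    # 1 - keep one representative sample per entity (patient)
    representatives = metadata.sort_values("patient").drop_duplicates("patient")

    # 2 - split at the entity level, stratified by target
    train_repr, test_repr = train_test_split(
        representatives, test_size=0.5, random_state=0, stratify=representatives["target"]
    )

    # 3 - expand each selected entity back to all of its samples
    train_ids = metadata.loc[metadata["patient"].isin(train_repr["patient"]), "sample_id"].tolist()
    test_ids = metadata.loc[metadata["patient"].isin(test_repr["patient"]), "sample_id"].tolist()
    assert not set(train_ids) & set(test_ids)  # no sample (or its pair) leaks across the split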
@@ -0,0 +1,9 @@
+ from .SplitGroup import SplitGroup
+ from .MetaData import MetaData
+ from .ClassificationDesign import ClassificationDesign
+ from .MetaboExperiment import MetaboExperiment
+ from .ModelFactory import ModelFactory
+ from .Results import Results
+ from .Controller import Controller
+ from .DataMatrix import DataMatrix
+ from .MetaboModel import MetaboModel
File without changes
File without changes
@@ -0,0 +1,180 @@
+ import numpy as np
+ import pandas as pd
+ import os
+ import base64
+ import io
+ from .Utils import *
+
+
+ class DataFormat:
+     """
+     Take data file(s) as input and output a matrix where columns are samples and rows are features. With the matrix
+     comes a list of the column names so the samples can be retrieved properly.
+     """
+
+     def __init__(self, filename, data=None, use_raw=False, from_base64_str=True):
+         self.use_raw = use_raw
+         self.filename = filename
+         self.base64 = from_base64_str
+         self.data = data
+
+         # TODO : make sure to check that "not progen" matrices are well handled
+         if self.base64:
+             self.in_format = "base64"
+         elif os.path.isfile(filename):
+             self.in_format = "file"
+         elif os.path.isdir(filename):
+             self.in_format = "LDTD"
+         else:
+             raise TypeError(
+                 "The given path is not valid, it has to be a file or a directory."
+             )
+
+     def convert(self):
+         """
+         Convert the data format for easier use by PICO
+         """
+         if self.in_format == "base64":
+             data_type, data_string = self.data.split(",")
+             self.data = base64.b64decode(data_string)
+             data = self._convert_from_file()
+         elif self.in_format == "file":
+             data = self._convert_from_file()
+         elif self.in_format == "LDTD":
+             raise ValueError("reading data from LDTD is not supported yet")
+             # data = self._convert_from_LDTD()
+         else:
+             raise ValueError("self.in_format does not correspond to accepted values (base64, file)")
+         return data
+
+     def _convert_from_file(self):
+         """
+         Take a file path or a StringIO object and read it as a pandas DataFrame
+         """
+         file_ext = self.filename.split(".")[-1]
+         # TODO : beware of the sep (, or ;)
+         if "csv" in file_ext:  # Abundance matrices of Progenesis are always in csv format, so it's checked first
+             if self.in_format == "base64":  # this condition makes the input data coming from dcc.Upload readable
+                 self.data = io.StringIO(self.data.decode("utf-8"))
+             else:  # this else enables the dataframe to be read from the full file path
+                 self.data = self.filename
+             header = pd.read_csv(self.data, header=None, sep=None, engine="python", nrows=3,
+                                  index_col=0).fillna("").to_numpy()
+
+             # Need to reset the pointer to the top of the StringIO (to be able to read the string again)
+             if self.in_format == "base64":
+                 self.data.seek(0)
+
+             if "Normalised abundance" in header[0] or "Raw abundance" in header[0]:
+                 datatable = pd.read_csv(self.data, header=[0, 1, 2], sep=None, engine="python", index_col=0)
+                 # Will return: datatable_compoundsInfo, datatable, labels, sample_names
+                 return self._read_Progenesis_data_table(datatable, header)
+             else:
+                 datatable = pd.read_csv(self.data, sep=None, engine="python", index_col=0)
+                 # WARNING : returns None, datatable, None, None
+                 return self._read_general_data_table(datatable)
+
+         elif "xls" in file_ext or "od" in file_ext:  # TODO : restrict the "od" condition, might be too broad
+             if self.in_format == "base64":  # same as above
+                 self.data = io.BytesIO(self.data)  # Excel files are binary, so wrap the decoded bytes in BytesIO
+             else:
+                 self.data = self.filename
+             datatable = pd.read_excel(self.data, index_col=0)
+             # WARNING : returns None, datatable, None, None
+             return self._read_general_data_table(datatable)
+
+         else:
+             raise TypeError("The input file is not of the right type, must be Excel, ODS or CSV.")
+
+     def _convert_from_LDTD(self):
+         # TODO : implement the handling of LDTD data format
+         return ""
+
+     def _read_general_data_table(self, datatable):
+         """
+         For now this does nothing, but it might be the place to handle custom matrix formats with extra/unnecessary
+         columns or information.
+         Careful: it outputs only the datatable plus three None values because the calling function only needs the
+         datatable, but that might change.
+         """
+         # This return tuple fits the return when reading a Progenesis file
+         return None, datatable, None, None
+
+     def _read_Progenesis_data_table(self, datatable, header):
+         """
+         Assumes Raw data columns are written after Normalised data columns in the file.
+         :param datatable:
+         :return:
+         """
+         # print(header)
+         if not self.use_raw and "Normalised abundance" in header[0]:  # header.columns.tolist():
+             start_data = list(header[0]).index("Normalised abundance")
+         elif self.use_raw and "Raw abundance" in header[0]:  # header.columns.tolist():
+             start_data = list(header[0]).index("Raw abundance")
+         else:
+             raise KeyError("There is no Raw or Normalised abundance detected in the header.")
+
+         new_header = []
+         for row in header:
+             new_header.append(list_filler(row))
+
+         datatable.columns = new_header
+         datatable_compoundsInfo = datatable.iloc[:, 0:start_data]
+         datatable_compoundsInfo.columns = datatable_compoundsInfo.columns.droplevel([0, 1])
+         datatable_compoundsInfo = datatable_compoundsInfo.T
+
+         if self.use_raw:
+             datatable = datatable["Raw abundance"]
+             labels, sample_names = list(zip(*datatable.columns))
+         else:
+             datatable = datatable["Normalised abundance"]
+             labels, sample_names = list(zip(*datatable.columns))
+
+         datatable.columns = datatable.columns.droplevel(0)
+         datatable = datatable.T
+
+         datatable = datatable.loc[[index for index in datatable.index if "QC" not in index]]
+
+         return datatable_compoundsInfo, datatable, labels, sample_names
+
+         # start_normalized = header.columns.tolist().index("Normalised abundance")
+         # labels_array = np.array(header.iloc[0].tolist())
+
+         # if with_raw:
+         #     start_raw = header.columns.tolist().index("Raw abundance")
+         #     sample_names = datatable.iloc[:, start_normalized:start_raw].columns
+         #     labels = labels_array.tolist()[start_normalized:start_raw]
+         # else:
+         #     sample_names = datatable.iloc[:, start_normalized:].columns
+         #     labels = labels_array.tolist()[start_normalized:]
+         #
+         # current_label = ""
+         # for idx, l in enumerate(labels):
+         #     if l != "nan":
+         #         current_label = l
+         #     else:
+         #         labels[idx] = current_label
+         #
+         # if with_raw:
+         #     datatable_compoundsInfo = datatable.iloc[:, 0:start_normalized]
+         #     datatable_normalized = datatable.iloc[:, start_normalized:start_raw]
+         #     datatable_raw = datatable.iloc[:, start_raw:]
+         #     datatable_raw.columns = [i.rstrip(".1") for i in datatable_raw.columns]  # Fix the columns names
+         #
+         #     datatable_normalized = datatable_normalized.T
+         #     datatable_raw = datatable_raw.T
+         #     datatable_compoundsInfo = datatable_compoundsInfo.T
+         #     datatable_normalized.rename(columns={"Compound": "Sample"})
+         #     datatable_raw.rename(columns={"Compound": "Sample"})
+         #
+         #     if self.use_raw:
+         #         return datatable_compoundsInfo, datatable_raw, labels, sample_names
+         #     else:
+         #         return datatable_compoundsInfo, datatable_normalized, labels, sample_names
+         # else:
+         #     datatable_compoundsInfo = datatable.iloc[:, 0:start_normalized]
+         #     datatable_normalized = datatable.iloc[:, start_normalized:]
+         #     datatable_normalized = datatable_normalized.T
+         #     datatable_compoundsInfo = datatable_compoundsInfo.T
+         #     datatable_normalized.rename(columns={"Compound": "Sample"})
+         #     return datatable_compoundsInfo, datatable_normalized, labels, sample_names
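Based on the constructor and convert() shown above, reading an abundance matrix from disk might look like the sketch below; the file path is illustrative, and the four-value return shape follows _read_Progenesis_data_table / _read_general_data_table (the latter returns None for everything except the datatable).

    from pico.service.DataFormat import DataFormat

    # Hypothetical path to a Progenesis-style abundance matrix; from_base64_str=False because we read from disk.
    formatter = DataFormat("Data/Matrix_normalised_pos.csv", use_raw=False, from_base64_str=False)
    compounds_info, datatable, labels, sample_names = formatter.convert()

    # For a Progenesis file: rows are samples (QC rows dropped), columns are compounds/features.
    print(datatable.shape)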
@@ -0,0 +1,30 @@
+ DATA_MATRIX = "Data\\Matrix_normalised_pos.csv"
+
+ # Cas, Temoin, TC
+ EXPERIMENT_DESIGNS = {
+     "Ctrl_vs_Case": {
+         "classes": {"Controls": ["Temoin"], "Cases": ["Cas"]},
+         "TestSize": 0.2,
+     },
+     # "Control vs TC": {
+     #     "classes": {
+     #         "Controles": ["Cas"],
+     #         "TC": ["TC"]
+     #     },
+     #     "TestSize": 0.2,
+     # },
+     # "TC vs Cas": {
+     #     "classes": {
+     #         "TC": ["TC"],
+     #         "Cases": ["Cas"]
+     #     },
+     #     "TestSize": 0.2,
+     # },
+     # "Control vs all": {
+     #     "classes": {
+     #         "Control": ["Temoin"],
+     #         "All": ["Cas", "TC"]
+     #     },
+     #     "TestSize": 0.2,
+     # },
+ }
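Each design's "classes" entry maps a class label to the raw target values it groups together. The sketch below shows one plausible way such a design can be turned into a target-to-label lookup, which is roughly what a helper like Utils.load_classes_from_targets would need; the inversion logic is an assumption, only the dictionary shape comes from the file above.

    # Assumed shape, taken from EXPERIMENT_DESIGNS above.
    classes_design = {"Controls": ["Temoin"], "Cases": ["Cas"]}

    # Invert the design: raw target value -> class label.
    target_to_label = {target: label for label, targets in classes_design.items() for target in targets}

    targets = ["Temoin", "Cas", "Cas", "Temoin"]
    labels = [target_to_label[t] for t in targets]
    print(labels)  # ['Controls', 'Cases', 'Cases', 'Controls']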
@@ -0,0 +1,150 @@
+ import logging
+ import coloredlogs  # type: ignore
+ from datetime import datetime
+ import inspect
+ import os
+ import threading
+ import traceback
+ from functools import wraps
+ from typing import Callable
+ from .Utils import get_pico_subdir
+
+ log_filename: str | None = None  # Global variable for log filename
+
+ def log_exceptions(logger: logging.Logger) -> Callable:
+     def decorator(func: Callable):
+         @wraps(func)
+         def wrapper(*args, **kwargs):
+             try:
+                 return func(*args, **kwargs)
+             except Exception as e:
+                 # Log the exception or handle it as needed
+                 thread_name = threading.current_thread().name
+                 logger.error(f"Error in thread {thread_name}: {e}\n{traceback.format_exc()}")
+                 raise  # Re-raise the exception to preserve the original
+         return wrapper
+     return decorator
+
+
+ def set_log_filename(filename: str = "pico.log", add_date: bool = True, level=logging.DEBUG) -> logging.Logger:
+     """Sets the log filename with an optional date suffix.
+     Args:
+         filename (str, optional): The base filename for the log. Defaults to "pico.log".
+         add_date (bool, optional): If True, adds the current date to the filename. Defaults to True.
+     """
+     global log_filename
+
+     if add_date:
+         date_suffix = datetime.now().strftime("%Y-%m-%d")
+
+         if filename.lower().endswith(".log"):
+             log_filename = filename.replace(".log", f"_{date_suffix}.log")
+         else:
+             log_filename = f"{filename}_{date_suffix}.log"
+     else:
+         log_filename = filename
+
+     # Log file in the ~/pico_files/logs directory
+     logs_directory = get_pico_subdir("logs")
+     log_filename = os.path.join(logs_directory, log_filename)
+
+     # Write a separator line in the log file to mark the start of the current session
+     with open(log_filename, 'a') as log_file:
+         if threading.current_thread() is threading.main_thread():
+             log_file.write(f"\n{'----- New start (' + threading.current_thread().name + ') ':-<80}\n")
+         else:
+             log_file.write(f" New thread ({threading.current_thread().name})\n")
+
+     # Root logger
+     root_logger = logging.getLogger()
+     root_logger.handlers = []  # Clear existing handlers
+     root_logger.setLevel(logging.WARNING)  # Only show WARNING level and above
+     root_logger.propagate = False
+
+     # Terminal (console) handler
+     console_handler = logging.StreamHandler()
+     console_handler.setLevel(logging.WARNING)
+
+     # File handler
+     file_handler = logging.FileHandler(log_filename)
+     file_handler.setLevel(logging.DEBUG)  # Ensure DEBUG level for detailed logs
+
+     formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+     console_handler.setFormatter(formatter)
+     file_handler.setFormatter(formatter)
+
+     root_logger.addHandler(console_handler)
+     root_logger.addHandler(file_handler)
+
+     # Ensure colored logs for the terminal
+     coloredlogs.install(level=logging.WARNING, logger=root_logger, stream=console_handler.stream)
+
+     # werkzeug logs
+     werkzeug_logger = logging.getLogger('werkzeug')
+     werkzeug_logger.setLevel(logging.INFO)  # Log level to INFO for the console
+
+     # Add logs for werkzeug in the terminal
+     werkzeug_console_handler = logging.StreamHandler()
+     werkzeug_console_handler.setLevel(logging.INFO)
+     werkzeug_console_handler.setFormatter(formatter)
+     werkzeug_logger.addHandler(werkzeug_console_handler)
+     werkzeug_logger.propagate = False
+
+     return init_logger()
+
+
+ def init_logger(module_name: str | None = None, level=logging.DEBUG) -> logging.Logger:
+     """Create and configure a logger for the given module (or the calling module), writing to the log file set by set_log_filename ("pico.log" by default).
+
+     Args:
+         module_name (str | None, optional): The name of the module for the logger. Defaults to None.
+         level (int, optional): The logging level. Defaults to logging.DEBUG.
+             Levels (from high to low): logging.CRITICAL logging.ERROR
+             logging.WARNING logging.INFO logging.DEBUG
+
+     Returns:
+         logging.Logger: The configured logger instance
+     """
+
+     global log_filename
+
+     def get_module_name() -> str | None:
+         cur_frame = inspect.currentframe()
+         if not cur_frame:
+             return None
+         frame = cur_frame.f_back
+         if not frame:
+             return None
+         module = inspect.getmodule(frame)
+         if not module:
+             return None
+         return module.__name__
+
+     if module_name is None:
+         module_name = get_module_name() or "pico"
+
+     logger = logging.getLogger(module_name)
+     logger.setLevel(level)
+     logger.propagate = False
+
+     if not logger.handlers:
+         # Terminal (console) handler
+         console_handler = logging.StreamHandler()
+         console_handler.setLevel(level)
+         formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+         console_handler.setFormatter(formatter)
+         logger.addHandler(console_handler)
+
+         # File handler
+         if log_filename is not None:
+             file_handler = logging.FileHandler(log_filename)
+             file_handler.setLevel(level)
+             formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+             file_handler.setFormatter(formatter)
+
+             logger.addHandler(file_handler)
+
+         # Ensure colored logs for the terminal
+         coloredlogs.install(level=level, logger=logger, stream=console_handler.stream)
+
+     return logger
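A short usage sketch of the two functions above; the import path follows the package layout (pico/service/LoggerConfig.py), and the log location comes from the get_pico_subdir("logs") call and the comment in set_log_filename. The exact filename shown in the comment is illustrative.

    from pico.service.LoggerConfig import set_log_filename, init_logger

    # Configure the shared log file once at application start-up
    # (written under ~/pico_files/logs, e.g. pico_2024-01-01.log when add_date=True).
    set_log_filename("pico.log", add_date=True)

    # Each module then creates its own named logger; messages go to the console and to the log file.
    logger = init_logger(__name__)
    logger.info("experiment started")
    logger.debug("detailed state useful for debugging")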