msreport 0.0.26__py3-none-any.whl → 0.0.28__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- msreport/__init__.py +4 -6
- msreport/aggregate/condense.py +1 -1
- msreport/aggregate/pivot.py +1 -0
- msreport/aggregate/summarize.py +2 -2
- msreport/analyze.py +171 -38
- msreport/errors.py +1 -2
- msreport/export.py +16 -13
- msreport/fasta.py +2 -1
- msreport/helper/__init__.py +7 -7
- msreport/helper/calc.py +29 -24
- msreport/helper/maxlfq.py +2 -2
- msreport/helper/table.py +5 -6
- msreport/impute.py +7 -8
- msreport/isobar.py +10 -9
- msreport/normalize.py +54 -36
- msreport/peptidoform.py +6 -4
- msreport/plot/__init__.py +41 -0
- msreport/plot/_partial_plots.py +159 -0
- msreport/plot/comparison.py +490 -0
- msreport/plot/distribution.py +253 -0
- msreport/plot/multivariate.py +355 -0
- msreport/plot/quality.py +431 -0
- msreport/plot/style.py +286 -0
- msreport/plot/style_sheets/msreport-notebook.mplstyle +57 -0
- msreport/plot/style_sheets/seaborn-whitegrid.mplstyle +45 -0
- msreport/qtable.py +109 -17
- msreport/reader.py +73 -79
- msreport/rinterface/__init__.py +2 -1
- msreport/rinterface/limma.py +2 -1
- msreport/rinterface/rinstaller.py +3 -3
- {msreport-0.0.26.dist-info → msreport-0.0.28.dist-info}/METADATA +7 -3
- msreport-0.0.28.dist-info/RECORD +38 -0
- msreport/plot.py +0 -1132
- msreport-0.0.26.dist-info/RECORD +0 -30
- {msreport-0.0.26.dist-info → msreport-0.0.28.dist-info}/WHEEL +0 -0
- {msreport-0.0.26.dist-info → msreport-0.0.28.dist-info}/licenses/LICENSE.txt +0 -0
- {msreport-0.0.26.dist-info → msreport-0.0.28.dist-info}/top_level.txt +0 -0
msreport/helper/table.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import re
|
|
2
|
-
from typing import Iterable, Union
|
|
2
|
+
from typing import Iterable, Sequence, Union
|
|
3
3
|
|
|
4
4
|
import numpy as np
|
|
5
5
|
import pandas as pd
|
|
@@ -63,7 +63,7 @@ def intensities_in_logspace(data: Union[pd.DataFrame, np.ndarray, Iterable]) ->
|
|
|
63
63
|
"""
|
|
64
64
|
data = np.array(data, dtype=float)
|
|
65
65
|
mask = np.isfinite(data)
|
|
66
|
-
return np.all(data[mask].flatten() <= 64)
|
|
66
|
+
return bool(np.all(data[mask].flatten() <= 64))
|
|
67
67
|
|
|
68
68
|
|
|
69
69
|
def rename_sample_columns(table: pd.DataFrame, mapping: dict[str, str]) -> pd.DataFrame:
|
|
@@ -102,7 +102,7 @@ def rename_sample_columns(table: pd.DataFrame, mapping: dict[str, str]) -> pd.Da
|
|
|
102
102
|
|
|
103
103
|
|
|
104
104
|
def rename_mq_reporter_channels(
|
|
105
|
-
table: pd.DataFrame, channel_names:
|
|
105
|
+
table: pd.DataFrame, channel_names: Sequence[str]
|
|
106
106
|
) -> None:
|
|
107
107
|
"""Renames reporter channel numbers with sample names.
|
|
108
108
|
|
|
@@ -157,8 +157,7 @@ def find_columns(
|
|
|
157
157
|
Returns:
|
|
158
158
|
A list of column names.
|
|
159
159
|
"""
|
|
160
|
-
|
|
161
|
-
matched_columns = np.array(table.columns)[matches].tolist()
|
|
160
|
+
matched_columns = [col for col in table.columns if substring in col]
|
|
162
161
|
if must_be_substring:
|
|
163
162
|
matched_columns = [col for col in matched_columns if col != substring]
|
|
164
163
|
return matched_columns
|
|
@@ -255,7 +254,7 @@ def remove_rows_by_partial_match(
|
|
|
255
254
|
|
|
256
255
|
|
|
257
256
|
def join_tables(
|
|
258
|
-
tables:
|
|
257
|
+
tables: Sequence[pd.DataFrame], reset_index: bool = False
|
|
259
258
|
) -> pd.DataFrame:
|
|
260
259
|
"""Returns a joined dataframe.
|
|
261
260
|
|
msreport/impute.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
-
|
|
2
|
+
|
|
3
|
+
from typing import Any, Optional
|
|
3
4
|
|
|
4
5
|
import numpy as np
|
|
5
6
|
import pandas as pd
|
|
@@ -18,7 +19,7 @@ class FixedValueImputer:
|
|
|
18
19
|
def __init__(
|
|
19
20
|
self,
|
|
20
21
|
strategy: str,
|
|
21
|
-
fill_value:
|
|
22
|
+
fill_value: float = 0.0,
|
|
22
23
|
column_wise: bool = True,
|
|
23
24
|
):
|
|
24
25
|
"""Initializes the FixedValueImputer.
|
|
@@ -51,17 +52,15 @@ class FixedValueImputer:
|
|
|
51
52
|
Returns the fitted FixedValueImputer instance.
|
|
52
53
|
"""
|
|
53
54
|
if self.strategy == "constant":
|
|
54
|
-
|
|
55
|
-
# raise Excpetion()
|
|
56
|
-
fill_values = {column: self.fill_value for column in table.columns}
|
|
55
|
+
fill_values = dict.fromkeys(table.columns, self.fill_value)
|
|
57
56
|
elif self.strategy == "below":
|
|
58
57
|
if self.column_wise:
|
|
59
58
|
fill_values = {}
|
|
60
|
-
for column in table:
|
|
59
|
+
for column in table.columns:
|
|
61
60
|
fill_values[column] = _calculate_integer_below_min(table[column])
|
|
62
61
|
else:
|
|
63
62
|
int_below_min = _calculate_integer_below_min(table)
|
|
64
|
-
fill_values =
|
|
63
|
+
fill_values = dict.fromkeys(table.columns, int_below_min)
|
|
65
64
|
self._sample_fill_values = fill_values
|
|
66
65
|
return self
|
|
67
66
|
|
|
@@ -240,7 +239,7 @@ class PerseusImputer:
|
|
|
240
239
|
return _table
|
|
241
240
|
|
|
242
241
|
|
|
243
|
-
def confirm_is_fitted(imputer:
|
|
242
|
+
def confirm_is_fitted(imputer: Any, msg: Optional[str] = None) -> None:
|
|
244
243
|
"""Perform is_fitted validation for imputer instances.
|
|
245
244
|
|
|
246
245
|
Checks if the imputer is fitted by verifying the presence of fitted attributes
|
msreport/isobar.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
+
|
|
2
3
|
import functools
|
|
3
4
|
from typing import Protocol
|
|
4
5
|
|
|
@@ -27,7 +28,7 @@ class IsotopeImpurityCorrecter:
|
|
|
27
28
|
def __init__(self):
|
|
28
29
|
self._impurity_matrix = None
|
|
29
30
|
|
|
30
|
-
def fit(self, impurity_matrix: np.
|
|
31
|
+
def fit(self, impurity_matrix: np.ndarray) -> IsotopeImpurityCorrecter:
|
|
31
32
|
"""Fits the isotope impurity correcter to a given impurity matrix.
|
|
32
33
|
|
|
33
34
|
Args:
|
|
@@ -51,7 +52,7 @@ class IsotopeImpurityCorrecter:
|
|
|
51
52
|
"""Returns True if the IsotopeImpurityCorrecter has been fitted."""
|
|
52
53
|
return self._impurity_matrix is not None
|
|
53
54
|
|
|
54
|
-
def get_fits(self) -> np.
|
|
55
|
+
def get_fits(self) -> np.ndarray:
|
|
55
56
|
"""Returns a copy of the fitted impurity matrix.
|
|
56
57
|
|
|
57
58
|
returns:
|
|
@@ -89,9 +90,9 @@ class IsotopeImpurityCorrecter:
|
|
|
89
90
|
|
|
90
91
|
|
|
91
92
|
def correct_isobaric_reporter_impurities(
|
|
92
|
-
intensity_table: np.
|
|
93
|
-
diagonal_impurity_matrix: np.
|
|
94
|
-
) -> np.
|
|
93
|
+
intensity_table: np.ndarray,
|
|
94
|
+
diagonal_impurity_matrix: np.ndarray,
|
|
95
|
+
) -> np.ndarray:
|
|
95
96
|
"""Performs isotope impurity correction on isobaric reporter expression values.
|
|
96
97
|
|
|
97
98
|
Args:
|
|
@@ -122,8 +123,8 @@ def correct_isobaric_reporter_impurities(
|
|
|
122
123
|
|
|
123
124
|
|
|
124
125
|
def _apply_impurity_contamination(
|
|
125
|
-
intensities: np.
|
|
126
|
-
) -> np.
|
|
126
|
+
intensities: np.ndarray, impurity_matrix: np.ndarray
|
|
127
|
+
) -> np.ndarray:
|
|
127
128
|
"""Applies reporter isotope impurity interference to an intensity array.
|
|
128
129
|
|
|
129
130
|
Args:
|
|
@@ -141,8 +142,8 @@ def _apply_impurity_contamination(
|
|
|
141
142
|
|
|
142
143
|
|
|
143
144
|
def _correct_impurity_contamination(
|
|
144
|
-
intensities: np.
|
|
145
|
-
) -> np.
|
|
145
|
+
intensities: np.ndarray, impurity_matrix: np.ndarray
|
|
146
|
+
) -> np.ndarray:
|
|
146
147
|
"""Applies reporter isotope impurity interference correction to an intensity array.
|
|
147
148
|
|
|
148
149
|
Args:
|
msreport/normalize.py
CHANGED
|
@@ -1,38 +1,29 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import
|
|
4
|
-
from typing import Callable, Iterable, Optional
|
|
2
|
+
|
|
3
|
+
from typing import Callable, Iterable, Optional, Protocol
|
|
5
4
|
|
|
6
5
|
import numpy as np
|
|
7
6
|
import pandas as pd
|
|
8
7
|
import statsmodels.nonparametric.smoothers_lowess
|
|
8
|
+
from typing_extensions import Self
|
|
9
9
|
|
|
10
10
|
import msreport.helper
|
|
11
11
|
import msreport.helper.maxlfq as MAXLFQ
|
|
12
12
|
from msreport.errors import NotFittedError
|
|
13
13
|
|
|
14
14
|
|
|
15
|
-
class
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
@abc.abstractmethod
|
|
19
|
-
def fit(self, table: pd.DataFrame) -> BaseSampleNormalizer:
|
|
20
|
-
...
|
|
15
|
+
class AbstractTransformer(Protocol):
|
|
16
|
+
def fit(self, table: pd.DataFrame) -> Self:
|
|
17
|
+
"""Fits the Transformer and returns a fitted Transformer instance."""
|
|
21
18
|
|
|
22
|
-
@abc.abstractmethod
|
|
23
19
|
def is_fitted(self) -> bool:
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
@abc.abstractmethod
|
|
27
|
-
def get_fits(self) -> dict[...]:
|
|
28
|
-
...
|
|
20
|
+
"""Returns True if the Transformer has been fitted."""
|
|
29
21
|
|
|
30
|
-
@abc.abstractmethod
|
|
31
22
|
def transform(self, table: pd.DataFrame) -> pd.DataFrame:
|
|
32
|
-
|
|
23
|
+
"""Transform values in table."""
|
|
33
24
|
|
|
34
25
|
|
|
35
|
-
class FixedValueNormalizer
|
|
26
|
+
class FixedValueNormalizer:
|
|
36
27
|
"""Normalization by a constant normalization factor for each sample.
|
|
37
28
|
|
|
38
29
|
Expects log transformed intensity values.
|
|
@@ -58,11 +49,11 @@ class FixedValueNormalizer(BaseSampleNormalizer):
|
|
|
58
49
|
f'"comparison" = {comparison} not allowed. '
|
|
59
50
|
'Must be either "paired" or "reference".'
|
|
60
51
|
)
|
|
61
|
-
self._comparison_mode = comparison
|
|
62
|
-
self._fit_function = center_function
|
|
63
|
-
self._sample_fits =
|
|
52
|
+
self._comparison_mode: str = comparison
|
|
53
|
+
self._fit_function: Callable = center_function
|
|
54
|
+
self._sample_fits: dict[str, float] = {}
|
|
64
55
|
|
|
65
|
-
def fit(self, table: pd.DataFrame) ->
|
|
56
|
+
def fit(self, table: pd.DataFrame) -> Self:
|
|
66
57
|
"""Fits the FixedValueNormalizer.
|
|
67
58
|
|
|
68
59
|
Args:
|
|
@@ -80,7 +71,7 @@ class FixedValueNormalizer(BaseSampleNormalizer):
|
|
|
80
71
|
|
|
81
72
|
def is_fitted(self) -> bool:
|
|
82
73
|
"""Returns True if the FixedValueNormalizer has been fitted."""
|
|
83
|
-
return self._sample_fits
|
|
74
|
+
return True if self._sample_fits else False
|
|
84
75
|
|
|
85
76
|
def get_fits(self) -> dict[str, float]:
|
|
86
77
|
"""Returns a dictionary containing the fitted center values per sample.
|
|
@@ -159,13 +150,13 @@ class FixedValueNormalizer(BaseSampleNormalizer):
|
|
|
159
150
|
self._sample_fits[sample] = sample_fit
|
|
160
151
|
|
|
161
152
|
|
|
162
|
-
class ValueDependentNormalizer
|
|
153
|
+
class ValueDependentNormalizer:
|
|
163
154
|
"""Normalization with a value dependent fit for each sample.
|
|
164
155
|
|
|
165
156
|
Expects log transformed intensity values.
|
|
166
157
|
"""
|
|
167
158
|
|
|
168
|
-
def __init__(self, fit_function: Callable):
|
|
159
|
+
def __init__(self, fit_function: Callable[[Iterable, Iterable], np.ndarray]):
|
|
169
160
|
"""Initializes the ValueDependentNormalizer.
|
|
170
161
|
|
|
171
162
|
Args:
|
|
@@ -175,10 +166,10 @@ class ValueDependentNormalizer(BaseSampleNormalizer):
|
|
|
175
166
|
with two columns. The first column contains the values and the second
|
|
176
167
|
column the fitted deviations.
|
|
177
168
|
"""
|
|
178
|
-
self._sample_fits =
|
|
169
|
+
self._sample_fits: dict[str, np.ndarray] = {}
|
|
179
170
|
self._fit_function = fit_function
|
|
180
171
|
|
|
181
|
-
def fit(self, table: pd.DataFrame) ->
|
|
172
|
+
def fit(self, table: pd.DataFrame) -> Self:
|
|
182
173
|
"""Fits the ValueDependentNormalizer.
|
|
183
174
|
|
|
184
175
|
Args:
|
|
@@ -192,9 +183,9 @@ class ValueDependentNormalizer(BaseSampleNormalizer):
|
|
|
192
183
|
|
|
193
184
|
def is_fitted(self) -> bool:
|
|
194
185
|
"""Returns True if the ValueDependentNormalizer has been fitted."""
|
|
195
|
-
return self._sample_fits
|
|
186
|
+
return True if self._sample_fits else False
|
|
196
187
|
|
|
197
|
-
def get_fits(self) -> dict[str,
|
|
188
|
+
def get_fits(self) -> dict[str, np.ndarray]:
|
|
198
189
|
"""Returns a dictionary containing lists of fitting data per sample.
|
|
199
190
|
|
|
200
191
|
Returns:
|
|
@@ -324,14 +315,14 @@ class CategoricalNormalizer:
|
|
|
324
315
|
column must be present in the reference table and the table to be
|
|
325
316
|
transformed.
|
|
326
317
|
"""
|
|
327
|
-
self._fitted_table =
|
|
328
|
-
self._category_column = category_column
|
|
318
|
+
self._fitted_table: pd.DataFrame = pd.DataFrame()
|
|
319
|
+
self._category_column: str = category_column
|
|
329
320
|
|
|
330
321
|
def is_fitted(self) -> bool:
|
|
331
322
|
"""Returns True if the CategoricalNormalizer has been fitted."""
|
|
332
|
-
return self._fitted_table
|
|
323
|
+
return not self._fitted_table.empty
|
|
333
324
|
|
|
334
|
-
def fit(self, reference_table: pd.DataFrame) ->
|
|
325
|
+
def fit(self, reference_table: pd.DataFrame) -> Self:
|
|
335
326
|
"""Fits the CategoricalNormalizer to a reference table.
|
|
336
327
|
|
|
337
328
|
Args:
|
|
@@ -397,7 +388,34 @@ class CategoricalNormalizer:
|
|
|
397
388
|
return transformed_table
|
|
398
389
|
|
|
399
390
|
|
|
400
|
-
class
|
|
391
|
+
class PercentageScaler:
    """Scales the values of each column to their share of the column sum.

    The scaler is stateless: fitting is a no-op and the instance is always
    considered fitted. Values are divided by the column sum only and are not
    multiplied by 100.
    """

    def fit(self, table: pd.DataFrame) -> Self:
        """Returns the instance itself; the table is not inspected."""
        return self

    def is_fitted(self) -> bool:
        """Always returns True because the PercentageScaler needs no fitting."""
        return True

    def get_fits(self) -> dict:
        """Returns an empty dictionary; the PercentageScaler has no fitted values."""
        return {}

    def transform(self, table: pd.DataFrame) -> pd.DataFrame:
        """Divides the values of each column by the sum of that column.

        Args:
            table: The table whose column values are scaled.

        Returns:
            A new table containing the scaled values.
        """
        # The column sums form a Series indexed by column name; axis=1 aligns that
        # index with the table columns so each column is divided by its own sum.
        return table.divide(table.sum(axis=0), axis=1)
|
|
416
|
+
|
|
417
|
+
|
|
418
|
+
class ZscoreScaler:
|
|
401
419
|
"""Normalize samples by z-score scaling."""
|
|
402
420
|
|
|
403
421
|
def __init__(self, with_mean: bool = True, with_std: bool = True):
|
|
@@ -410,7 +428,7 @@ class ZscoreScaler(BaseSampleNormalizer):
|
|
|
410
428
|
self._with_mean = with_mean
|
|
411
429
|
self._with_std = with_std
|
|
412
430
|
|
|
413
|
-
def fit(self, table: pd.DataFrame) ->
|
|
431
|
+
def fit(self, table: pd.DataFrame) -> Self:
|
|
414
432
|
"""Returns the instance itself."""
|
|
415
433
|
return self
|
|
416
434
|
|
|
@@ -440,7 +458,7 @@ class ZscoreScaler(BaseSampleNormalizer):
|
|
|
440
458
|
|
|
441
459
|
|
|
442
460
|
def confirm_is_fitted(
|
|
443
|
-
normalizer:
|
|
461
|
+
normalizer: AbstractTransformer, msg: Optional[str] = None
|
|
444
462
|
) -> None:
|
|
445
463
|
"""Perform is_fitted validation for normalizer instances.
|
|
446
464
|
|
msreport/peptidoform.py
CHANGED
|
@@ -67,7 +67,9 @@ class Peptide:
|
|
|
67
67
|
probabilities = []
|
|
68
68
|
for site in self.list_modified_peptide_sites(modification):
|
|
69
69
|
probabilities.append(self.get_peptide_site_probability(site))
|
|
70
|
-
|
|
70
|
+
if None in probabilities:
|
|
71
|
+
return None
|
|
72
|
+
return float(np.prod(probabilities))
|
|
71
73
|
|
|
72
74
|
def get_peptide_site_probability(self, position: int) -> Optional[float]:
|
|
73
75
|
"""Return the modification localization probability of the peptide position.
|
|
@@ -161,7 +163,7 @@ def parse_modified_sequence(
|
|
|
161
163
|
modified_sequence: str,
|
|
162
164
|
tag_open: str,
|
|
163
165
|
tag_close: str,
|
|
164
|
-
) -> tuple[str, list]:
|
|
166
|
+
) -> tuple[str, list[tuple[int, str]]]:
|
|
165
167
|
"""Returns the plain sequence and a list of modification positions and tags.
|
|
166
168
|
|
|
167
169
|
Args:
|
|
@@ -253,7 +255,7 @@ def make_localization_string(
|
|
|
253
255
|
return localization_string
|
|
254
256
|
|
|
255
257
|
|
|
256
|
-
def read_localization_string(localization_string: str) -> dict:
|
|
258
|
+
def read_localization_string(localization_string: str) -> dict[str, dict[int, float]]:
|
|
257
259
|
"""Converts a site localization probability string into a dictionary.
|
|
258
260
|
|
|
259
261
|
Args:
|
|
@@ -269,7 +271,7 @@ def read_localization_string(localization_string: str) -> dict:
|
|
|
269
271
|
A dictionary in the form {"modification tag": {position: probability}}, where
|
|
270
272
|
positions are integers and probabilities are floats ranging from 0 to 1.
|
|
271
273
|
"""
|
|
272
|
-
localization = {}
|
|
274
|
+
localization: dict[str, dict[int, float]] = {}
|
|
273
275
|
if localization_string == "":
|
|
274
276
|
return localization
|
|
275
277
|
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""This module provides various plotting functions for visualizing data within a Qtable.
|
|
2
|
+
|
|
3
|
+
The functions in this module generate a wide range of plots, including heatmaps, PCA
|
|
4
|
+
plots, volcano plots, and histograms, to analyze and compare expression values,
|
|
5
|
+
missingness, contaminants, and other features in proteomics datasets. The plots are
|
|
6
|
+
designed to work with the Qtable class as input, which provides structured access to
|
|
7
|
+
proteomics data and experimental design information.
|
|
8
|
+
|
|
9
|
+
The style of the plots can be customized using the `set_active_style` function, which
|
|
10
|
+
allows applying style sheets from the msreport library or those available in matplotlib.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from .comparison import expression_comparison, pvalue_histogram, volcano_ma
|
|
14
|
+
from .distribution import experiment_ratios, replicate_ratios
|
|
15
|
+
from .multivariate import expression_clustermap, sample_pca
|
|
16
|
+
from .quality import (
|
|
17
|
+
contaminants,
|
|
18
|
+
missing_values_horizontal,
|
|
19
|
+
missing_values_vertical,
|
|
20
|
+
sample_correlation,
|
|
21
|
+
sample_intensities,
|
|
22
|
+
)
|
|
23
|
+
from .style import ColorWheelDict, set_active_style, set_dpi
|
|
24
|
+
|
|
25
|
+
__all__ = [
|
|
26
|
+
"ColorWheelDict",
|
|
27
|
+
"set_dpi",
|
|
28
|
+
"set_active_style",
|
|
29
|
+
"missing_values_vertical",
|
|
30
|
+
"missing_values_horizontal",
|
|
31
|
+
"contaminants",
|
|
32
|
+
"sample_intensities",
|
|
33
|
+
"replicate_ratios",
|
|
34
|
+
"experiment_ratios",
|
|
35
|
+
"sample_pca",
|
|
36
|
+
"volcano_ma",
|
|
37
|
+
"expression_comparison",
|
|
38
|
+
"expression_clustermap",
|
|
39
|
+
"pvalue_histogram",
|
|
40
|
+
"sample_correlation",
|
|
41
|
+
]
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
from collections.abc import Iterable, Sequence
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
import adjustText
|
|
5
|
+
import matplotlib.pyplot as plt
|
|
6
|
+
import seaborn as sns
|
|
7
|
+
|
|
8
|
+
from .style import with_active_style
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@with_active_style
def annotated_scatter(
    x_values,
    y_values,
    labels,
    ax=None,
    scatter_kws: Optional[dict] = None,
    text_kws: Optional[dict] = None,
) -> None:
    """Draws a scatter plot in which every point is annotated with a text label.

    A text is added for each point, the text positions are adjusted with
    adjustText so that they are connected to their points by thin lines, and
    finally the points themselves are drawn.

    Args:
        x_values: X coordinates of the points.
        y_values: Y coordinates of the points.
        labels: Label text for each point. Must contain as many entries as
            'x_values' and 'y_values'.
        ax: Axes to draw on. If None, the current axes from `plt.gca()` is used.
        scatter_kws: Keyword arguments passed to `ax.scatter`. If it contains a
            "color" entry, that color is also used for the connector lines;
            otherwise the connector lines use the "text.color" rcParam.
        text_kws: Keyword arguments passed to `ax.text` for every label.

    Raises:
        ValueError: If 'x_values', 'y_values' and 'labels' differ in length.
    """
    ax = plt.gca() if ax is None else ax
    if scatter_kws is None:
        scatter_kws = {}
    if text_kws is None:
        text_kws = {}

    # Do not index scatter_kws["color"] directly: the key is optional, and the
    # default (empty) keyword dictionary would otherwise raise a KeyError.
    connector_color = scatter_kws.get("color", plt.rcParams["text.color"])
    text_params = {
        "force_text": 0.15,
        "arrowprops": {
            "arrowstyle": "-",
            "color": connector_color,
            "lw": 0.75,
            "alpha": 0.5,
        },
        "lim": 100,
    }

    texts = [
        ax.text(x, y, text, **text_kws)
        for x, y, text in zip(x_values, y_values, labels, strict=True)
    ]

    # Only adjust when at least one label was created.
    if texts:
        adjustText.adjust_text(texts, ax=ax, **text_params)  # type: ignore
    ax.scatter(x_values, y_values, **scatter_kws)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@with_active_style
def box_and_bars(
    box_values: Sequence[Iterable[float]],
    bar_values: Sequence[float],
    group_names: Sequence[str],
    colors: Optional[Sequence[str]] = None,
    edge_colors: Optional[Sequence[str]] = None,
) -> tuple[plt.Figure, list[plt.Axes]]:
    """Creates a two-row figure with box plots on top and bar plots below.

    The upper subplot shows one box plot per entry of 'box_values', the lower
    subplot one bar per entry of 'bar_values'. Both subplots share the x-axis, so
    the box and the bar of a group are horizontally aligned. The figure width is
    derived from the number of groups.

    Args:
        box_values: A sequence of iterables that each contain the y values for one
            box plot.
        bar_values: A sequence of y values, one for each bar.
        group_names: Labels of the groups, used as x tick labels.
        colors: Sequence of hex color codes, one per group, used as fill color of
            the boxes and bars. If None, light grey is used for every group.
        edge_colors: Sequence of hex color codes, one per group, used as edge color
            of the boxes and bars. If None, black is used for every group.

    Raises:
        ValueError: If 'box_values', 'bar_values' and 'group_names' do not have the
            same length, or if 'colors' or 'edge_colors' is given and its length
            differs from the length of 'group_names'.

    Returns:
        The matplotlib Figure and the two Axes objects as returned by
        `plt.subplots`; the first Axes holds the box plots, the second the bars.
    """
    lengths = (len(box_values), len(bar_values), len(group_names))
    if len(set(lengths)) != 1:
        raise ValueError(
            "The length of 'box_values', 'bar_values' and 'group_names' must be the "
            "same."
        )
    group_count = lengths[2]
    if colors is not None and len(colors) != group_count:
        raise ValueError(
            "The length of 'colors' must be the same as the length of 'group_names'."
        )
    if edge_colors is not None and len(edge_colors) != group_count:
        raise ValueError(
            "The length of 'edge_colors' must be the same as the length of "
            "'group_names'."
        )

    face_colors = ["#D0D0D0"] * group_count if colors is None else colors
    line_colors = ["#000000"] * group_count if edge_colors is None else edge_colors
    positions = range(group_count)

    # Layout constants; sizes are given in inches, widths in x-axis data units.
    box_width = 0.8
    half_slot = 0.5
    x_padding = 0.24
    title_space_inch = 0.4
    axes_height_inch = 1.6
    axes_gap_inch = 0.35
    inch_per_group = 0.24

    figure_height = title_space_inch + axes_height_inch * 2 + axes_gap_inch
    figure_width = (group_count + (2 * x_padding)) * inch_per_group
    lower_x = (0 - half_slot) - x_padding
    upper_x = (group_count - 1) + half_slot + x_padding

    fig, axes = plt.subplots(2, figsize=(figure_width, figure_height), sharex=True)
    fig.subplots_adjust(
        bottom=0,
        top=1 - (title_space_inch / figure_height),
        left=0,
        right=1,
        hspace=axes_gap_inch / axes_height_inch,
    )
    fig.suptitle("A box and bars plot", y=1)
    box_ax, bar_ax = axes

    # Upper subplot: one box plot per group, colored per group.
    box_ax.axhline(0, color="#999999", lw=1, zorder=2)
    drawn = box_ax.boxplot(
        box_values,
        positions=positions,
        vert=True,
        showfliers=False,
        patch_artist=True,
        widths=box_width,
        medianprops={"color": "#000000"},
    )
    for face, edge, patch in zip(
        face_colors, line_colors, drawn["boxes"], strict=True
    ):
        patch.set(facecolor=face, edgecolor=edge)
    # Keep the y-range at least as wide as roughly -0.4 to 0.4.
    y_low, y_high = box_ax.get_ylim()
    box_ax.set_ylim(min(-0.4, y_low), max(0.401, y_high))

    # Lower subplot: one bar per group.
    bar_ax.bar(
        positions,
        bar_values,
        width=box_width,
        color=face_colors,
        edgecolor=line_colors,
    )
    bar_ax.set_xticklabels(
        group_names, fontsize=plt.rcParams["axes.labelsize"], rotation=90
    )

    for axis in axes:
        axis.grid(False, axis="x")
        sns.despine(top=True, right=True)

    bar_ax.set_xlim(lower_x, upper_x)
    return fig, axes
|