msreport 0.0.26__py3-none-any.whl → 0.0.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
msreport/helper/table.py CHANGED
@@ -1,5 +1,5 @@
1
1
  import re
2
- from typing import Iterable, Union
2
+ from typing import Iterable, Sequence, Union
3
3
 
4
4
  import numpy as np
5
5
  import pandas as pd
@@ -63,7 +63,7 @@ def intensities_in_logspace(data: Union[pd.DataFrame, np.ndarray, Iterable]) ->
63
63
  """
64
64
  data = np.array(data, dtype=float)
65
65
  mask = np.isfinite(data)
66
- return np.all(data[mask].flatten() <= 64)
66
+ return bool(np.all(data[mask].flatten() <= 64))
67
67
 
68
68
 
69
69
  def rename_sample_columns(table: pd.DataFrame, mapping: dict[str, str]) -> pd.DataFrame:
@@ -102,7 +102,7 @@ def rename_sample_columns(table: pd.DataFrame, mapping: dict[str, str]) -> pd.Da
102
102
 
103
103
 
104
104
  def rename_mq_reporter_channels(
105
- table: pd.DataFrame, channel_names: Iterable[str]
105
+ table: pd.DataFrame, channel_names: Sequence[str]
106
106
  ) -> None:
107
107
  """Renames reporter channel numbers with sample names.
108
108
 
@@ -157,8 +157,7 @@ def find_columns(
157
157
  Returns:
158
158
  A list of column names.
159
159
  """
160
- matches = [substring in col for col in table.columns]
161
- matched_columns = np.array(table.columns)[matches].tolist()
160
+ matched_columns = [col for col in table.columns if substring in col]
162
161
  if must_be_substring:
163
162
  matched_columns = [col for col in matched_columns if col != substring]
164
163
  return matched_columns
@@ -255,7 +254,7 @@ def remove_rows_by_partial_match(
255
254
 
256
255
 
257
256
  def join_tables(
258
- tables: Iterable[pd.DataFrame], reset_index: bool = False
257
+ tables: Sequence[pd.DataFrame], reset_index: bool = False
259
258
  ) -> pd.DataFrame:
260
259
  """Returns a joined dataframe.
261
260
 
msreport/impute.py CHANGED
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
- from typing import Optional
2
+
3
+ from typing import Any, Optional
3
4
 
4
5
  import numpy as np
5
6
  import pandas as pd
@@ -18,7 +19,7 @@ class FixedValueImputer:
18
19
  def __init__(
19
20
  self,
20
21
  strategy: str,
21
- fill_value: Optional[float] = None,
22
+ fill_value: float = 0.0,
22
23
  column_wise: bool = True,
23
24
  ):
24
25
  """Initializes the FixedValueImputer.
@@ -51,17 +52,15 @@ class FixedValueImputer:
51
52
  Returns the fitted FixedValueImputer instance.
52
53
  """
53
54
  if self.strategy == "constant":
54
- # if not isinstance(self.fill_value, (float, int)):
55
- # raise Excpetion()
56
- fill_values = {column: self.fill_value for column in table.columns}
55
+ fill_values = dict.fromkeys(table.columns, self.fill_value)
57
56
  elif self.strategy == "below":
58
57
  if self.column_wise:
59
58
  fill_values = {}
60
- for column in table:
59
+ for column in table.columns:
61
60
  fill_values[column] = _calculate_integer_below_min(table[column])
62
61
  else:
63
62
  int_below_min = _calculate_integer_below_min(table)
64
- fill_values = {column: int_below_min for column in table.columns}
63
+ fill_values = dict.fromkeys(table.columns, int_below_min)
65
64
  self._sample_fill_values = fill_values
66
65
  return self
67
66
 
@@ -240,7 +239,7 @@ class PerseusImputer:
240
239
  return _table
241
240
 
242
241
 
243
- def confirm_is_fitted(imputer: any, msg: Optional[str] = None) -> None:
242
+ def confirm_is_fitted(imputer: Any, msg: Optional[str] = None) -> None:
244
243
  """Perform is_fitted validation for imputer instances.
245
244
 
246
245
  Checks if the imputer is fitted by verifying the presence of fitted attributes
msreport/isobar.py CHANGED
@@ -1,4 +1,5 @@
1
1
  from __future__ import annotations
2
+
2
3
  import functools
3
4
  from typing import Protocol
4
5
 
@@ -27,7 +28,7 @@ class IsotopeImpurityCorrecter:
27
28
  def __init__(self):
28
29
  self._impurity_matrix = None
29
30
 
30
- def fit(self, impurity_matrix: np.array) -> IsotopeImpurityCorrecter:
31
+ def fit(self, impurity_matrix: np.ndarray) -> IsotopeImpurityCorrecter:
31
32
  """Fits the isotope impurity correcter to a given impurity matrix.
32
33
 
33
34
  Args:
@@ -51,7 +52,7 @@ class IsotopeImpurityCorrecter:
51
52
  """Returns True if the IsotopeImpurityCorrecter has been fitted."""
52
53
  return self._impurity_matrix is not None
53
54
 
54
- def get_fits(self) -> np.array:
55
+ def get_fits(self) -> np.ndarray:
55
56
  """Returns a copy of the fitted impurity matrix.
56
57
 
57
58
  returns:
@@ -89,9 +90,9 @@ class IsotopeImpurityCorrecter:
89
90
 
90
91
 
91
92
  def correct_isobaric_reporter_impurities(
92
- intensity_table: np.array,
93
- diagonal_impurity_matrix: np.array,
94
- ) -> np.array:
93
+ intensity_table: np.ndarray,
94
+ diagonal_impurity_matrix: np.ndarray,
95
+ ) -> np.ndarray:
95
96
  """Performs isotope impurity correction on isobaric reporter expression values.
96
97
 
97
98
  Args:
@@ -122,8 +123,8 @@ def correct_isobaric_reporter_impurities(
122
123
 
123
124
 
124
125
  def _apply_impurity_contamination(
125
- intensities: np.array, impurity_matrix: np.array
126
- ) -> np.array:
126
+ intensities: np.ndarray, impurity_matrix: np.ndarray
127
+ ) -> np.ndarray:
127
128
  """Applies reporter isotope impurity interference to an intensity array.
128
129
 
129
130
  Args:
@@ -141,8 +142,8 @@ def _apply_impurity_contamination(
141
142
 
142
143
 
143
144
  def _correct_impurity_contamination(
144
- intensities: np.array, impurity_matrix: np.array
145
- ) -> np.array:
145
+ intensities: np.ndarray, impurity_matrix: np.ndarray
146
+ ) -> np.ndarray:
146
147
  """Applies reporter isotope impurity interference correction to an intensity array.
147
148
 
148
149
  Args:
msreport/normalize.py CHANGED
@@ -1,38 +1,29 @@
1
1
  from __future__ import annotations
2
- import abc
3
- import itertools
4
- from typing import Callable, Iterable, Optional
2
+
3
+ from typing import Callable, Iterable, Optional, Protocol
5
4
 
6
5
  import numpy as np
7
6
  import pandas as pd
8
7
  import statsmodels.nonparametric.smoothers_lowess
8
+ from typing_extensions import Self
9
9
 
10
10
  import msreport.helper
11
11
  import msreport.helper.maxlfq as MAXLFQ
12
12
  from msreport.errors import NotFittedError
13
13
 
14
14
 
15
- class BaseSampleNormalizer(abc.ABC):
16
- """Base class for all sample normalizers."""
17
-
18
- @abc.abstractmethod
19
- def fit(self, table: pd.DataFrame) -> BaseSampleNormalizer:
20
- ...
15
+ class AbstractTransformer(Protocol):
16
+ def fit(self, table: pd.DataFrame) -> Self:
17
+ """Fits the Transformer and returns a fitted Transformer instance."""
21
18
 
22
- @abc.abstractmethod
23
19
  def is_fitted(self) -> bool:
24
- ...
25
-
26
- @abc.abstractmethod
27
- def get_fits(self) -> dict[...]:
28
- ...
20
+ """Returns True if the Transformer has been fitted."""
29
21
 
30
- @abc.abstractmethod
31
22
  def transform(self, table: pd.DataFrame) -> pd.DataFrame:
32
- ...
23
+ """Transform values in table."""
33
24
 
34
25
 
35
- class FixedValueNormalizer(BaseSampleNormalizer):
26
+ class FixedValueNormalizer:
36
27
  """Normalization by a constant normalization factor for each sample.
37
28
 
38
29
  Expects log transformed intensity values.
@@ -58,11 +49,11 @@ class FixedValueNormalizer(BaseSampleNormalizer):
58
49
  f'"comparison" = {comparison} not allowed. '
59
50
  'Must be either "paired" or "reference".'
60
51
  )
61
- self._comparison_mode = comparison
62
- self._fit_function = center_function
63
- self._sample_fits = None
52
+ self._comparison_mode: str = comparison
53
+ self._fit_function: Callable = center_function
54
+ self._sample_fits: dict[str, float] = {}
64
55
 
65
- def fit(self, table: pd.DataFrame) -> BaseSampleNormalizer:
56
+ def fit(self, table: pd.DataFrame) -> Self:
66
57
  """Fits the FixedValueNormalizer.
67
58
 
68
59
  Args:
@@ -80,7 +71,7 @@ class FixedValueNormalizer(BaseSampleNormalizer):
80
71
 
81
72
  def is_fitted(self) -> bool:
82
73
  """Returns True if the FixedValueNormalizer has been fitted."""
83
- return self._sample_fits is not None
74
+ return True if self._sample_fits else False
84
75
 
85
76
  def get_fits(self) -> dict[str, float]:
86
77
  """Returns a dictionary containing the fitted center values per sample.
@@ -159,13 +150,13 @@ class FixedValueNormalizer(BaseSampleNormalizer):
159
150
  self._sample_fits[sample] = sample_fit
160
151
 
161
152
 
162
- class ValueDependentNormalizer(BaseSampleNormalizer):
153
+ class ValueDependentNormalizer:
163
154
  """Normalization with a value dependent fit for each sample.
164
155
 
165
156
  Expects log transformed intensity values.
166
157
  """
167
158
 
168
- def __init__(self, fit_function: Callable):
159
+ def __init__(self, fit_function: Callable[[Iterable, Iterable], np.ndarray]):
169
160
  """Initializes the ValueDependentNormalizer.
170
161
 
171
162
  Args:
@@ -175,10 +166,10 @@ class ValueDependentNormalizer(BaseSampleNormalizer):
175
166
  with two columns. The first column contains the values and the second
176
167
  column the fitted deviations.
177
168
  """
178
- self._sample_fits = None
169
+ self._sample_fits: dict[str, np.ndarray] = {}
179
170
  self._fit_function = fit_function
180
171
 
181
- def fit(self, table: pd.DataFrame) -> BaseSampleNormalizer:
172
+ def fit(self, table: pd.DataFrame) -> Self:
182
173
  """Fits the ValueDependentNormalizer.
183
174
 
184
175
  Args:
@@ -192,9 +183,9 @@ class ValueDependentNormalizer(BaseSampleNormalizer):
192
183
 
193
184
  def is_fitted(self) -> bool:
194
185
  """Returns True if the ValueDependentNormalizer has been fitted."""
195
- return self._sample_fits is not None
186
+ return True if self._sample_fits else False
196
187
 
197
- def get_fits(self) -> dict[str, Iterable[float, float]]:
188
+ def get_fits(self) -> dict[str, np.ndarray]:
198
189
  """Returns a dictionary containing lists of fitting data per sample.
199
190
 
200
191
  Returns:
@@ -324,14 +315,14 @@ class CategoricalNormalizer:
324
315
  column must be present in the reference table and the table to be
325
316
  transformed.
326
317
  """
327
- self._fitted_table = None
328
- self._category_column = category_column
318
+ self._fitted_table: pd.DataFrame = pd.DataFrame()
319
+ self._category_column: str = category_column
329
320
 
330
321
  def is_fitted(self) -> bool:
331
322
  """Returns True if the CategoricalNormalizer has been fitted."""
332
- return self._fitted_table is not None
323
+ return not self._fitted_table.empty
333
324
 
334
- def fit(self, reference_table: pd.DataFrame) -> BaseSampleNormalizer:
325
+ def fit(self, reference_table: pd.DataFrame) -> Self:
335
326
  """Fits the CategoricalNormalizer to a reference table.
336
327
 
337
328
  Args:
@@ -397,7 +388,34 @@ class CategoricalNormalizer:
397
388
  return transformed_table
398
389
 
399
390
 
400
- class ZscoreScaler(BaseSampleNormalizer):
391
class PercentageScaler:
    """Scales every column so that its values are expressed relative to the column sum.

    The scaler is stateless: nothing is learned from the data, so `fit` is a no-op
    and the instance always counts as fitted.
    """

    def fit(self, table: pd.DataFrame) -> Self:
        """Does nothing and returns the instance itself.

        Args:
            table: Ignored; accepted only so the signature matches other transformers.

        Returns:
            The unchanged PercentageScaler instance.
        """
        return self

    def is_fitted(self) -> bool:
        """Always returns True because the PercentageScaler does not need fitting."""
        return True

    def get_fits(self) -> dict:
        """Returns an empty dictionary because the scaler has no fitted parameters."""
        return {}

    def transform(self, table: pd.DataFrame) -> pd.DataFrame:
        """Divides the values of each column by the sum of that column.

        The result is the share of the column total held by each value; no factor
        of 100 is applied.

        Args:
            table: The table whose column values are scaled.

        Returns:
            A new table containing the scaled values.
        """
        column_totals = table.sum(axis=0)
        # The totals are indexed by column label, so align them along the columns.
        return table.divide(column_totals, axis=1)
416
+
417
+
418
+ class ZscoreScaler:
401
419
  """Normalize samples by z-score scaling."""
402
420
 
403
421
  def __init__(self, with_mean: bool = True, with_std: bool = True):
@@ -410,7 +428,7 @@ class ZscoreScaler(BaseSampleNormalizer):
410
428
  self._with_mean = with_mean
411
429
  self._with_std = with_std
412
430
 
413
- def fit(self, table: pd.DataFrame) -> BaseSampleNormalizer:
431
+ def fit(self, table: pd.DataFrame) -> Self:
414
432
  """Returns the instance itself."""
415
433
  return self
416
434
 
@@ -440,7 +458,7 @@ class ZscoreScaler(BaseSampleNormalizer):
440
458
 
441
459
 
442
460
  def confirm_is_fitted(
443
- normalizer: BaseSampleNormalizer, msg: Optional[str] = None
461
+ normalizer: AbstractTransformer, msg: Optional[str] = None
444
462
  ) -> None:
445
463
  """Perform is_fitted validation for normalizer instances.
446
464
 
msreport/peptidoform.py CHANGED
@@ -67,7 +67,9 @@ class Peptide:
67
67
  probabilities = []
68
68
  for site in self.list_modified_peptide_sites(modification):
69
69
  probabilities.append(self.get_peptide_site_probability(site))
70
- return np.prod(probabilities)
70
+ if None in probabilities:
71
+ return None
72
+ return float(np.prod(probabilities))
71
73
 
72
74
  def get_peptide_site_probability(self, position: int) -> Optional[float]:
73
75
  """Return the modification localization probability of the peptide position.
@@ -161,7 +163,7 @@ def parse_modified_sequence(
161
163
  modified_sequence: str,
162
164
  tag_open: str,
163
165
  tag_close: str,
164
- ) -> tuple[str, list]:
166
+ ) -> tuple[str, list[tuple[int, str]]]:
165
167
  """Returns the plain sequence and a list of modification positions and tags.
166
168
 
167
169
  Args:
@@ -253,7 +255,7 @@ def make_localization_string(
253
255
  return localization_string
254
256
 
255
257
 
256
- def read_localization_string(localization_string: str) -> dict:
258
+ def read_localization_string(localization_string: str) -> dict[str, dict[int, float]]:
257
259
  """Converts a site localization probability string into a dictionary.
258
260
 
259
261
  Args:
@@ -269,7 +271,7 @@ def read_localization_string(localization_string: str) -> dict:
269
271
  A dictionary in the form {"modification tag": {position: probability}}, where
270
272
  positions are integers and probabilities are floats ranging from 0 to 1.
271
273
  """
272
- localization = {}
274
+ localization: dict[str, dict[int, float]] = {}
273
275
  if localization_string == "":
274
276
  return localization
275
277
 
@@ -0,0 +1,41 @@
1
+ """This module provides various plotting functions for visualizing data within a Qtable.
2
+
3
+ The functions in this module generate a wide range of plots, including heatmaps, PCA
4
+ plots, volcano plots, and histograms, to analyze and compare expression values,
5
+ missingness, contaminants, and other features in proteomics datasets. The plots are
6
+ designed to work with the Qtable class as input, which provides structured access to
7
+ proteomics data and experimental design information.
8
+
9
+ The style of the plots can be customized using the `set_active_style` function, which
10
+ allows applying style sheets from the msreport library or those available in matplotlib.
11
+ """
12
+
13
+ from .comparison import expression_comparison, pvalue_histogram, volcano_ma
14
+ from .distribution import experiment_ratios, replicate_ratios
15
+ from .multivariate import expression_clustermap, sample_pca
16
+ from .quality import (
17
+ contaminants,
18
+ missing_values_horizontal,
19
+ missing_values_vertical,
20
+ sample_correlation,
21
+ sample_intensities,
22
+ )
23
+ from .style import ColorWheelDict, set_active_style, set_dpi
24
+
25
+ __all__ = [
26
+ "ColorWheelDict",
27
+ "set_dpi",
28
+ "set_active_style",
29
+ "missing_values_vertical",
30
+ "missing_values_horizontal",
31
+ "contaminants",
32
+ "sample_intensities",
33
+ "replicate_ratios",
34
+ "experiment_ratios",
35
+ "sample_pca",
36
+ "volcano_ma",
37
+ "expression_comparison",
38
+ "expression_clustermap",
39
+ "pvalue_histogram",
40
+ "sample_correlation",
41
+ ]
@@ -0,0 +1,159 @@
1
+ from collections.abc import Iterable, Sequence
2
+ from typing import Optional
3
+
4
+ import adjustText
5
+ import matplotlib.pyplot as plt
6
+ import seaborn as sns
7
+
8
+ from .style import with_active_style
9
+
10
+
11
@with_active_style
def annotated_scatter(
    x_values,
    y_values,
    labels,
    ax=None,
    scatter_kws=None,
    text_kws=None,
) -> None:
    """Draws a scatter plot and places a text label at every point.

    The labels are created first and, if there are any, handed to
    `adjustText.adjust_text` to be repositioned; the points are drawn afterwards.

    Args:
        x_values: X coordinates of the points.
        y_values: Y coordinates of the points.
        labels: One text label per point.
        ax: Axes to draw on. If None, the current axes (`plt.gca()`) is used.
        scatter_kws: Keyword arguments passed on to `ax.scatter`. If it contains a
            "color" entry, that color is also used in the arrow properties handed
            to `adjustText.adjust_text`.
        text_kws: Keyword arguments passed on to `ax.text`.

    Raises:
        ValueError: If 'x_values', 'y_values' and 'labels' differ in length.
    """
    ax = plt.gca() if ax is None else ax
    scatter_kws = {} if scatter_kws is None else scatter_kws
    text_kws = {} if text_kws is None else text_kws

    arrowprops = {"arrowstyle": "-", "lw": 0.75, "alpha": 0.5}
    # The color is optional: indexing scatter_kws["color"] unconditionally raised a
    # KeyError for the default `scatter_kws=None` and for any dict without "color".
    if "color" in scatter_kws:
        arrowprops["color"] = scatter_kws["color"]
    text_params = {
        "force_text": 0.15,
        "arrowprops": arrowprops,
        "lim": 100,
    }

    texts = [
        ax.text(x, y, text, **text_kws)
        for x, y, text in zip(x_values, y_values, labels, strict=True)
    ]

    # Only adjust when there is at least one label to reposition.
    if texts:
        adjustText.adjust_text(texts, ax=ax, **text_params)  # type: ignore
    ax.scatter(x_values, y_values, **scatter_kws)
43
+
44
+
45
@with_active_style
def box_and_bars(
    box_values: Sequence[Iterable[float]],
    bar_values: Sequence[float],
    group_names: Sequence[str],
    colors: Optional[Sequence[str]] = None,
    edge_colors: Optional[Sequence[str]] = None,
) -> tuple[plt.Figure, list[plt.Axes]]:
    """Creates a figure with a box plot row stacked above a bar plot row.

    The upper subplot shows one box plot per entry of 'box_values', the lower subplot
    one bar per entry of 'bar_values'. Both subplots share the x-axis, so each group
    is horizontally aligned between them. The figure width grows with the number of
    groups. 'box_values', 'bar_values' and 'group_names' must all have the same
    length.

    Args:
        box_values: A sequence of iterables, each holding the y values of one box
            plot.
        bar_values: A sequence of y values, one per bar.
        group_names: Labels of the groups, used as x tick labels.
        colors: Sequence of hex color codes, one per group, used as fill color of
            the boxes and bars. Must have the same length as 'group_names'. If None,
            light grey is used.
        edge_colors: Sequence of hex color codes, one per group, used for the edges
            of the boxes and bars. Must have the same length as 'group_names'. If
            None, black is used.

    Raises:
        ValueError: If 'box_values', 'bar_values' and 'group_names' differ in length,
            or if 'colors' or 'edge_colors' is given with a length different from
            'group_names'.

    Returns:
        The matplotlib Figure and the two Axes created by `plt.subplots`; the box
        plot Axes comes first, the bar plot Axes second.
    """
    if len(box_values) != len(bar_values) or len(bar_values) != len(group_names):
        raise ValueError(
            "The length of 'box_values', 'bar_values' and 'group_names' must be the "
            "same."
        )
    group_count = len(group_names)
    if colors is not None and len(colors) != group_count:
        raise ValueError(
            "The length of 'colors' must be the same as the length of 'group_names'."
        )
    if edge_colors is not None and len(edge_colors) != group_count:
        raise ValueError(
            "The length of 'edge_colors' must be the same as the length of "
            "'group_names'."
        )

    face_colors = ["#D0D0D0"] * group_count if colors is None else colors
    line_colors = ["#000000"] * group_count if edge_colors is None else edge_colors

    positions = range(group_count)
    box_width = 0.8

    # Figure geometry, in inches unless noted otherwise.
    title_space = 0.4
    axis_height = 1.6
    axis_gap = 0.35
    inches_per_group = 0.24
    x_margin = 0.24  # In data units; one group occupies one data unit.
    figure_height = title_space + axis_height * 2 + axis_gap
    figure_width = (group_count + (2 * x_margin)) * inches_per_group

    # Each group occupies a slot of width 1 centered on its position.
    half_slot = 0.5
    x_lower = (0 - half_slot) - x_margin
    x_upper = (group_count - 1) + half_slot + x_margin

    fig, axes = plt.subplots(2, figsize=(figure_width, figure_height), sharex=True)
    fig.subplots_adjust(
        bottom=0,
        top=1 - (title_space / figure_height),
        left=0,
        right=1,
        hspace=axis_gap / axis_height,
    )
    fig.suptitle("A box and bars plot", y=1)

    # Upper subplot: box plots.
    box_ax = axes[0]
    box_ax.axhline(0, color="#999999", lw=1, zorder=2)
    box_artists = box_ax.boxplot(
        box_values,
        positions=positions,
        vert=True,
        showfliers=False,
        patch_artist=True,
        widths=box_width,
        medianprops={"color": "#000000"},
    )
    for face, edge, patch in zip(
        face_colors, line_colors, box_artists["boxes"], strict=True
    ):
        patch.set(facecolor=face, edgecolor=edge)
    # Never let the y range shrink below roughly [-0.4, 0.4].
    y_low, y_high = box_ax.get_ylim()
    box_ax.set_ylim(min(-0.4, y_low), max(0.401, y_high))

    # Lower subplot: bar plots.
    bar_ax = axes[1]
    bar_ax.bar(
        positions, bar_values, width=box_width, color=face_colors, edgecolor=line_colors
    )
    bar_ax.set_xticklabels(
        group_names, fontsize=plt.rcParams["axes.labelsize"], rotation=90
    )
    for subplot in axes:
        subplot.grid(False, axis="x")
        sns.despine(top=True, right=True)

    # The x-axis is shared, so setting the limits on the last subplot is sufficient.
    axes[-1].set_xlim(x_lower, x_upper)
    return fig, axes