PyPI - msreport - Versions diffs - 0.0.26__py3-none-any.whl → 0.0.28__py3-none-any.whl - Mend

msreport 0.0.26__py3-none-any.whl → 0.0.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

msreport/__init__.py +4 -6
msreport/aggregate/condense.py +1 -1
msreport/aggregate/pivot.py +1 -0
msreport/aggregate/summarize.py +2 -2
msreport/analyze.py +171 -38
msreport/errors.py +1 -2
msreport/export.py +16 -13
msreport/fasta.py +2 -1
msreport/helper/__init__.py +7 -7
msreport/helper/calc.py +29 -24
msreport/helper/maxlfq.py +2 -2
msreport/helper/table.py +5 -6
msreport/impute.py +7 -8
msreport/isobar.py +10 -9
msreport/normalize.py +54 -36
msreport/peptidoform.py +6 -4
msreport/plot/__init__.py +41 -0
msreport/plot/_partial_plots.py +159 -0
msreport/plot/comparison.py +490 -0
msreport/plot/distribution.py +253 -0
msreport/plot/multivariate.py +355 -0
msreport/plot/quality.py +431 -0
msreport/plot/style.py +286 -0
msreport/plot/style_sheets/msreport-notebook.mplstyle +57 -0
msreport/plot/style_sheets/seaborn-whitegrid.mplstyle +45 -0
msreport/qtable.py +109 -17
msreport/reader.py +73 -79
msreport/rinterface/__init__.py +2 -1
msreport/rinterface/limma.py +2 -1
msreport/rinterface/rinstaller.py +3 -3
{msreport-0.0.26.dist-info → msreport-0.0.28.dist-info}/METADATA +7 -3
msreport-0.0.28.dist-info/RECORD +38 -0
msreport/plot.py +0 -1132
msreport-0.0.26.dist-info/RECORD +0 -30
{msreport-0.0.26.dist-info → msreport-0.0.28.dist-info}/WHEEL +0 -0
{msreport-0.0.26.dist-info → msreport-0.0.28.dist-info}/licenses/LICENSE.txt +0 -0
{msreport-0.0.26.dist-info → msreport-0.0.28.dist-info}/top_level.txt +0 -0

msreport/helper/table.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import re
-from typing import Iterable, Union
+from typing import Iterable, Sequence, Union
 import numpy as np
 import pandas as pd
@@ -63,7 +63,7 @@ def intensities_in_logspace(data: Union[pd.DataFrame, np.ndarray, Iterable]) ->
     """
     data = np.array(data, dtype=float)
     mask = np.isfinite(data)
-    return np.all(data[mask].flatten() <= 64)
+    return bool(np.all(data[mask].flatten() <= 64))
 def rename_sample_columns(table: pd.DataFrame, mapping: dict[str, str]) -> pd.DataFrame:
@@ -102,7 +102,7 @@ def rename_sample_columns(table: pd.DataFrame, mapping: dict[str, str]) -> pd.Da
 def rename_mq_reporter_channels(
-    table: pd.DataFrame, channel_names: Iterable[str]
+    table: pd.DataFrame, channel_names: Sequence[str]
 ) -> None:
     """Renames reporter channel numbers with sample names.
@@ -157,8 +157,7 @@ def find_columns(
     Returns:
         A list of column names.
     """
-    matches = [substring in col for col in table.columns]
-    matched_columns = np.array(table.columns)[matches].tolist()
+    matched_columns = [col for col in table.columns if substring in col]
     if must_be_substring:
         matched_columns = [col for col in matched_columns if col != substring]
     return matched_columns
@@ -255,7 +254,7 @@ def remove_rows_by_partial_match(
 def join_tables(
-    tables: Iterable[pd.DataFrame], reset_index: bool = False
+    tables: Sequence[pd.DataFrame], reset_index: bool = False
 ) -> pd.DataFrame:
     """Returns a joined dataframe.

msreport/impute.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from __future__ import annotations
-from typing import Optional
+from typing import Any, Optional
 import numpy as np
 import pandas as pd
@@ -18,7 +19,7 @@ class FixedValueImputer:
     def __init__(
         self,
         strategy: str,
-        fill_value: Optional[float] = None,
+        fill_value: float = 0.0,
         column_wise: bool = True,
     ):
         """Initializes the FixedValueImputer.
@@ -51,17 +52,15 @@ class FixedValueImputer:
             Returns the fitted FixedValueImputer instance.
         """
         if self.strategy == "constant":
-            # if not isinstance(self.fill_value, (float, int)):
-            #     raise Excpetion()
-            fill_values = {column: self.fill_value for column in table.columns}
+            fill_values = dict.fromkeys(table.columns, self.fill_value)
         elif self.strategy == "below":
             if self.column_wise:
                 fill_values = {}
-                for column in table:
+                for column in table.columns:
                     fill_values[column] = _calculate_integer_below_min(table[column])
             else:
                 int_below_min = _calculate_integer_below_min(table)
-                fill_values = {column: int_below_min for column in table.columns}
+                fill_values = dict.fromkeys(table.columns, int_below_min)
         self._sample_fill_values = fill_values
         return self
@@ -240,7 +239,7 @@ class PerseusImputer:
         return _table
-def confirm_is_fitted(imputer: any, msg: Optional[str] = None) -> None:
+def confirm_is_fitted(imputer: Any, msg: Optional[str] = None) -> None:
     """Perform is_fitted validation for imputer instances.
     Checks if the imputer is fitted by verifying the presence of fitted attributes

msreport/isobar.py CHANGED Viewed

@@ -1,4 +1,5 @@
 from __future__ import annotations
 import functools
 from typing import Protocol
@@ -27,7 +28,7 @@ class IsotopeImpurityCorrecter:
     def __init__(self):
         self._impurity_matrix = None
-    def fit(self, impurity_matrix: np.array) -> IsotopeImpurityCorrecter:
+    def fit(self, impurity_matrix: np.ndarray) -> IsotopeImpurityCorrecter:
         """Fits the isotope impurity correcter to a given impurity matrix.
         Args:
@@ -51,7 +52,7 @@ class IsotopeImpurityCorrecter:
         """Returns True if the IsotopeImpurityCorrecter has been fitted."""
         return self._impurity_matrix is not None
-    def get_fits(self) -> np.array:
+    def get_fits(self) -> np.ndarray:
         """Returns a copy of the fitted impurity matrix.
         returns:
@@ -89,9 +90,9 @@ class IsotopeImpurityCorrecter:
 def correct_isobaric_reporter_impurities(
-    intensity_table: np.array,
-    diagonal_impurity_matrix: np.array,
-) -> np.array:
+    intensity_table: np.ndarray,
+    diagonal_impurity_matrix: np.ndarray,
+) -> np.ndarray:
     """Performs isotope impurity correction on isobaric reporter expression values.
     Args:
@@ -122,8 +123,8 @@ def correct_isobaric_reporter_impurities(
 def _apply_impurity_contamination(
-    intensities: np.array, impurity_matrix: np.array
-) -> np.array:
+    intensities: np.ndarray, impurity_matrix: np.ndarray
+) -> np.ndarray:
     """Applies reporter isotope impurity interference to an intensity array.
     Args:
@@ -141,8 +142,8 @@ def _apply_impurity_contamination(
 def _correct_impurity_contamination(
-    intensities: np.array, impurity_matrix: np.array
-) -> np.array:
+    intensities: np.ndarray, impurity_matrix: np.ndarray
+) -> np.ndarray:
     """Applies reporter isotope impurity interference correction to an intensity array.
     Args:

msreport/normalize.py CHANGED Viewed

@@ -1,38 +1,29 @@
 from __future__ import annotations
-import abc
-import itertools
-from typing import Callable, Iterable, Optional
+from typing import Callable, Iterable, Optional, Protocol
 import numpy as np
 import pandas as pd
 import statsmodels.nonparametric.smoothers_lowess
+from typing_extensions import Self
 import msreport.helper
 import msreport.helper.maxlfq as MAXLFQ
 from msreport.errors import NotFittedError
-class BaseSampleNormalizer(abc.ABC):
-    """Base class for all sample normalizers."""
-    @abc.abstractmethod
-    def fit(self, table: pd.DataFrame) -> BaseSampleNormalizer:
-        ...
+class AbstractTransformer(Protocol):
+    def fit(self, table: pd.DataFrame) -> Self:
+        """Fits the Transformer and returns a fitted Transformer instance."""
-    @abc.abstractmethod
     def is_fitted(self) -> bool:
-        ...
-    @abc.abstractmethod
-    def get_fits(self) -> dict[...]:
-        ...
+        """Returns True if the Transformer has been fitted."""
-    @abc.abstractmethod
     def transform(self, table: pd.DataFrame) -> pd.DataFrame:
-        ...
+        """Transform values in table."""
-class FixedValueNormalizer(BaseSampleNormalizer):
+class FixedValueNormalizer:
     """Normalization by a constant normalization factor for each sample.
     Expects log transformed intensity values.
@@ -58,11 +49,11 @@ class FixedValueNormalizer(BaseSampleNormalizer):
                 f'"comparison" = {comparison} not allowed. '
                 'Must be either "paired" or "reference".'
             )
-        self._comparison_mode = comparison
-        self._fit_function = center_function
-        self._sample_fits = None
+        self._comparison_mode: str = comparison
+        self._fit_function: Callable = center_function
+        self._sample_fits: dict[str, float] = {}
-    def fit(self, table: pd.DataFrame) -> BaseSampleNormalizer:
+    def fit(self, table: pd.DataFrame) -> Self:
         """Fits the FixedValueNormalizer.
         Args:
@@ -80,7 +71,7 @@ class FixedValueNormalizer(BaseSampleNormalizer):
     def is_fitted(self) -> bool:
         """Returns True if the FixedValueNormalizer has been fitted."""
-        return self._sample_fits is not None
+        return True if self._sample_fits else False
     def get_fits(self) -> dict[str, float]:
         """Returns a dictionary containing the fitted center values per sample.
@@ -159,13 +150,13 @@ class FixedValueNormalizer(BaseSampleNormalizer):
             self._sample_fits[sample] = sample_fit
-class ValueDependentNormalizer(BaseSampleNormalizer):
+class ValueDependentNormalizer:
     """Normalization with a value dependent fit for each sample.
     Expects log transformed intensity values.
     """
-    def __init__(self, fit_function: Callable):
+    def __init__(self, fit_function: Callable[[Iterable, Iterable], np.ndarray]):
         """Initializes the ValueDependentNormalizer.
         Args:
@@ -175,10 +166,10 @@ class ValueDependentNormalizer(BaseSampleNormalizer):
                 with two columns. The first column contains the values and the second
                 column the fitted deviations.
         """
-        self._sample_fits = None
+        self._sample_fits: dict[str, np.ndarray] = {}
         self._fit_function = fit_function
-    def fit(self, table: pd.DataFrame) -> BaseSampleNormalizer:
+    def fit(self, table: pd.DataFrame) -> Self:
         """Fits the ValueDependentNormalizer.
         Args:
@@ -192,9 +183,9 @@ class ValueDependentNormalizer(BaseSampleNormalizer):
     def is_fitted(self) -> bool:
         """Returns True if the ValueDependentNormalizer has been fitted."""
-        return self._sample_fits is not None
+        return True if self._sample_fits else False
-    def get_fits(self) -> dict[str, Iterable[float, float]]:
+    def get_fits(self) -> dict[str, np.ndarray]:
         """Returns a dictionary containing lists of fitting data per sample.
         Returns:
@@ -324,14 +315,14 @@ class CategoricalNormalizer:
                 column must be present in the reference table and the table to be
                 transformed.
         """
-        self._fitted_table = None
-        self._category_column = category_column
+        self._fitted_table: pd.DataFrame = pd.DataFrame()
+        self._category_column: str = category_column
     def is_fitted(self) -> bool:
         """Returns True if the CategoricalNormalizer has been fitted."""
-        return self._fitted_table is not None
+        return not self._fitted_table.empty
-    def fit(self, reference_table: pd.DataFrame) -> BaseSampleNormalizer:
+    def fit(self, reference_table: pd.DataFrame) -> Self:
         """Fits the CategoricalNormalizer to a reference table.
         Args:
@@ -397,7 +388,34 @@ class CategoricalNormalizer:
         return transformed_table
-class ZscoreScaler(BaseSampleNormalizer):
+class PercentageScaler:
+    """Transform column values to percentages by dividing them with the column sum."""
+    def fit(self, table: pd.DataFrame) -> Self:
+        """Returns the instance itself."""
+        return self
+    def is_fitted(self) -> bool:
+        """Always returns True because the ZscoreScaler does not need to be fitted."""
+        return True
+    def get_fits(self) -> dict:
+        """Returns a dictionary containing the parameters 'with_mean' and 'with_std'."""
+        return {}
+    def transform(self, table: pd.DataFrame) -> pd.DataFrame:
+        """Transforms column values into percentages by devision with the column sum.
+        Args:
+            table: The table used to scale row values.
+        Returns:
+            A copy of the table containing the scaled values.
+        """
+        return table.divide(table.sum(axis=0), axis=1)
+class ZscoreScaler:
     """Normalize samples by z-score scaling."""
     def __init__(self, with_mean: bool = True, with_std: bool = True):
@@ -410,7 +428,7 @@ class ZscoreScaler(BaseSampleNormalizer):
         self._with_mean = with_mean
         self._with_std = with_std
-    def fit(self, table: pd.DataFrame) -> BaseSampleNormalizer:
+    def fit(self, table: pd.DataFrame) -> Self:
         """Returns the instance itself."""
         return self
@@ -440,7 +458,7 @@ class ZscoreScaler(BaseSampleNormalizer):
 def confirm_is_fitted(
-    normalizer: BaseSampleNormalizer, msg: Optional[str] = None
+    normalizer: AbstractTransformer, msg: Optional[str] = None
 ) -> None:
     """Perform is_fitted validation for normalizer instances.

msreport/peptidoform.py CHANGED Viewed

@@ -67,7 +67,9 @@ class Peptide:
         probabilities = []
         for site in self.list_modified_peptide_sites(modification):
             probabilities.append(self.get_peptide_site_probability(site))
-        return np.prod(probabilities)
+        if None in probabilities:
+            return None
+        return float(np.prod(probabilities))
     def get_peptide_site_probability(self, position: int) -> Optional[float]:
         """Return the modification localization probability of the peptide position.
@@ -161,7 +163,7 @@ def parse_modified_sequence(
     modified_sequence: str,
     tag_open: str,
     tag_close: str,
-) -> tuple[str, list]:
+) -> tuple[str, list[tuple[int, str]]]:
     """Returns the plain sequence and a list of modification positions and tags.
     Args:
@@ -253,7 +255,7 @@ def make_localization_string(
     return localization_string
-def read_localization_string(localization_string: str) -> dict:
+def read_localization_string(localization_string: str) -> dict[str, dict[int, float]]:
     """Converts a site localization probability string into a dictionary.
     Args:
@@ -269,7 +271,7 @@ def read_localization_string(localization_string: str) -> dict:
         A dictionary in the form {"modification tag": {position: probability}}, where
         positions are integers and probabilitiesa are floats ranging from 0 to 1.
     """
-    localization = {}
+    localization: dict[str, dict[int, float]] = {}
     if localization_string == "":
         return localization

msreport/plot/__init__.py ADDED Viewed

@@ -0,0 +1,41 @@
+"""This module provides various plotting functions for visualizing data within a Qtable.
+The functions in this module generate a wide range of plots, including heatmaps, PCA
+plots, volcano plots, and histograms, to analyze and compare expression values,
+missingness, contaminants, and other features in proteomics datasets. The plots are
+designed to work with the Qtable class as input, which provides structured access to
+proteomics data and experimental design information.
+The style of the plots can be customized using the `set_active_style` function, which
+allows applying style sheets from the msreport library or those available in matplotlib.
+"""
+from .comparison import expression_comparison, pvalue_histogram, volcano_ma
+from .distribution import experiment_ratios, replicate_ratios
+from .multivariate import expression_clustermap, sample_pca
+from .quality import (
+    contaminants,
+    missing_values_horizontal,
+    missing_values_vertical,
+    sample_correlation,
+    sample_intensities,
+)
+from .style import ColorWheelDict, set_active_style, set_dpi
+__all__ = [
+    "ColorWheelDict",
+    "set_dpi",
+    "set_active_style",
+    "missing_values_vertical",
+    "missing_values_horizontal",
+    "contaminants",
+    "sample_intensities",
+    "replicate_ratios",
+    "experiment_ratios",
+    "sample_pca",
+    "volcano_ma",
+    "expression_comparison",
+    "expression_clustermap",
+    "pvalue_histogram",
+    "sample_correlation",
+]

msreport/plot/_partial_plots.py ADDED Viewed

@@ -0,0 +1,159 @@
+from collections.abc import Iterable, Sequence
+from typing import Optional
+import adjustText
+import matplotlib.pyplot as plt
+import seaborn as sns
+from .style import with_active_style
+@with_active_style
+def annotated_scatter(
+    x_values,
+    y_values,
+    labels,
+    ax=None,
+    scatter_kws=None,
+    text_kws=None,
+) -> None:
+    ax = plt.gca() if ax is None else ax
+    if scatter_kws is None:
+        scatter_kws = {}
+    if text_kws is None:
+        text_kws = {}
+    text_params = {
+        "force_text": 0.15,
+        "arrowprops": {
+            "arrowstyle": "-",
+            "color": scatter_kws["color"],
+            "lw": 0.75,
+            "alpha": 0.5,
+        },
+        "lim": 100,
+    }
+    texts = []
+    for x, y, text in zip(x_values, y_values, labels, strict=True):
+        texts.append(ax.text(x, y, text, **text_kws))
+    if texts:
+        adjustText.adjust_text(texts, ax=ax, **text_params)  # type: ignore
+        ax.scatter(x_values, y_values, **scatter_kws)
+@with_active_style
+def box_and_bars(
+    box_values: Sequence[Iterable[float]],
+    bar_values: Sequence[float],
+    group_names: Sequence[str],
+    colors: Optional[Sequence[str]] = None,
+    edge_colors: Optional[Sequence[str]] = None,
+) -> tuple[plt.Figure, list[plt.Axes]]:
+    """Generates a figure with horizontally aligned box and bar subplots.
+    In the top subplot the 'box_values' are displayed as box plots, in lower subplot the
+    'bar_values' are displayed as bar plots. The figure width is automatically adjusted
+    to the number of groups that will be plotted. The length of group_names must be the
+    same as the length of the of the 'bar_values' and the number of iterables from
+    'box_values'. Each group from 'box_values' and 'bar_values' is horizontally aligned
+    between the two subplots.
+    Args:
+        box_values: A sequence of sequences that each contain y values for generating a
+            box plot.
+        bar_values: A sequence of y values for generating bar plots.
+        group_names: Used to label groups from box and bar plots.
+        colors: Sequence of hex color codes for each group that is used for the boxes of
+            the box and bar plots. Must be the same length as group names. If 'colors'
+            is None, boxes are colored in light grey.
+        edge_colors: Sequence of hex color codes for each group that is used for the
+            edges of the boxes and bars. Must be the same length as group names. If
+            None, black is used as edge color.
+    Raises:
+        ValueError: If the length of box_values, bar_values and group_names is not the
+            same or if the length of colors is not the same as group_names.
+    Returns:
+        A matplotlib Figure and a list of Axes objects containing the box and bar plots.
+    """
+    if not (len(box_values) == len(bar_values) == len(group_names)):
+        raise ValueError(
+            "The length of 'box_values', 'bar_values' and 'group_names' must be the "
+            "same."
+        )
+    if colors is not None and len(colors) != len(group_names):
+        raise ValueError(
+            "The length of 'colors' must be the same as the length of 'group_names'."
+        )
+    if edge_colors is not None and len(edge_colors) != len(group_names):
+        raise ValueError(
+            "The length of 'edge_colors' must be the same as the length of "
+            "'group_names'."
+        )
+    if colors is None:
+        colors = ["#D0D0D0" for _ in group_names]
+    if edge_colors is None:
+        edge_colors = ["#000000" for _ in group_names]
+    num_samples = len(group_names)
+    x_values = range(num_samples)
+    bar_width = 0.8
+    suptitle_space_inch = 0.4
+    ax_height_inch = 1.6
+    ax_hspace_inch = 0.35
+    bar_width_inches = 0.24
+    x_padding = 0.24
+    fig_height = suptitle_space_inch + ax_height_inch * 2 + ax_hspace_inch
+    fig_width = (num_samples + (2 * x_padding)) * bar_width_inches
+    fig_size = (fig_width, fig_height)
+    subplot_top = 1 - (suptitle_space_inch / fig_height)
+    subplot_hspace = ax_hspace_inch / ax_height_inch
+    bar_half_width = 0.5
+    lower_xbound = (0 - bar_half_width) - x_padding
+    upper_xbound = (num_samples - 1) + bar_half_width + x_padding
+    fig, axes = plt.subplots(2, figsize=fig_size, sharex=True)
+    fig.subplots_adjust(
+        bottom=0, top=subplot_top, left=0, right=1, hspace=subplot_hspace
+    )
+    fig.suptitle("A box and bars plot", y=1)
+    # Plot boxplots using the box_values
+    ax = axes[0]
+    ax.axhline(0, color="#999999", lw=1, zorder=2)
+    boxplots = ax.boxplot(
+        box_values,
+        positions=x_values,
+        vert=True,
+        showfliers=False,
+        patch_artist=True,
+        widths=bar_width,
+        medianprops={"color": "#000000"},
+    )
+    for color, edge_color, box in zip(
+        colors, edge_colors, boxplots["boxes"], strict=True
+    ):
+        box.set(facecolor=color)
+        box.set(edgecolor=edge_color)
+    ylim = ax.get_ylim()
+    ax.set_ylim(min(-0.4, ylim[0]), max(0.401, ylim[1]))
+    # Plot barplots using the bar_values
+    ax = axes[1]
+    ax.bar(x_values, bar_values, width=bar_width, color=colors, edgecolor=edge_colors)
+    ax.set_xticklabels(
+        group_names, fontsize=plt.rcParams["axes.labelsize"], rotation=90
+    )
+    for ax in axes:
+        ax.grid(False, axis="x")
+    sns.despine(top=True, right=True)
+    ax.set_xlim(lower_xbound, upper_xbound)
+    return fig, axes

msreport 0.0.26__py3-none-any.whl → 0.0.28__py3-none-any.whl

msreport 0.0.26__py3-none-any.whl → 0.0.28__py3-none-any.whl