PyPI - msreport - Versions diffs - 0.0.26__py3-none-any.whl → 0.0.28__py3-none-any.whl - Mend

msreport 0.0.26__py3-none-any.whl → 0.0.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

msreport/__init__.py +4 -6
msreport/aggregate/condense.py +1 -1
msreport/aggregate/pivot.py +1 -0
msreport/aggregate/summarize.py +2 -2
msreport/analyze.py +171 -38
msreport/errors.py +1 -2
msreport/export.py +16 -13
msreport/fasta.py +2 -1
msreport/helper/__init__.py +7 -7
msreport/helper/calc.py +29 -24
msreport/helper/maxlfq.py +2 -2
msreport/helper/table.py +5 -6
msreport/impute.py +7 -8
msreport/isobar.py +10 -9
msreport/normalize.py +54 -36
msreport/peptidoform.py +6 -4
msreport/plot/__init__.py +41 -0
msreport/plot/_partial_plots.py +159 -0
msreport/plot/comparison.py +490 -0
msreport/plot/distribution.py +253 -0
msreport/plot/multivariate.py +355 -0
msreport/plot/quality.py +431 -0
msreport/plot/style.py +286 -0
msreport/plot/style_sheets/msreport-notebook.mplstyle +57 -0
msreport/plot/style_sheets/seaborn-whitegrid.mplstyle +45 -0
msreport/qtable.py +109 -17
msreport/reader.py +73 -79
msreport/rinterface/__init__.py +2 -1
msreport/rinterface/limma.py +2 -1
msreport/rinterface/rinstaller.py +3 -3
{msreport-0.0.26.dist-info → msreport-0.0.28.dist-info}/METADATA +7 -3
msreport-0.0.28.dist-info/RECORD +38 -0
msreport/plot.py +0 -1132
msreport-0.0.26.dist-info/RECORD +0 -30
{msreport-0.0.26.dist-info → msreport-0.0.28.dist-info}/WHEEL +0 -0
{msreport-0.0.26.dist-info → msreport-0.0.28.dist-info}/licenses/LICENSE.txt +0 -0
{msreport-0.0.26.dist-info → msreport-0.0.28.dist-info}/top_level.txt +0 -0

msreport/plot/style.py ADDED Viewed

@@ -0,0 +1,286 @@
+"""Manage and apply custom plotting styles for the `msreport.plot` module.
+Active styles in msreport are predefined or customizable Matplotlib style sheets that
+are automatically applied to all plots generated within the library. By changing the
+active style, users can define the rcParams used for styling the plots, such as color
+and font settings.
+The `set_active_style` function allows users to select style sheets from the msreport
+library or any style sheets available in Matplotlib. Additionally, it supports passing
+a dictionary of rcParams to further customize the active style. The additional
+parameters are applied after the style sheet, potentially overriding settings from the
+style sheet.
+Available msreport style sheets:
+- "msreport-notebook"
+- "seaborn-whitegrid"
+"""
+import colorsys
+import functools
+import pathlib
+import re
+from collections import UserDict
+from contextlib import contextmanager
+from typing import Any, Optional
+import matplotlib.colors as mcolors
+import matplotlib.pyplot as plt
+import matplotlib.style
+__all__ = ["ColorWheelDict", "set_active_style", "set_dpi"]
+class ColorWheelDict(UserDict):
+    """Lookup dictionary that maps keys to hex colors by using a color wheel.
+    When a key is not present the first color of the color wheel is added as the value,
+    and the color is moved from the beginning to the end of the color wheel. If no list
+    of colors is specified, a default list of ten colors is added to the color wheel.
+    It is also possible to manually set key and color pairs by using the same syntax as
+    for a regular dictionary.
+    """
+    def __init__(self, colors: Optional[list[str]] = None):
+        """Initializes a ColorWheelDict.
+        Args:
+            colors: Optional, a list of hex colors used for the color wheel. By default
+                a list with ten colors is used.
+        """
+        self.data = {}
+        if colors is not None:
+            self.colors = colors
+        else:
+            self.colors = [
+                "#80b1d3",
+                "#fdb462",
+                "#8dd3c7",
+                "#bebada",
+                "#fb8072",
+                "#b3de69",
+                "#fccde5",
+                "#d9d9d9",
+                "#bc80bd",
+                "#ccebc5",
+            ]
+        self._color_wheel = self.colors.copy()
+    def modified_color(self, key: str, factor: float) -> str:
+        """Returns a color for the specified key with modified lightness.
+        Args:
+            key: The key for which to get the color.
+            factor: The factor by which to modify the lightness. Values > 1 lighten,
+                < 1 darken.
+        Returns:
+            A hex color string with modified lightness.
+        """
+        return _modify_lightness_hex(self[key], factor)
+    def _next_color(self) -> str:
+        color = self._color_wheel.pop(0)
+        self._color_wheel.append(color)
+        return color
+    def __setitem__(self, key, value):
+        is_hexcolor = re.search(r"^#(?:[0-9a-fA-F]{3}){1,2}$", value)
+        if is_hexcolor:
+            self.data[key] = value
+        else:
+            raise ValueError(f"the specified value {value} is not a hexcolor.")
+    def __getitem__(self, key):
+        if key not in self.data:
+            self.data[key] = self._next_color()
+        return self.data[key]
+def set_dpi(dpi: int) -> None:
+    """Changes the default dots per inch settings for matplotlib plots.
+    This effectively makes figures smaller or larger, without affecting the relative
+    sizes of elements within the figures.
+    Args:
+        dpi: New default dots per inch.
+    """
+    plt.rcParams["figure.dpi"] = dpi
+@contextmanager
+def use_active_style():
+    """Context manager to temporarily apply the active style for plotting.
+    The rc parameters 'backend' and 'interactive' will not be reset by the context
+    manager. This is required for compatibility with jupyter notebooks automatically
+    setting up the backend and interactive mode for inline plotting.
+    """
+    active_style_context_arg = _get_active_style_context_arg()
+    orig = dict(matplotlib.rcParams.copy())
+    del orig["backend"]
+    del orig["interactive"]
+    try:
+        matplotlib.style.use(active_style_context_arg)
+        yield
+    finally:
+        # Use `.update` instead of `._update_raw` for matplotlib backward compatibility
+        matplotlib.rcParams.update(orig)
+def with_active_style(func):
+    """Decorator to apply the active style context to a function."""
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        with use_active_style():
+            return func(*args, **kwargs)
+    return wrapper
+def set_active_style(style: str | None, rc: dict[str, Any] | None = None):
+    """Set the active plotting style for the msreport.plot submodule.
+    The chosen style, potentially modified by the rc dictionary, will be
+    applied temporarily using a context manager within the library's
+    plotting functions. This does not modify the global matplotlib rcParams
+    permanently.
+    Args:
+        style: The name of the base style to activate. This can be one of the
+            built-in msreport styles (e.g., 'msreport-notebook', 'seaborn-whitegrid'),
+            a standard matplotlib style, or a style registered by another
+            library like Seaborn (if available).
+        rc: An optional dictionary mapping matplotlib rcParams names (strings)
+            to their desired values. These settings will be applied *after*
+            the base style, overriding any conflicting parameters from the
+            base style for the duration of the plot context.
+    Raises:
+        ValueError: If the specified base style name is not found among the
+            library's styles or the available matplotlib styles.
+        TypeError: If rc is not a dictionary or None.
+    """
+    global _active_style_name, _active_style_rc_override
+    if style is not None and style not in _AVAILABLE_STYLES:
+        current_available = _get_available_styles()
+        if style not in current_available:
+            raise ValueError(
+                f"Style '{style}' not found. Available styles are: "
+                f"{', '.join(current_available)}"
+            )
+    if rc is not None and not isinstance(rc, dict):
+        raise TypeError(f"rc argument must be a dictionary or None, got {type(rc)}")
+    _active_style_name = style
+    _active_style_rc_override = rc.copy() if rc is not None else None
+def get_active_style() -> str | None:
+    """Return the name of the currently active 'msreport.plot' plotting style."""
+    return _active_style_name
+def get_active_override() -> dict[str, Any] | None:
+    """Return the currently active rcParam overrides for the 'msreport.plot' style."""
+    return _active_style_rc_override
+def _get_active_style_context_arg() -> list[str | dict[str, Any]]:
+    """Get the style argument passed to `matplotlib.style.use` for the active style.
+    This combines the base style name/path with any active rcParam overrides.
+    Matplotlib accepts a list of style specifiers that are applied from first to
+    last, so later elements override earlier ones.
+    Returns:
+        A list containing the style name or path and any active rcParam overrides.
+    """
+    context_args: list[str | dict[str, Any]] = []
+    active_style_name = get_active_style()
+    if active_style_name is None:
+        ...
+    elif active_style_name in _LIBRARY_STYLE_PATHS:
+        context_args.append(_LIBRARY_STYLE_PATHS[active_style_name])
+    else:
+        context_args.append(active_style_name)
+    active_override = get_active_override()
+    if active_override is not None:
+        context_args.append(active_override)
+    return context_args
+def _modify_lightness_rgb(
+    rgb_color: tuple[float, float, float], lightness_scale_factor: float
+) -> tuple[float, float, float]:
+    """Modifies the lightness of a color while preserving hue and saturation.
+    Args:
+        rgb_color: A tuple of RGB values in the range [0, 1]
+        lightness_scale_factor: Factor to scale the lightness by (values > 1 lighten, < 1 darken)
+    Returns:
+        A tuple of RGB values with adjusted lightness
+    """
+    hue, lightness, saturation = colorsys.rgb_to_hls(*rgb_color)
+    new_lightness = min(1.0, lightness * lightness_scale_factor)
+    return colorsys.hls_to_rgb(hue, new_lightness, saturation)
+def _modify_lightness_hex(hex_color: str, lightness_scale_factor: float) -> str:
+    """Modifies the lightness of a hex color while preserving hue and saturation.
+    Args:
+        hex_color: A hex color string (e.g., "#80b1d3").
+        lightness_scale_factor: Factor to scale the lightness by (values > 1 lighten, < 1 darken).
+    Returns:
+        A hex color string with adjusted lightness.
+    """
+    rgb_color = mcolors.to_rgb(hex_color)
+    new_ligthness_rgb = _modify_lightness_rgb(rgb_color, lightness_scale_factor)
+    return mcolors.to_hex(new_ligthness_rgb)
+def _get_library_styles() -> dict[str, str]:
+    """Scan the style directory and return a dict of available library styles.
+    Returns:
+        A dictionary mapping style names (without extension) to their full paths.
+        Returns an empty dictionary if the style directory contains no style sheets.
+    Raises:
+        OSError: If the style directory does not exist or cannot be read.
+    """
+    styles = {}
+    try:
+        for filepath in pathlib.Path(_STYLE_DIR).iterdir():
+            if filepath.suffix == ".mplstyle":
+                styles[filepath.stem] = filepath.resolve().as_posix()
+    except OSError as err:
+        raise OSError(
+            f"Could not read 'msreport.plot' style directory {_STYLE_DIR}: {err}. "
+            "Please check if the directory exists and is accessible."
+        ) from err
+    return styles
+def _get_available_styles() -> list[str]:
+    """Get a list of all available style names from library and matplotlib."""
+    lib_styles = _get_library_styles().keys()
+    mpl_styles = matplotlib.style.available
+    return list(set(lib_styles) | set(mpl_styles))
+_STYLE_DIR: str = (pathlib.Path(__file__).parent / "style_sheets").resolve().as_posix()
+_AVAILABLE_STYLES: list[str] = _get_available_styles()
+_LIBRARY_STYLE_PATHS: dict[str, str] = _get_library_styles()
+_DEFAULT_STYLE: str = "msreport-notebook"
+_active_style_name: str | None = _DEFAULT_STYLE
+_active_style_rc_override: dict[str, Any] | None = None

msreport/plot/style_sheets/msreport-notebook.mplstyle ADDED Viewed

@@ -0,0 +1,57 @@
+## LINES
+lines.solid_capstyle: round
+## PATCHES
+patch.edgecolor:       white
+patch.force_edgecolor: True
+## FONT
+font.family:     sans-serif
+font.sans-serif: Arial, DejaVu Sans, Liberation Sans, Bitstream Vera Sans, sans-serif
+font.size:       10
+## TEXT
+text.color: black
+## FIGURE
+figure.facecolor: white
+figure.titlesize: 12
+## AXES
+axes.facecolor:     white
+axes.edgecolor:     black  # Color of the axes border / spines
+axes.linewidth:     1      # Width of the axes border / spines
+axes.labelcolor:    black
+axes.labelsize:     10
+axes.axisbelow:     True
+axes.grid:          True
+axes.titlesize:     10
+axes.spines.left:   True
+axes.spines.bottom: True
+axes.spines.right:  True
+axes.spines.top:    True
+## TICKS
+xtick.top:         False
+xtick.bottom:      False
+xtick.color:       black
+xtick.direction:   out
+xtick.major.width: 1
+xtick.labelsize:   8
+ytick.left:        False
+ytick.right:       False
+ytick.color:       black
+ytick.direction:   out
+ytick.major.width: 1
+ytick.labelsize:   8
+## GRIDS
+grid.alpha:     1.0
+grid.color:     0.8
+grid.linestyle: dashed
+grid.linewidth: 1
+## LEGEND
+legend.fontsize:       10
+legend.title_fontsize: None  # Set to None to use the same as axes.titlesize

msreport/plot/style_sheets/seaborn-whitegrid.mplstyle ADDED Viewed

@@ -0,0 +1,45 @@
+## LINES
+lines.solid_capstyle: round
+## PATCHES
+patch.edgecolor:       white
+patch.force_edgecolor: True
+## FONT
+font.family:     sans-serif
+font.sans-serif: Arial, DejaVu Sans, Liberation Sans, Bitstream Vera Sans, sans-serif
+## TEXT
+text.color: 0.15
+## FIGURE
+figure.facecolor: white
+## AXES
+axes.facecolor:     white
+axes.edgecolor:     0.15
+axes.labelcolor:    0.15
+axes.axisbelow:     True
+axes.grid:          True
+axes.spines.left:   True
+axes.spines.bottom: True
+axes.spines.right:  True
+axes.spines.top:    True
+## TICKS
+xtick.top:       False
+xtick.bottom:    False
+xtick.color:     0.15
+xtick.direction: out
+ytick.left:      False
+ytick.right:     False
+ytick.color:     0.15
+ytick.direction: out
+## GRID
+grid.color:     0.8
+grid.linestyle: -
+## IMAGE
+image.cmap: rocket

msreport/qtable.py CHANGED Viewed

@@ -1,7 +1,10 @@
 from __future__ import annotations
-from typing import Any, Iterable, Optional
+import copy
 import os
 import warnings
+from contextlib import contextmanager
+from typing import Any, Iterable, Optional
 import numpy as np
 import pandas as pd
@@ -24,7 +27,14 @@ class Qtable:
         design: A pandas.DataFrame describing the experimental design.
     """
-    def __init__(self, data: pd.DataFrame, design: Optional[pd.DataFrame] = None):
+    _default_id_column = "Representative protein"
+    def __init__(
+        self,
+        data: pd.DataFrame,
+        design: Optional[pd.DataFrame] = None,
+        id_column: str = "Representative protein",
+    ):
         """Initializes the Qtable.
         If data does not contain a "Valid" column, this column is added and all its row
@@ -36,11 +46,34 @@ class Qtable:
                 contain the columns "Sample" and "Experiment". The "Sample" entries
                 should correspond to the Sample names present in the quantitative
                 columns of the data.
+            id_column: The name of the column that contains the unique identifiers for
+                the entries in the data table. Default is "Representative protein".
+        Raises:
+            KeyError: If the specified id_column is not found in data.
+            ValueError: If the index of data or the specified id_column does not
+                contain unique values.
         """
         self.design: pd.DataFrame
         self.data: pd.DataFrame
+        self._id_column: str
+        if not data.index.is_unique:
+            raise ValueError(
+                "The index of the 'data' table must contain unique values."
+            )
+        if id_column not in data.columns:
+            raise KeyError(
+                f"Column '{id_column}' not found in 'data'. Please specify a valid "
+                "column that contains unique identifiers for the entries in 'data'."
+            )
+        if not data[id_column].is_unique:
+            raise ValueError(
+                f"Column '{id_column}' in 'data' table must contain unique identifiers"
+                ", i.e. no duplicated values. Please provide a valid 'id_column'."
+            )
         self.data = data.copy()
+        self._id_column = id_column
         if "Valid" not in self.data.columns:
             self.data["Valid"] = True
         if design is not None:
@@ -73,12 +106,12 @@ class Qtable:
         """
         columns = design.columns.tolist()
         required_columns = ["Experiment", "Sample", "Replicate"]
-        if not all([c in columns for c in required_columns]):
+        if not all(c in columns for c in required_columns):
             exception_message = "".join(
                 [
                     "The design table must at least contain the columns: ",
                     ", ".join(f'"{c}"' for c in required_columns),
-                    ". " "It only contains the columns: ",
+                    ". It only contains the columns: ",
                     ", ".join(f'"{c}"' for c in columns),
                     ".",
                 ]
@@ -105,6 +138,11 @@ class Qtable:
         """Returns a copy of the design table."""
         return self.design.copy()
+    @property
+    def id_column(self) -> str:
+        """Returns the name of the id column."""
+        return self._id_column
     def get_samples(self, experiment: Optional[str] = None) -> list[str]:
         """Returns a list of samples present in the design table.
@@ -315,6 +353,50 @@ class Qtable:
             expression_features.columns.difference(self._expression_features)
         )
+    @contextmanager
+    def temp_design(
+        self,
+        design: Optional[pd.DataFrame] = None,
+        exclude_experiments: Optional[Iterable[str]] = None,
+        keep_experiments: Optional[Iterable[str]] = None,
+        exclude_samples: Optional[Iterable[str]] = None,
+        keep_samples: Optional[Iterable[str]] = None,
+    ):
+        """Context manager to temporarily modify the design table.
+        Args:
+            design: A DataFrame to temporarily replace the current design table.
+            exclude_experiments: A list of experiments to exclude from the design.
+            keep_experiments: A list of experiments to keep in the design (all others are removed).
+            exclude_samples: A list of samples to exclude from the design.
+            keep_samples: A list of samples to keep in the design (all others are removed).
+        Yields:
+            None. Restores the original design table after the context ends.
+        """
+        original_design = self.design
+        _design: pd.DataFrame
+        if design is None:
+            _design = self.get_design()
+        else:
+            _design = design
+        if exclude_experiments is not None:
+            _design = _design[~_design["Experiment"].isin(exclude_experiments)]
+        if keep_experiments is not None:
+            _design = _design[_design["Experiment"].isin(keep_experiments)]
+        if exclude_samples is not None:
+            _design = _design[~_design["Sample"].isin(exclude_samples)]
+        if keep_samples is not None:
+            _design = _design[_design["Sample"].isin(keep_samples)]
+        try:
+            self.add_design(_design)
+            yield
+        finally:
+            self.add_design(original_design)
     def save(self, directory: str, basename: str):
         """Save qtable to disk, creating a data, design, and config file.
@@ -333,6 +415,8 @@ class Qtable:
             "Expression features": self._expression_features,
             "Expression sample mapping": self._expression_sample_mapping,
             "Data dtypes": self.data.dtypes.astype(str).to_dict(),
+            "Design dtypes": self.design.dtypes.astype(str).to_dict(),
+            "Unique ID column": self._id_column,
         }
         with open(filepaths["config"], "w") as openfile:
             yaml.safe_dump(config_data, openfile)
@@ -363,14 +447,24 @@ class Qtable:
         data = _read_csv_str_safe(
             filepaths["data"], dtypes, **{"sep": "\t", "index_col": 0}
         )
-        design = pd.read_csv(
-            filepaths["design"], sep="\t", index_col=0, keep_default_na=True
-        )
+        # This check is required for backwards compatibility with msreport <= 0.0.27
+        if "Design dtypes" in config_data:
+            design_dtypes = config_data["Design dtypes"]
+            design = _read_csv_str_safe(
+                filepaths["design"], design_dtypes, **{"sep": "\t", "index_col": 0}
+            )
+        else:
+            design = pd.read_csv(
+                filepaths["design"], sep="\t", index_col=0, keep_default_na=True
+            )
         qtable = Qtable(data, design)
         qtable._expression_columns = config_data["Expression columns"]
         qtable._expression_features = config_data["Expression features"]
         qtable._expression_sample_mapping = config_data["Expression sample mapping"]
+        # This check is required for backwards compatibility with msreport <= 0.0.27
+        if "Unique ID column" in config_data:
+            qtable._id_column = config_data["Unique ID column"]
         return qtable
     def to_tsv(self, path: str, index: bool = False):
@@ -388,7 +482,6 @@ class Qtable:
     def copy(self) -> Qtable:
         """Returns a copy of this Qtable instance."""
-        # not tested #
         return self.__copy__()
     def _set_expression(
@@ -417,22 +510,22 @@ class Qtable:
         samples = list(columns_to_samples.values())
         if not expression_columns:
-            raise KeyError(f"No expression columns matched in qtable")
-        if not all([e in data_columns for e in expression_columns]):
+            raise KeyError("No expression columns matched in qtable")
+        if not all(e in data_columns for e in expression_columns):
             exception_message = (
                 f"Not all specified columns {expression_columns} are present in the"
                 " qtable"
             )
             raise KeyError(exception_message)
-        if not all([s in self.get_samples() for s in samples]):
+        if not all(s in self.get_samples() for s in samples):
             exception_message = (
                 f"Not all specified samples {samples} are present in the qtable.design"
             )
             raise ValueError(exception_message)
-        if not all([s in samples for s in self.get_samples()]):
+        if not all(s in samples for s in self.get_samples()):
             exception_message = (
-                f"Not all samples from qtable.design are also present in the specified"
-                f"samples."
+                "Not all samples from qtable.design are also present in the specified"
+                "samples."
             )
             raise ValueError(exception_message)
@@ -477,7 +570,6 @@ class Qtable:
         self._expression_sample_mapping = {}
     def __copy__(self) -> Qtable:
-        # not tested #
         new_instance = Qtable(self.data, self.design)
         # Copy all private attributes
         for attr in dir(self):
@@ -486,7 +578,7 @@ class Qtable:
                 and attr.startswith("_")
                 and not attr.startswith("__")
             ):
-                attr_values = self.__getattribute__(attr).copy()
+                attr_values = copy.deepcopy(self.__getattribute__(attr))
                 new_instance.__setattr__(attr, attr_values)
         return new_instance
@@ -521,7 +613,7 @@ def _match_samples_to_tag_columns(
     """
     WHITESPACE_CHARS = " ."
-    mapping = dict()
+    mapping = {}
     for sample in samples:
         for col in columns:
             if col.replace(tag, "").replace(sample, "").strip(WHITESPACE_CHARS) == "":

msreport 0.0.26__py3-none-any.whl → 0.0.28__py3-none-any.whl

msreport 0.0.26__py3-none-any.whl → 0.0.28__py3-none-any.whl