PyPI - msreport - Versions diffs - 0.0.30__py3-none-any.whl → 0.0.32__py3-none-any.whl - Mend

msreport 0.0.30__py3-none-any.whl → 0.0.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

msreport/__init__.py +1 -1
msreport/aggregate/__init__.py +10 -0
msreport/aggregate/condense.py +9 -0
msreport/aggregate/pivot.py +14 -5
msreport/aggregate/summarize.py +14 -4
msreport/analyze.py +67 -5
msreport/export.py +10 -16
msreport/fasta.py +9 -2
msreport/helper/__init__.py +18 -0
msreport/helper/maxlfq.py +3 -3
msreport/impute.py +18 -10
msreport/isobar.py +11 -14
msreport/normalize.py +95 -10
msreport/peptidoform.py +21 -11
msreport/plot/__init__.py +3 -3
msreport/plot/comparison.py +7 -2
msreport/plot/multivariate.py +34 -15
msreport/plot/quality.py +1 -1
msreport/qtable.py +25 -11
msreport/reader.py +362 -37
msreport/rinterface/limma.py +1 -1
{msreport-0.0.30.dist-info → msreport-0.0.32.dist-info}/METADATA +11 -1
msreport-0.0.32.dist-info/RECORD +38 -0
msreport-0.0.30.dist-info/RECORD +0 -38
{msreport-0.0.30.dist-info → msreport-0.0.32.dist-info}/WHEEL +0 -0
{msreport-0.0.30.dist-info → msreport-0.0.32.dist-info}/licenses/LICENSE.txt +0 -0
{msreport-0.0.30.dist-info → msreport-0.0.32.dist-info}/top_level.txt +0 -0

msreport/peptidoform.py CHANGED Viewed

@@ -1,5 +1,14 @@
+"""Defines the `Peptide` class and associated utilities for handling peptidoforms.
+This module provides a `Peptide` class for representing modified peptide sequences,
+and their site localization probabilities. It offers methods to access and manipulate
+peptide information, summarize isoform probabilities, and retrieve modification sites.
+Additionally, it includes utility functions for parsing modified sequence strings and
+converting site localization probabilities to and from a standardized string format.
+"""
 from collections import defaultdict as ddict
-from typing import Optional, Union
+from typing import Optional
 import numpy as np
@@ -10,7 +19,7 @@ class Peptide:
     def __init__(
         self,
         modified_sequence: str,
-        localization_probabilities: Optional[dict] = None,
+        localization_probabilities: Optional[dict[str, dict[int, float]]] = None,
         protein_position: Optional[int] = None,
     ):
         plain_sequence, modifications = parse_modified_sequence(
@@ -28,7 +37,7 @@ class Peptide:
             self.modification_positions[mod_tag].append(position)
             self.modified_residues[position] = mod_tag
-    def make_modified_sequence(self, include: Optional[list] = None) -> str:
+    def make_modified_sequence(self, include: Optional[list[str]] = None) -> str:
         """Returns a modified sequence string.
         Args:
@@ -55,7 +64,7 @@ class Peptide:
             return 0
         return len(self.modification_positions[modification])
-    def isoform_probability(self, modification: str) -> Union[float, None]:
+    def isoform_probability(self, modification: str) -> float | None:
         """Calculates the isoform probability for a given modification.
         Returns:
@@ -66,12 +75,13 @@ class Peptide:
         """
         probabilities = []
         for site in self.list_modified_peptide_sites(modification):
-            probabilities.append(self.get_peptide_site_probability(site))
-        if None in probabilities:
-            return None
+            probability = self.get_peptide_site_probability(site)
+            if probability is None:
+                return None
+            probabilities.append(probability)
         return float(np.prod(probabilities))
-    def get_peptide_site_probability(self, position: int) -> Optional[float]:
+    def get_peptide_site_probability(self, position: int) -> float | None:
         """Return the modification localization probability of the peptide position.
         Args:
@@ -85,7 +95,7 @@ class Peptide:
         """
         return self._get_site_probability(position, is_protein_position=False)
-    def get_protein_site_probability(self, position: int) -> Optional[float]:
+    def get_protein_site_probability(self, position: int) -> float | None:
         """Return the modification localization probability of the protein position.
         Args:
@@ -109,7 +119,7 @@ class Peptide:
     def _get_site_probability(
         self, position: int, is_protein_position: bool
-    ) -> Optional[float]:
+    ) -> float | None:
         """Return the modification localization probability of the peptide position.
         Args:
@@ -224,7 +234,7 @@ def modify_peptide(
 def make_localization_string(
-    localization_probabilities: dict, decimal_places: int = 3
+    localization_probabilities: dict[str, dict[int, float]], decimal_places: int = 3
 ) -> str:
     """Generates a site localization probability string.

msreport/plot/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-"""This module provides various plotting functions for visualizing data within a Qtable.
+"""Plotting functions for visualizing proteomics data from `Qtable`.
 The functions in this module generate a wide range of plots, including heatmaps, PCA
 plots, volcano plots, and histograms, to analyze and compare expression values,
@@ -6,8 +6,8 @@ missingness, contaminants, and other features in proteomics datasets. The plots
 designed to work with the Qtable class as input, which provides structured access to
 proteomics data and experimental design information.
-The style of the plots can be customized using the `set_active_style` function, which
-allows applying style sheets from the msreport library or those available in matplotlib.
+Users can customize plot styles via the `set_active_style` function, which allows
+applying style sheets from the msreport library or those available in matplotlib.
 """
 from .comparison import expression_comparison, pvalue_histogram, volcano_ma

msreport/plot/comparison.py CHANGED Viewed

@@ -77,10 +77,15 @@ def volcano_ma(
         )
         special_entries = list(special_entries) + list(special_proteins)
-    data = qtable.get_data(exclude_invalid=exclude_invalid)
-    if annotation_column not in data.columns:
+    if annotation_column not in qtable.data.columns:
         annotation_column = qtable.id_column
+    data = qtable.get_data(exclude_invalid=exclude_invalid)
+    mask = np.ones(data.shape[0], dtype=bool)
+    for tag in [ratio_tag, expression_tag, pvalue_tag]:
+        mask = mask & np.isfinite(data[f"{tag} {comparison_group}"])
+    data = data[mask]
     scatter_size = 2 / (max(min(data.shape[0], 10000), 1000) / 1000)
     masks = {

msreport/plot/multivariate.py CHANGED Viewed

@@ -21,6 +21,7 @@ def sample_pca(
     pc_x: str = "PC1",
     pc_y: str = "PC2",
     exclude_invalid: bool = True,
+    exclude_missing: bool = False,
 ) -> tuple[plt.Figure, list[plt.Axes]]:
     """Figure to compare sample similarities with a principle component analysis.
@@ -44,11 +45,14 @@ def sample_pca(
             samples.
         exclude_invalid: If True, rows are filtered according to the Boolean entries of
             the "Valid" column.
+        exclude_missing: If True, only rows without any missing values are used.
     Returns:
         A matplotlib Figure and a list of Axes objects, containing the PCA plots.
     """
     design = qtable.get_design()
+    samples = qtable.get_samples()
     if design.shape[0] < 3:
         fig, ax = plt.subplots(1, 1, figsize=(2, 1.3))
         fig.suptitle(f'PCA of "{tag}" values', y=1.1)
@@ -65,13 +69,22 @@ def sample_pca(
         return fig, np.array([ax])
     table = qtable.make_sample_table(
-        tag, samples_as_columns=True, exclude_invalid=exclude_invalid
+        tag, samples_as_columns=True, exclude_invalid=False
     )
+    inclusion_mask = np.ones(qtable.data.shape[0], dtype=bool)
+    if exclude_invalid:
+        inclusion_mask = inclusion_mask & qtable["Valid"]
+    if exclude_missing:
+        _non_missing_masks = [(qtable[f"Missing {s}"] == 0) for s in samples]
+        inclusion_mask = inclusion_mask & (np.all(_non_missing_masks, axis=0))
+    table = table[inclusion_mask]
     table = table.replace({0: np.nan})
     table = table[np.isfinite(table).sum(axis=1) > 0]
     if not msreport.helper.intensities_in_logspace(table):
         table = np.log2(table)
-    table[table.isna()] = 0
+    table = table.fillna(0)
     table = table.transpose()
     sample_index = table.index.tolist()
@@ -203,6 +216,7 @@ def sample_pca(
 def expression_clustermap(
     qtable: Qtable,
     exclude_invalid: bool = True,
+    exclude_missing: bool = False,
     remove_imputation: bool = True,
     mean_center: bool = False,
     cluster_samples: bool = True,
@@ -218,6 +232,7 @@ def expression_clustermap(
         qtable: A `Qtable` instance, which data is used for plotting.
         exclude_invalid: If True, rows are filtered according to the Boolean entries of
             the "Valid" column.
+        exclude_missing: If True, only rows without any missing values are used.
         remove_imputation: If True, imputed values are set to 0 before clustering.
             Defaults to True.
         mean_center: If True, the data is mean-centered before clustering. Defaults to
@@ -242,25 +257,29 @@ def expression_clustermap(
     if len(samples) < 2:
         raise ValueError("At least two samples are required to generate a clustermap.")
-    data = qtable.make_expression_table(samples_as_columns=True)
+    data = qtable.make_expression_table(samples_as_columns=True, exclude_invalid=False)
     data = data[samples]
+    data = data.fillna(0)
-    for sample in samples:
-        if remove_imputation:
-            data.loc[qtable.data[f"Missing {sample}"], sample] = 0
-        data[sample] = data[sample].fillna(0)
-    if not mean_center:
-        # Hide missing values in the heatmap, making them appear white
-        mask_values = qtable.data[
+    if not mean_center:  # Hide missing values in the heatmap, making them appear white
+        hide_values_mask = qtable.data[
             [f"Missing {sample}" for sample in samples]
         ].to_numpy()
     else:
-        mask_values = np.zeros(data.shape, dtype=bool)
+        hide_values_mask = np.zeros(data.shape, dtype=bool)
+    if remove_imputation:
+        for sample in samples:
+            data.loc[qtable.data[f"Missing {sample}"], sample] = 0
+    inclusion_mask = np.ones(data.shape[0], dtype=bool)
     if exclude_invalid:
-        data = data[qtable.data["Valid"]]
-        mask_values = mask_values[qtable.data["Valid"]]
+        inclusion_mask = inclusion_mask & qtable["Valid"]
+    if exclude_missing:
+        _non_missing_masks = [(qtable[f"Missing {s}"] == 0) for s in samples]
+        inclusion_mask = inclusion_mask & (np.all(_non_missing_masks, axis=0))
+    hide_values_mask = hide_values_mask[inclusion_mask]
+    data = data[inclusion_mask]
     color_wheel = ColorWheelDict()
     for exp in experiments:
@@ -314,7 +333,7 @@ def expression_clustermap(
         col_cluster=cluster_samples,
         col_colors=sample_colors,
         row_colors=["#000000" for _ in range(len(data))],
-        mask=mask_values,
+        mask=hide_values_mask,
         method=cluster_method,
         metric="euclidean",
         **heatmap_args,

msreport/plot/quality.py CHANGED Viewed

@@ -314,7 +314,7 @@ def sample_intensities(
 @with_active_style
 def sample_correlation(
-    qtable, exclude_invalid: bool = True, labels: bool = False
+    qtable: Qtable, exclude_invalid: bool = True, labels: bool = False
 ) -> tuple[plt.Figure, list[plt.Axes]]:
     """Generates a pair-wise correlation matrix of samples 'Expression' values.

msreport/qtable.py CHANGED Viewed

@@ -1,14 +1,28 @@
-from __future__ import annotations
+"""Defines the `Qtable` class, the central container for quantitative proteomics data.
+The `Qtable` class serves as the standardized data structure for `msreport`,
+storing a main table with quantitative values and associated metadata for its entries;
+it also maintains the name of the unique ID column for the main table. Additionally,
+it stores an experimental design table that links sample names to experimental
+conditions and replicate information.
+`Qtable` provides convenience methods for creating subtables and accessing design
+related information (e.g., samples per experiment), and instances of `Qtable` can be
+easily saved to disk and loaded back. As the central data container, the `Qtable`
+facilitates seamless integration with the high-level modules `analyze`, `plot` and
+`export`, which all directly operate on `Qtable` instances.
+"""
 import copy
 import os
 import warnings
 from contextlib import contextmanager
-from typing import Any, Iterable, Optional
+from typing import Any, Generator, Iterable, Optional
 import numpy as np
 import pandas as pd
 import yaml
+from typing_extensions import Self
 import msreport.helper as helper
@@ -359,7 +373,7 @@ class Qtable:
         keep_experiments: Optional[Iterable[str]] = None,
         exclude_samples: Optional[Iterable[str]] = None,
         keep_samples: Optional[Iterable[str]] = None,
-    ):
+    ) -> Generator[None, None, None]:
         """Context manager to temporarily modify the design table.
         Args:
@@ -422,7 +436,7 @@ class Qtable:
         self.design.to_csv(filepaths["design"], sep="\t", index=True)
     @classmethod
-    def load(cls, directory: str, basename: str) -> Qtable:
+    def load(cls, directory: str, basename: str) -> Self:
         """Load a qtable from disk by reading a data, design, and config file.
         Loading a qtable will first import the three files generated during saving, then
@@ -470,7 +484,7 @@ class Qtable:
             )
         id_column = config_data["Unique ID column"]
-        qtable = Qtable(data, design, id_column)
+        qtable = cls(data, design, id_column)
         qtable._expression_columns = config_data["Expression columns"]
         qtable._expression_features = config_data["Expression features"]
         qtable._expression_sample_mapping = config_data["Expression sample mapping"]
@@ -486,11 +500,11 @@ class Qtable:
         )
         self.data.to_csv(path, sep="\t", index=index)
-    def to_clipboard(self, index: bool = False):
+    def to_clipboard(self, index: bool = False) -> None:
         """Writes the data table to the system clipboard."""
         self.data.to_clipboard(sep="\t", index=index)
-    def copy(self) -> Qtable:
+    def copy(self) -> Self:
         """Returns a copy of this Qtable instance."""
         return self.__copy__()
@@ -579,8 +593,8 @@ class Qtable:
         self._expression_features = []
         self._expression_sample_mapping = {}
-    def __copy__(self) -> Qtable:
-        new_instance = Qtable(self.data, self.design, self.id_column)
+    def __copy__(self) -> Self:
+        new_instance = type(self)(self.data, self.design, self.id_column)
         # Copy all private attributes
         for attr in dir(self):
             if (
@@ -609,7 +623,7 @@ def _match_samples_to_tag_columns(
     samples: Iterable[str],
     columns: Iterable[str],
     tag: str,
-) -> dict:
+) -> dict[str, str]:
     """Mapping of samples to columns which contain the sample and the tag.
     Args:
@@ -632,7 +646,7 @@ def _match_samples_to_tag_columns(
     return mapping
-def _get_qtable_export_filepaths(directory: str, name: str):
+def _get_qtable_export_filepaths(directory: str, name: str) -> dict[str, str]:
     """Returns a dictionary of standard filepaths for loading and saving a qtable."""
     filenames = {
         "data": f"{name}.data.tsv",

msreport 0.0.30__py3-none-any.whl → 0.0.32__py3-none-any.whl

msreport 0.0.30__py3-none-any.whl → 0.0.32__py3-none-any.whl