PyPI - msreport - Versions diffs - 0.0.30__py3-none-any.whl → 0.0.31__py3-none-any.whl - Mend

msreport 0.0.30py3-none-any.whl → 0.0.31py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

msreport/__init__.py +1 -1
msreport/aggregate/__init__.py +10 -0
msreport/aggregate/condense.py +9 -0
msreport/aggregate/pivot.py +14 -5
msreport/aggregate/summarize.py +14 -4
msreport/analyze.py +67 -5
msreport/export.py +9 -15
msreport/fasta.py +9 -2
msreport/helper/__init__.py +18 -0
msreport/impute.py +18 -10
msreport/isobar.py +11 -14
msreport/normalize.py +95 -10
msreport/peptidoform.py +21 -11
msreport/plot/__init__.py +3 -3
msreport/plot/quality.py +1 -1
msreport/qtable.py +25 -11
msreport/reader.py +216 -23
msreport/rinterface/limma.py +1 -1
{msreport-0.0.30.dist-info → msreport-0.0.31.dist-info}/METADATA +11 -1
msreport-0.0.31.dist-info/RECORD +38 -0
msreport-0.0.30.dist-info/RECORD +0 -38
{msreport-0.0.30.dist-info → msreport-0.0.31.dist-info}/WHEEL +0 -0
{msreport-0.0.30.dist-info → msreport-0.0.31.dist-info}/licenses/LICENSE.txt +0 -0
{msreport-0.0.30.dist-info → msreport-0.0.31.dist-info}/top_level.txt +0 -0

msreport/peptidoform.py CHANGED Viewed

@@ -1,5 +1,14 @@
+"""Defines the `Peptide` class and associated utilities for handling peptidoforms.
+This module provides a `Peptide` class for representing modified peptide sequences,
+and their site localization probabilities. It offers methods to access and manipulate
+peptide information, summarize isoform probabilities, and retrieve modification sites.
+Additionally, it includes utility functions for parsing modified sequence strings and
+converting site localization probabilities to and from a standardized string format.
+"""
 from collections import defaultdict as ddict
-from typing import Optional, Union
+from typing import Optional
 import numpy as np
@@ -10,7 +19,7 @@ class Peptide:
     def __init__(
         self,
         modified_sequence: str,
-        localization_probabilities: Optional[dict] = None,
+        localization_probabilities: Optional[dict[str, dict[int, float]]] = None,
         protein_position: Optional[int] = None,
     ):
         plain_sequence, modifications = parse_modified_sequence(
@@ -28,7 +37,7 @@ class Peptide:
             self.modification_positions[mod_tag].append(position)
             self.modified_residues[position] = mod_tag
-    def make_modified_sequence(self, include: Optional[list] = None) -> str:
+    def make_modified_sequence(self, include: Optional[list[str]] = None) -> str:
         """Returns a modified sequence string.
         Args:
@@ -55,7 +64,7 @@ class Peptide:
             return 0
         return len(self.modification_positions[modification])
-    def isoform_probability(self, modification: str) -> Union[float, None]:
+    def isoform_probability(self, modification: str) -> float | None:
         """Calculates the isoform probability for a given modification.
         Returns:
@@ -66,12 +75,13 @@ class Peptide:
         """
         probabilities = []
         for site in self.list_modified_peptide_sites(modification):
-            probabilities.append(self.get_peptide_site_probability(site))
-        if None in probabilities:
-            return None
+            probability = self.get_peptide_site_probability(site)
+            if probability is None:
+                return None
+            probabilities.append(probability)
         return float(np.prod(probabilities))
-    def get_peptide_site_probability(self, position: int) -> Optional[float]:
+    def get_peptide_site_probability(self, position: int) -> float | None:
         """Return the modification localization probability of the peptide position.
         Args:
@@ -85,7 +95,7 @@ class Peptide:
         """
         return self._get_site_probability(position, is_protein_position=False)
-    def get_protein_site_probability(self, position: int) -> Optional[float]:
+    def get_protein_site_probability(self, position: int) -> float | None:
         """Return the modification localization probability of the protein position.
         Args:
@@ -109,7 +119,7 @@ class Peptide:
     def _get_site_probability(
         self, position: int, is_protein_position: bool
-    ) -> Optional[float]:
+    ) -> float | None:
         """Return the modification localization probability of the peptide position.
         Args:
@@ -224,7 +234,7 @@ def modify_peptide(
 def make_localization_string(
-    localization_probabilities: dict, decimal_places: int = 3
+    localization_probabilities: dict[str, dict[int, float]], decimal_places: int = 3
 ) -> str:
     """Generates a site localization probability string.

msreport/plot/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-"""This module provides various plotting functions for visualizing data within a Qtable.
+"""Plotting functions for visualizing proteomics data from `Qtable`.
 The functions in this module generate a wide range of plots, including heatmaps, PCA
 plots, volcano plots, and histograms, to analyze and compare expression values,
@@ -6,8 +6,8 @@ missingness, contaminants, and other features in proteomics datasets. The plots
 designed to work with the Qtable class as input, which provides structured access to
 proteomics data and experimental design information.
-The style of the plots can be customized using the `set_active_style` function, which
-allows applying style sheets from the msreport library or those available in matplotlib.
+Users can customize plot styles via the `set_active_style` function, which allows
+applying style sheets from the msreport library or those available in matplotlib.
 """
 from .comparison import expression_comparison, pvalue_histogram, volcano_ma

msreport/plot/quality.py CHANGED Viewed

@@ -314,7 +314,7 @@ def sample_intensities(
 @with_active_style
 def sample_correlation(
-    qtable, exclude_invalid: bool = True, labels: bool = False
+    qtable: Qtable, exclude_invalid: bool = True, labels: bool = False
 ) -> tuple[plt.Figure, list[plt.Axes]]:
     """Generates a pair-wise correlation matrix of samples 'Expression' values.

msreport/qtable.py CHANGED Viewed

@@ -1,14 +1,28 @@
-from __future__ import annotations
+"""Defines the `Qtable` class, the central container for quantitative proteomics data.
+The `Qtable` class serves as the standardized data structure for `msreport`,
+storing a main table with quantitative values and associated metadata for its entries;
+it also maintains the name of the unique ID column for the main table. Additionally,
+it stores an experimental design table that links sample names to experimental
+conditions and replicate information.
+`Qtable` provides convenience methods for creating subtables and accessing design
+related information (e.g., samples per experiment), and instances of `Qtable` can be
+easily saved to disk and loaded back. As the central data container, the `Qtable`
+facilitates seamless integration with the high-level modules `analyze`, `plot` and
+`export`, which all directly operate on `Qtable` instances.
+"""
 import copy
 import os
 import warnings
 from contextlib import contextmanager
-from typing import Any, Iterable, Optional
+from typing import Any, Generator, Iterable, Optional
 import numpy as np
 import pandas as pd
 import yaml
+from typing_extensions import Self
 import msreport.helper as helper
@@ -359,7 +373,7 @@ class Qtable:
         keep_experiments: Optional[Iterable[str]] = None,
         exclude_samples: Optional[Iterable[str]] = None,
         keep_samples: Optional[Iterable[str]] = None,
-    ):
+    ) -> Generator[None, None, None]:
         """Context manager to temporarily modify the design table.
         Args:
@@ -422,7 +436,7 @@ class Qtable:
         self.design.to_csv(filepaths["design"], sep="\t", index=True)
     @classmethod
-    def load(cls, directory: str, basename: str) -> Qtable:
+    def load(cls, directory: str, basename: str) -> Self:
         """Load a qtable from disk by reading a data, design, and config file.
         Loading a qtable will first import the three files generated during saving, then
@@ -470,7 +484,7 @@ class Qtable:
             )
         id_column = config_data["Unique ID column"]
-        qtable = Qtable(data, design, id_column)
+        qtable = cls(data, design, id_column)
         qtable._expression_columns = config_data["Expression columns"]
         qtable._expression_features = config_data["Expression features"]
         qtable._expression_sample_mapping = config_data["Expression sample mapping"]
@@ -486,11 +500,11 @@ class Qtable:
         )
         self.data.to_csv(path, sep="\t", index=index)
-    def to_clipboard(self, index: bool = False):
+    def to_clipboard(self, index: bool = False) -> None:
         """Writes the data table to the system clipboard."""
         self.data.to_clipboard(sep="\t", index=index)
-    def copy(self) -> Qtable:
+    def copy(self) -> Self:
         """Returns a copy of this Qtable instance."""
         return self.__copy__()
@@ -579,8 +593,8 @@ class Qtable:
         self._expression_features = []
         self._expression_sample_mapping = {}
-    def __copy__(self) -> Qtable:
-        new_instance = Qtable(self.data, self.design, self.id_column)
+    def __copy__(self) -> Self:
+        new_instance = type(self)(self.data, self.design, self.id_column)
         # Copy all private attributes
         for attr in dir(self):
             if (
@@ -609,7 +623,7 @@ def _match_samples_to_tag_columns(
     samples: Iterable[str],
     columns: Iterable[str],
     tag: str,
-) -> dict:
+) -> dict[str, str]:
     """Mapping of samples to columns which contain the sample and the tag.
     Args:
@@ -632,7 +646,7 @@ def _match_samples_to_tag_columns(
     return mapping
-def _get_qtable_export_filepaths(directory: str, name: str):
+def _get_qtable_export_filepaths(directory: str, name: str) -> dict[str, str]:
     """Returns a dictionary of standard filepaths for loading and saving a qtable."""
     filenames = {
         "data": f"{name}.data.tsv",

msreport/reader.py CHANGED Viewed

@@ -1,17 +1,18 @@
-"""Module for reading result tables from various MS analysis tools and converting them
-to a standardized format following the MsReport convention.
+"""Provides tools for importing and standardizing quantitative proteomics data.
-Currently for MaxQuant and FragPipe protein, peptide, and ion tables are supported, and
-for Spectronaut protein tables are supported when exported with the correct report
-scheme.
+This module offers software-specific reader classes to import raw result tables (e.g.,
+proteins, peptides, ions) from various proteomics software (MaxQuant, FragPipe,
+Spectronaut) and convert them into a standardized `msreport` format. Additionally, it
+provides functions for annotating imported data with biological metadata, such as
+protein information (e.g., sequence length, molecular weight) and peptide positions,
+extracted from a ProteinDatabase (FASTA file).
-New column names:
+New columns added to imported protein tables:
 - Representative protein
 - Leading proteins
 - Protein reported by software
-Unified column names:
-- Total peptides
+Standardized column names for quantitative values (if available in the software output):
 - Spectral count "sample name"
 - Unique spectral count "sample name"
 - Total spectral count "sample name"
@@ -38,6 +39,7 @@ from msreport.helper.temp import extract_window_around_position
 class Protein(Protocol):
     """Abstract protein entry"""
+    # identifier: str
     header: str
     sequence: str
     header_fields: dict[str, str]
@@ -46,9 +48,9 @@ class Protein(Protocol):
 class ProteinDatabase(Protocol):
     """Abstract protein database"""
-    def __getitem__(self, protein_id: str) -> Protein: ...
+    def __getitem__(self, identifier: str) -> Protein: ...
-    def __contains__(self, protein_id: str) -> bool: ...
+    def __contains__(self, identifier: str) -> bool: ...
 class ResultReader:
@@ -497,7 +499,9 @@ class MaxQuantReader(ResultReader):
         mod_probability_columns = msreport.helper.find_columns(new_df, "Probabilities")
         localization_string_column = "Modification localization string"
-        mod_localization_probabilities = [{} for _ in range(new_df.shape[0])]
+        mod_localization_probabilities: list[dict[str, dict[int, float]]] = [
+            {} for _ in range(new_df.shape[0])
+        ]
         for probability_column in mod_probability_columns:
             # FUTURE: Type should be checked and enforced during the import
             if not pd.api.types.is_string_dtype(new_df[probability_column].dtype):
@@ -681,7 +685,15 @@ class FragPipeReader(ResultReader):
     def import_design(
         self, filename: Optional[str] = None, sort: bool = False
     ) -> pd.DataFrame:
-        """Reads a 'fp-manifest' file and returns a processed design dataframe.
+        """Read a 'fp-manifest' file and return a processed design dataframe.
+        The manifest columns "Path", "Experiment", and "Bioreplicate" are mapped to the
+        design table columns "Rawfile", "Experiment", and "Replicate". The "Rawfile"
+        column is extracted as the filename from the full path. The "Sample" column is
+        generated by combining "Experiment" and "Replicate" with an underscore
+        (e.g., "Experiment_Replicate"), except when "Replicate" is empty, in which case
+        "Sample" is set to "Experiment". If "Experiment" is missing, it is set to "exp"
+        by default.
         Args:
             filename: Allows specifying an alternative filename, otherwise the default
@@ -704,12 +716,18 @@ class FragPipeReader(ResultReader):
             raise FileNotFoundError(
                 f"File '{filepath}' does not exist. Please check the file path."
             )
-        fp_manifest = pd.read_csv(filepath, sep="\t", header=None, dtype=str)
+        fp_manifest = (
+            pd.read_csv(
+                filepath, sep="\t", header=None, na_values=[""], keep_default_na=False
+            )
+            .fillna("")
+            .astype(str)
+        )
         fp_manifest.columns = ["Path", "Experiment", "Bioreplicate", "Data type"]
         design = pd.DataFrame(
             {
-                "Sample": fp_manifest["Experiment"] + "_" + fp_manifest["Bioreplicate"],
+                "Sample": "",
                 "Experiment": fp_manifest["Experiment"],
                 "Replicate": fp_manifest["Bioreplicate"],
                 "Rawfile": fp_manifest["Path"].apply(
@@ -718,6 +736,12 @@ class FragPipeReader(ResultReader):
                 ),
             }
         )
+        # FragPipe uses "exp" for missing 'Experiment' values
+        design.loc[design["Experiment"] == "", "Experiment"] = "exp"
+        # FragPipe combines 'Experiment' + "_" + 'Replicate' into 'Sample', except when
+        # 'Replicate' is empty, in which case 'Sample' is set to 'Experiment'.
+        design["Sample"] = design["Experiment"] + "_" + design["Replicate"]
+        design.loc[design["Replicate"] == "", "Sample"] = design["Experiment"]
         if sort:
             design.sort_values(by=["Experiment", "Replicate"], inplace=True)
@@ -963,7 +987,7 @@ class FragPipeReader(ResultReader):
         filename: Optional[str] = None,
         rename_columns: bool = True,
         rewrite_modifications: bool = True,
-    ):
+    ) -> pd.DataFrame:
         """Concatenate all "psm.tsv" files and return a processed dataframe.
         Args:
@@ -1499,6 +1523,7 @@ class SpectronautReader(ResultReader):
         filename: Optional[str] = None,
         filetag: Optional[str] = None,
         rename_columns: bool = True,
+        rewrite_modifications: bool = True,
     ) -> pd.DataFrame:
         """Reads an ion evidence file (long format) and returns a processed dataframe.
@@ -1508,8 +1533,15 @@ class SpectronautReader(ResultReader):
         generated by concatenating the "Modified sequence" and "Charge" columns, and if
         present, the "Compensation voltage" column.
-        (!) Note that the modified sequence and modification localization probabilities
-        are currently not processed.
+        "Modified sequence" entries contain modifications within square brackets.
+        "Modifications" entries are strings in the form of "position:modification_tag",
+        multiple modifications are joined by ";". An example for a modified sequence and
+        a modification entry: "PEPT[Phospho]IDO[Oxidation]", "4:Phospho;7:Oxidation".
+        "Modification localization string" contains localization probabilities in the
+        format "Mod1@Site1:Probability1,Site2:Probability2;Mod2@Site3:Probability3",
+        e.g. "15.9949@11:1.000;79.9663@3:0.200,4:0.800". Refer to
+        `msreport.peptidoform.make_localization_string` for details.
         Args:
             filename: Optional, allows specifying a specific file that will be imported.
@@ -1517,6 +1549,10 @@ class SpectronautReader(ResultReader):
                 a substring, instead of specifying a filename.
             rename_columns: If True, columns are renamed according to the MsReport
                 convention; default True.
+            rewrite_modifications: If True, the peptide format in "Modified sequence" is
+                changed according to the MsReport convention, and a "Modifications"
+                column is added that contains the amino acid position of each
+                modification.
+                Requires 'rename_columns' to be true. Default True.
         Returns:
             A dataframe containing the processed ion table.
@@ -1544,6 +1580,9 @@ class SpectronautReader(ResultReader):
         df = self._add_protein_entries(df)
         if rename_columns:
             df = self._rename_columns(df, True)
+        if rewrite_modifications and rename_columns:
+            df = self._add_peptide_modification_entries(df)
+            df = self._add_modification_localization_string(df)
             df["Ion ID"] = df["Modified sequence"] + "_c" + df["Charge"].astype(str)
             if "Compensation voltage" in df.columns:
                 _cv = df["Compensation voltage"].astype(str)
@@ -1597,6 +1636,70 @@ class SpectronautReader(ResultReader):
         leading_protein_entries = df["PG.ProteinAccessions"].str.split(";").tolist()
         return leading_protein_entries
+    def _add_peptide_modification_entries(self, df: pd.DataFrame) -> pd.DataFrame:
+        """Adds standardized "Modified sequence" and "Modifications" columns.
+        "Modified sequence" entries contain modifications within square brackets.
+        "Modifications" entries are strings in the form of "position:modification_text",
+        multiple modifications are joined by ";". An example for a modified sequence and
+        a modification entry: "PEPT[Phospho]IDO[Oxidation]", "4:Phospho;7:Oxidation".
+        Requires the columns "Peptide sequence" and "Modified sequence" from the
+        software output.
+        Args:
+            df: Dataframe containing "Peptide sequence" and "Modified sequence" columns.
+        Returns:
+            A copy of the input dataframe with updated columns.
+        """
+        # TODO: not tested
+        mod_sequences = df["Modified sequence"].str[1:-1]  # Remove surrounding "_"
+        mod_entries = _generate_modification_entries(
+            df["Peptide sequence"], mod_sequences, "[", "]"
+        )
+        new_df = df.copy()
+        new_df["Modified sequence"] = mod_entries["Modified sequence"]
+        new_df["Modifications"] = mod_entries["Modifications"]
+        return new_df
+    def _add_modification_localization_string(self, df: pd.DataFrame) -> pd.DataFrame:
+        """Adds modification localization string columns.
+        Extracts localization probabilities from the "EG.PTMLocalizationProbabilities"
+        column, converts them into the standardized modification localization string
+        format used by msreport, and adds new column "Modification localization string".
+        Probabilities are written in the format
+        "Mod1@Site1:Probability1,Site2:Probability2;Mod2@Site3:Probability3",
+        e.g. "15.9949@11:1.000;79.9663@3:0.200,4:0.800". Refer to
+        `msreport.peptidoform.make_localization_string` for details.
+        Args:
+            df: Dataframe containing a "EG.PTMLocalizationProbabilities" column.
+        Returns:
+            A copy of the input dataframe with the added column
+            "Modification localization string".
+        """
+        # TODO: not tested
+        new_df = df.copy()
+        localization_strings = []
+        for localization_entry in new_df["EG.PTMLocalizationProbabilities"]:
+            if localization_entry == "":
+                localization_strings.append("")
+                continue
+            localization_probabilities = extract_spectronaut_localization_probabilities(
+                localization_entry
+            )
+            localization_string = msreport.peptidoform.make_localization_string(
+                localization_probabilities
+            )
+            localization_strings.append(localization_string)
+        new_df["Modification localization string"] = localization_strings
+        return new_df
 def sort_leading_proteins(
     table: pd.DataFrame,
@@ -1639,7 +1742,7 @@ def sort_leading_proteins(
     db_origins_present = "Leading proteins database origin" in table
     if database_order is not None:
-        database_encoding = defaultdict(lambda: 999)
+        database_encoding: dict[str, int] = defaultdict(lambda: 999)
         database_encoding.update({db: i for i, db in enumerate(database_order)})
     if penalize_contaminants is not None:
         contaminant_encoding = {"False": 0, "True": 1, False: 0, True: 1}
@@ -1647,7 +1750,7 @@ def sort_leading_proteins(
     for _, row in table.iterrows():
         protein_ids = row["Leading proteins"].split(";")
-        sorting_info = [[] for _ in protein_ids]
+        sorting_info: list[list] = [[] for _ in protein_ids]
         if special_proteins is not None:
             for i, _id in enumerate(protein_ids):
                 sorting_info[i].append(_id not in special_proteins)
@@ -1787,7 +1890,7 @@ def add_protein_site_annotation(
     protein_db: ProteinDatabase,
     protein_column: str = "Representative protein",
     site_column: str = "Protein site",
-):
+) -> pd.DataFrame:
     """Uses a FASTA protein database to add protein site annotation columns.
     Adds the columns "Modified residue", which corresponds to the amino acid at the
@@ -1925,6 +2028,61 @@ def add_leading_proteins_annotation(
     return table
+def add_protein_site_identifiers(
+    table: pd.DataFrame,
+    protein_db: ProteinDatabase,
+    site_column: str,
+    protein_name_column: str,
+):
+    """Adds a "Protein site identifier" column to the 'table'.
+    The "Protein site identifier" is generated by concatenating the protein name
+    with the amino acid and position of the protein site or sites, e.g. "P12345 - S123"
+    or "P12345 - S123 / T125". The amino acid is extracted from the protein sequence at
+    the position of the site. If the protein name is not available, the
+    "Representative protein" entry is used instead.
+    Args:
+        table: Dataframe to which the protein site identifiers are added.
+        protein_db: A protein database containing entries from one or multiple FASTA
+            files. Protein identifiers in the 'table' column "Representative protein"
+            are used to look up entries in the 'protein_db'.
+        site_column: Column in 'table' that contains protein site positions. Positions
+            are one-indexed, meaning the first amino acid of the protein is position 1.
+            Multiple sites in a single entry should be separated by ";".
+        protein_name_column: Column in 'table' that contains protein names, which will
+            be used to generate the identifier. If no name is available, the accession
+            is used instead.
+    Raises:
+        ValueError: If the "Representative protein", 'protein_name_column' or
+            'site_column' is not found in the 'table'.
+    """
+    if site_column not in table.columns:
+        raise ValueError(f"Column '{site_column}' not found in the table.")
+    if protein_name_column not in table.columns:
+        raise ValueError(f"Column '{protein_name_column}' not found in the table.")
+    if "Representative protein" not in table.columns:
+        raise ValueError("Column 'Representative protein' not found in the table.")
+    site_identifiers = []
+    for accession, sites, name in zip(
+        table["Representative protein"],
+        table[site_column].astype(str),
+        table[protein_name_column],
+    ):
+        protein_sequence = protein_db[accession].sequence
+        protein_identifier = name if name else accession
+        aa_sites = []
+        for site in sites.split(";"):
+            aa = protein_sequence[int(site) - 1]
+            aa_sites.append(f"{aa}{site}")
+        aa_site_tag = " / ".join(aa_sites)
+        site_identifier = f"{protein_identifier} - {aa_site_tag}"
+        site_identifiers.append(site_identifier)
+    table["Protein site identifier"] = site_identifiers
 def add_sequence_coverage(
     protein_table: pd.DataFrame,
     peptide_table: pd.DataFrame,
@@ -2384,7 +2542,9 @@ def _extract_fragpipe_assigned_modifications(
     return modifications
-def extract_maxquant_localization_probabilities(localization_entry: str) -> dict:
+def extract_maxquant_localization_probabilities(
+    localization_entry: str,
+) -> dict[int, float]:
     """Extract localization probabilites from a MaxQuant "Probabilities" entry.
     Args:
@@ -2441,6 +2601,39 @@ def extract_fragpipe_localization_probabilities(localization_entry: str) -> dict
     return modification_probabilities
+def extract_spectronaut_localization_probabilities(localization_entry: str) -> dict:
+    """Extract localization probabilities from a Spectronaut localization entry.
+    Args:
+        localization_entry: Entry from the "EG.PTMLocalizationProbabilities" column of a
+            spectronaut elution group (EG) output table.
+    Returns:
+        A dictionary of modifications containing a dictionary of {position: probability}
+        mappings. Positions are one-indexed, which means that the first amino acid
+        position is 1.
+    Example:
+    >>> extract_spectronaut_localization_probabilities(
+    ...     "_HM[Oxidation (M): 100%]S[Phospho (STY): 45.5%]GS[Phospho (STY): 54.5%]PG_"
+    ... )
+    {'Oxidation (M)': {2: 1.0}, 'Phospho (STY)': {3: 0.455, 5: 0.545}}
+    """
+    modification_probabilities: dict[str, dict[int, float]] = {}
+    localization_entry = localization_entry.strip("_")
+    _, raw_probability_entries = msreport.peptidoform.parse_modified_sequence(
+        localization_entry, "[", "]"
+    )
+    for site, mod_probability_entry in raw_probability_entries:
+        modification, probability_entry = mod_probability_entry.split(": ")
+        if modification not in modification_probabilities:
+            modification_probabilities[modification] = {}
+        probability = float(probability_entry.replace("%", "")) / 100.0
+        modification_probabilities[modification][site] = probability
+    return modification_probabilities
 def _extract_protein_ids(entries: list[str]) -> list[str]:
     """Returns a list of protein IDs, extracted from protein entries.
@@ -2554,8 +2747,8 @@ def _create_multi_protein_annotations_from_db(
                 query_result.append(query_function(db_entry, default_value))
             else:
                 query_result.append(default_value)
-        query_result = ";".join(map(str, query_result))
-        annotation_values.append(query_result)
+        annotation_value = ";".join(map(str, query_result))
+        annotation_values.append(annotation_value)
     return annotation_values

msreport/rinterface/limma.py CHANGED Viewed

@@ -1,4 +1,4 @@
-"""Python interface to custome R scripts."""
+"""Python interface to the 'limma.R' script."""
 import os

{msreport-0.0.30.dist-info → msreport-0.0.31.dist-info}/METADATA RENAMED Viewed

@@ -1,10 +1,11 @@
 Metadata-Version: 2.4
 Name: msreport
-Version: 0.0.30
+Version: 0.0.31
 Summary: Post processing and analysis of quantitative proteomics data
 Author-email: "David M. Hollenstein" <hollenstein.david@gmail.com>
 License-Expression: Apache-2.0
 Project-URL: homepage, https://github.com/hollenstein/msreport
+Project-URL: documentation, https://hollenstein.github.io/msreport/
 Project-URL: changelog, https://github.com/hollenstein/msreport/blob/main/CHANGELOG.md
 Keywords: mass spectrometry,proteomics,post processing,data analysis
 Classifier: Development Status :: 4 - Beta
@@ -33,6 +34,13 @@ Requires-Dist: rpy2<3.5.13,>=3.5.3; extra == "r"
 Provides-Extra: dev
 Requires-Dist: mypy>=1.15.0; extra == "dev"
 Requires-Dist: pytest>=8.3.5; extra == "dev"
+Provides-Extra: docs
+Requires-Dist: mkdocs-awesome-nav>=3.1.2; extra == "docs"
+Requires-Dist: mkdocs-macros-plugin>=1.3.7; extra == "docs"
+Requires-Dist: mkdocs-material>=9.6.15; extra == "docs"
+Requires-Dist: mkdocs-roamlinks-plugin>=0.3.2; extra == "docs"
+Requires-Dist: mkdocstrings-python>=1.16.12; extra == "docs"
+Requires-Dist: ruff>=0.12.2; extra == "docs"
 Provides-Extra: test
 Requires-Dist: pytest>=8.3.5; extra == "test"
 Dynamic: license-file
@@ -64,6 +72,8 @@ MsReport is a Python library designed to simplify the post-processing and analys
 The library supports importing protein and peptide-level quantification results from MaxQuant, FragPipe, and Spectronaut, as well as post-translational modification (PTM) data from MaxQuant and FragPipe. MsReport provides tools for data annotation, normalization and transformation, statistical testing, and data visualization.
+The [documentation](https://hollenstein.github.io/msreport/) provides an overview of the library's public API.
 ### Key features of MsReport
 #### Data Import and Standardization

msreport 0.0.30__py3-none-any.whl → 0.0.31__py3-none-any.whl

msreport 0.0.30py3-none-any.whl → 0.0.31py3-none-any.whl