PyPI - msreport - Versions diffs - 0.0.30__py3-none-any.whl → 0.0.32__py3-none-any.whl - Mend

msreport 0.0.30py3-none-any.whl → 0.0.32py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

msreport/__init__.py +1 -1
msreport/aggregate/__init__.py +10 -0
msreport/aggregate/condense.py +9 -0
msreport/aggregate/pivot.py +14 -5
msreport/aggregate/summarize.py +14 -4
msreport/analyze.py +67 -5
msreport/export.py +10 -16
msreport/fasta.py +9 -2
msreport/helper/__init__.py +18 -0
msreport/helper/maxlfq.py +3 -3
msreport/impute.py +18 -10
msreport/isobar.py +11 -14
msreport/normalize.py +95 -10
msreport/peptidoform.py +21 -11
msreport/plot/__init__.py +3 -3
msreport/plot/comparison.py +7 -2
msreport/plot/multivariate.py +34 -15
msreport/plot/quality.py +1 -1
msreport/qtable.py +25 -11
msreport/reader.py +362 -37
msreport/rinterface/limma.py +1 -1
{msreport-0.0.30.dist-info → msreport-0.0.32.dist-info}/METADATA +11 -1
msreport-0.0.32.dist-info/RECORD +38 -0
msreport-0.0.30.dist-info/RECORD +0 -38
{msreport-0.0.30.dist-info → msreport-0.0.32.dist-info}/WHEEL +0 -0
{msreport-0.0.30.dist-info → msreport-0.0.32.dist-info}/licenses/LICENSE.txt +0 -0
{msreport-0.0.30.dist-info → msreport-0.0.32.dist-info}/top_level.txt +0 -0

msreport/reader.py CHANGED Viewed

@@ -1,17 +1,18 @@
-"""Module for reading result tables from various MS analysis tools and converting them
-to a standardized format following the MsReport convention.
+"""Provides tools for importing and standardizing quantitative proteomics data.
-Currently for MaxQuant and FragPipe protein, peptide, and ion tables are supported, and
-for Spectronaut protein tables are supported when exported with the correct report
-scheme.
+This module offers software-specific reader classes to import raw result tables (e.g.,
+proteins, peptides, ions) from various proteomics software (MaxQuant, FragPipe,
+Spectronaut) and convert them into a standardized `msreport` format. Additionally, it
+provides functions for annotating imported data with biological metadata, such as
+protein information (e.g., sequence length, molecular weight) and peptide positions,
+extracted from a ProteinDatabase (FASTA file).
-New column names:
+New columns added to imported protein tables:
 - Representative protein
 - Leading proteins
 - Protein reported by software
-Unified column names:
-- Total peptides
+Standardized column names for quantitative values (if available in the software output):
 - Spectral count "sample name"
 - Unique spectral count "sample name"
 - Total spectral count "sample name"
@@ -38,6 +39,7 @@ from msreport.helper.temp import extract_window_around_position
 class Protein(Protocol):
     """Abstract protein entry"""
+    # identifier: str
     header: str
     sequence: str
     header_fields: dict[str, str]
@@ -46,9 +48,9 @@ class Protein(Protocol):
 class ProteinDatabase(Protocol):
     """Abstract protein database"""
-    def __getitem__(self, protein_id: str) -> Protein: ...
+    def __getitem__(self, identifier: str) -> Protein: ...
-    def __contains__(self, protein_id: str) -> bool: ...
+    def __contains__(self, identifier: str) -> bool: ...
 class ResultReader:
@@ -497,7 +499,9 @@ class MaxQuantReader(ResultReader):
         mod_probability_columns = msreport.helper.find_columns(new_df, "Probabilities")
         localization_string_column = "Modification localization string"
-        mod_localization_probabilities = [{} for _ in range(new_df.shape[0])]
+        mod_localization_probabilities: list[dict[str, dict[int, float]]] = [
+            {} for _ in range(new_df.shape[0])
+        ]
         for probability_column in mod_probability_columns:
             # FUTURE: Type should be checked and enforced during the import
             if not pd.api.types.is_string_dtype(new_df[probability_column].dtype):
@@ -541,7 +545,12 @@ class FragPipeReader(ResultReader):
     """FragPipe result reader.
     Methods:
-        import_design: Reads a "fragpipe-files.fp-manifest" file and returns a
+        import_design: Depending on the quantification strategy, imports either the
+            manifest file or the experiment annotation file and returns a processed
+            design dataframe.
+        import_manifest: Reads a "fragpipe-files.fp-manifest" file and returns a
+            processed design dataframe.
+        import_experiment_annotation: Reads an "experiment_annotation" file and returns a
             processed design dataframe.
         import_proteins: Reads a "combined_protein.tsv" or "protein.tsv" file and
             returns a processed dataframe, conforming to the MsReport naming
@@ -585,12 +594,8 @@ class FragPipeReader(ResultReader):
         "ions": "combined_ion.tsv",
         "ion_evidence": "ion.tsv",
         "psm_evidence": "psm.tsv",
-        "design": "fragpipe-files.fp-manifest",
-    }
-    isobar_filenames: dict[str, str] = {
-        "proteins": "protein.tsv",
-        "peptides": "peptide.tsv",
-        "ions": "ion.tsv",
+        "manifest": "fragpipe-files.fp-manifest",
+        "experiment_annotation": "experiment_annotation.tsv",
     }
     sil_filenames: dict[str, str] = {
         "proteins": "combined_protein_label_quant.tsv",
@@ -671,17 +676,38 @@ class FragPipeReader(ResultReader):
         self._isobar: bool = isobar
         self._sil: bool = sil
         self._contaminant_tag: str = contaminant_tag
-        if isobar:
-            self.filenames = self.isobar_filenames
-        elif sil:
-            self.filenames = self.sil_filenames
+        self.filenames = self.default_filenames.copy()
+        if sil:
+            self.filenames.update(self.sil_filenames)
+    def import_design(self, sort: bool = False) -> pd.DataFrame:
+        """Reads the experimental design file and returns a processed design dataframe.
+        Depending on the quantification strategy (isobaric or label-free/sil), either
+        the experiment annotation file or the manifest file is imported.
+        Args:
+            sort: If True, the design dataframe is sorted by "Experiment" and
+                "Replicate"; default False.
+        """
+        if self._isobar:
+            return self.import_experiment_annotation(sort=sort)
         else:
-            self.filenames = self.default_filenames
+            return self.import_manifest(sort=sort)
-    def import_design(
+    def import_manifest(
         self, filename: Optional[str] = None, sort: bool = False
     ) -> pd.DataFrame:
-        """Reads a 'fp-manifest' file and returns a processed design dataframe.
+        """Reads a 'fp-manifest' file and returns a processed design dataframe.
+        The manifest columns "Path", "Experiment", and "Bioreplicate" are mapped to the
+        design table columns "Rawfile", "Experiment", and "Replicate". The "Rawfile"
+        column is extracted as the filename from the full path. The "Sample" column is
+        generated by combining "Experiment" and "Replicate" with an underscore
+        (e.g., "Experiment_Replicate"), except when "Replicate" is empty, in which case
+        "Sample" is set to "Experiment". If "Experiment" is missing, it is set to "exp"
+        by default.
         Args:
             filename: Allows specifying an alternative filename, otherwise the default
@@ -697,19 +723,25 @@ class FragPipeReader(ResultReader):
             FileNotFoundError: If the specified manifest file does not exist.
         """
         if filename is None:
-            filepath = os.path.join(self.data_directory, self.filenames["design"])
+            filepath = os.path.join(self.data_directory, self.filenames["manifest"])
         else:
             filepath = os.path.join(self.data_directory, filename)
         if not os.path.exists(filepath):
             raise FileNotFoundError(
                 f"File '{filepath}' does not exist. Please check the file path."
             )
-        fp_manifest = pd.read_csv(filepath, sep="\t", header=None, dtype=str)
+        fp_manifest = (
+            pd.read_csv(
+                filepath, sep="\t", header=None, na_values=[""], keep_default_na=False
+            )
+            .fillna("")
+            .astype(str)
+        )
         fp_manifest.columns = ["Path", "Experiment", "Bioreplicate", "Data type"]
         design = pd.DataFrame(
             {
-                "Sample": fp_manifest["Experiment"] + "_" + fp_manifest["Bioreplicate"],
+                "Sample": "",
                 "Experiment": fp_manifest["Experiment"],
                 "Replicate": fp_manifest["Bioreplicate"],
                 "Rawfile": fp_manifest["Path"].apply(
@@ -718,10 +750,73 @@ class FragPipeReader(ResultReader):
                 ),
             }
         )
+        # FragPipe uses "exp" for missing 'Experiment' values
+        design.loc[design["Experiment"] == "", "Experiment"] = "exp"
+        # FragPipe combines 'Experiment' + "_" + 'Replicate' into 'Sample', except when
+        # 'Replicate' is empty, in which case 'Sample' is set to 'Experiment'.
+        design["Sample"] = design["Experiment"] + "_" + design["Replicate"]
+        design.loc[design["Replicate"] == "", "Sample"] = design["Experiment"]
+        if sort:
+            design.sort_values(by=["Experiment", "Replicate"], inplace=True)
+            design.reset_index(drop=True, inplace=True)
+        return design
+    def import_experiment_annotation(
+        self, filename: Optional[str] = None, sort: bool = False
+    ) -> pd.DataFrame:
+        """Reads an 'experiment_annotation' file and returns a processed design table.
+        The annotation columns "sample", "channel", and "plex" are mapped to the design
+        table columns "Sample", "Channel", and "Plex". The "Experiment" and "Replicate"
+        columns are extracted from the "Sample" column by splitting at the last
+        underscore; if there is no underscore, "Replicate" is set to an empty string.
+        Note that this convention of splitting the "Sample" column does conform to the
+        FragPipe convention, but FragPipe does not enforce it for the experiment
+        annotation file.
+        Args:
+            filename: Allows specifying an alternative filename, otherwise the default
+                filename is used.
+            sort: If True, the design dataframe is sorted by "Experiment" and
+                "Replicate"; default False.
+        Returns:
+            A dataframe containing the processed design table with columns:
+            "Sample", "Experiment", "Replicate", "Channel", and "Plex".
+        Raises:
+            FileNotFoundError: If the specified experiment annotation file does not
+                exist.
+        """
+        if filename is None:
+            filepath = os.path.join(
+                self.data_directory, self.filenames["experiment_annotation"]
+            )
+        else:
+            filepath = os.path.join(self.data_directory, filename)
+        if not os.path.exists(filepath):
+            raise FileNotFoundError(
+                f"File '{filepath}' does not exist. Please check the file path."
+            )
+        annotation = pd.read_csv(filepath, sep="\t")
+        design = pd.DataFrame(
+            {
+                "Sample": annotation["sample"],
+                "Experiment": annotation["sample"].str.rsplit("_", n=1).str[0],
+                "Replicate": annotation["sample"].str.rsplit("_", n=1).str[1],
+                "Channel": annotation["channel"],
+                "Plex": annotation["plex"],
+            }
+        )
+        # Samples without an underscore have no second split element, which results in
+        # a missing "Replicate" value that is replaced by an empty string.
+        design["Replicate"] = design["Replicate"].fillna("")
         if sort:
             design.sort_values(by=["Experiment", "Replicate"], inplace=True)
             design.reset_index(drop=True, inplace=True)
         return design
     def import_proteins(
@@ -963,7 +1058,7 @@ class FragPipeReader(ResultReader):
         filename: Optional[str] = None,
         rename_columns: bool = True,
         rewrite_modifications: bool = True,
-    ):
+    ) -> pd.DataFrame:
         """Concatenate all "psm.tsv" files and return a processed dataframe.
         Args:
@@ -1010,6 +1105,7 @@ class FragPipeReader(ResultReader):
             )
             df["Modified sequence"] = mod_entries["Modified sequence"]
             df["Modifications"] = mod_entries["Modifications"]
+            df = self._add_modification_localization_string_to_psm_evidence(df)
         return df
     def _add_protein_entries(self, df: pd.DataFrame) -> pd.DataFrame:
@@ -1183,6 +1279,66 @@ class FragPipeReader(ResultReader):
             new_df[new_column] = localization_strings
         return new_df
+    def _add_modification_localization_string_to_psm_evidence(
+        self, df: pd.DataFrame
+    ) -> pd.DataFrame:
+        """Adds a modification localization string column to a PSM evidence table.
+        Probability columns are located through their companion columns, whose names
+        end with " Best Localization". Removing this suffix yields the name of the
+        column that holds the localization probabilities, which is expected to have
+        the form "{amino acids}:{modification}"; the text after the first ":" is used
+        as the modification name. The probabilities are converted into the
+        standardized modification localization string format used by msreport and
+        stored in the new column "Modification localization string".
+        Probabilities are written in the format
+        "Mod1@Site1:Probability1,Site2:Probability2;Mod2@Site3:Probability3",
+        e.g. "15.9949@11:1.000;79.9663@3:0.200,4:0.800". Refer to
+        `msreport.peptidoform.make_localization_string` for details.
+        Args:
+            df: A dataframe containing PSM tables from FragPipe. It is not modified.
+        Returns:
+            A copy of the input dataframe with the added
+            "Modification localization string" column. If no localization columns are
+            present, the column contains only empty strings.
+        """
+        new_df = df.copy()
+        _search_tag = " Best Localization"
+        # Slice the suffix off instead of using str.strip(), which treats its argument
+        # as a set of characters and would also remove matching leading characters of
+        # the column name (e.g. a leading "n", "c", "L" or "B").
+        mod_localization_columns = [
+            c[: -len(_search_tag)] for c in new_df.columns if c.endswith(_search_tag)
+        ]
+        if not mod_localization_columns:
+            new_df["Modification localization string"] = ""
+            return new_df
+        # Work on a separate string table so that neither the input dataframe nor the
+        # returned copy has its probability columns altered; missing values become "".
+        probability_table = (
+            df[mod_localization_columns].astype(str).replace("nan", "")
+        )
+        row_mod_probabilities: list[dict[str, dict[int, float]]] = [
+            {} for _ in range(df.shape[0])
+        ]
+        for mod_localization_column in mod_localization_columns:
+            modification = mod_localization_column.split(":")[1]
+            for modification_probabilities, probability_sequence in zip(
+                row_mod_probabilities, probability_table[mod_localization_column]
+            ):
+                if not probability_sequence:
+                    continue
+                # The probabilities are parsed as tags enclosed in parentheses.
+                _, probabilities = msreport.peptidoform.parse_modified_sequence(
+                    probability_sequence, "(", ")"
+                )
+                modification_probabilities[modification] = {
+                    site: float(probability) for site, probability in probabilities
+                }
+        new_df["Modification localization string"] = [
+            msreport.peptidoform.make_localization_string(localization_probabilities)
+            for localization_probabilities in row_mod_probabilities
+        ]
+        return new_df
 class SpectronautReader(ResultReader):
     """Spectronaut result reader.
@@ -1499,6 +1655,7 @@ class SpectronautReader(ResultReader):
         filename: Optional[str] = None,
         filetag: Optional[str] = None,
         rename_columns: bool = True,
+        rewrite_modifications: bool = True,
     ) -> pd.DataFrame:
         """Reads an ion evidence file (long format) and returns a processed dataframe.
@@ -1508,8 +1665,15 @@ class SpectronautReader(ResultReader):
         generated by concatenating the "Modified sequence" and "Charge" columns, and if
         present, the "Compensation voltage" column.
-        (!) Note that the modified sequence and modification localization probabilities
-        are currently not processed.
+        "Modified sequence" entries contain modifications within square brackets.
+        "Modifications" entries are strings in the form of "position:modification_tag",
+        multiple modifications are joined by ";". An example for a modified sequence and
+        a modification entry: "PEPT[Phospho]IDO[Oxidation]", "4:Phospho;7:Oxidation".
+        "Modification localization string" contains localization probabilities in the
+        format "Mod1@Site1:Probability1,Site2:Probability2;Mod2@Site3:Probability3",
+        e.g. "15.9949@11:1.000;79.9663@3:0.200,4:0.800". Refer to
+        `msreport.peptidoform.make_localization_string` for details.
         Args:
             filename: Optional, allows specifying a specific file that will be imported.
@@ -1517,6 +1681,10 @@ class SpectronautReader(ResultReader):
                 a substring, instead of specifying a filename.
             rename_columns: If True, columns are renamed according to the MsReport
                 convention; default True.
+            rewrite_modifications: If True, the peptide format in "Modified sequence" is
+                changed according to the MsReport convention, and a "Modifications"
+                column is added that contains the amino acid position for all
+                modifications. Requires 'rename_columns' to be true. Default True.
         Returns:
             A dataframe containing the processed ion table.
@@ -1544,6 +1712,9 @@ class SpectronautReader(ResultReader):
         df = self._add_protein_entries(df)
         if rename_columns:
             df = self._rename_columns(df, True)
+        if rewrite_modifications and rename_columns:
+            df = self._add_peptide_modification_entries(df)
+            df = self._add_modification_localization_string(df)
             df["Ion ID"] = df["Modified sequence"] + "_c" + df["Charge"].astype(str)
             if "Compensation voltage" in df.columns:
                 _cv = df["Compensation voltage"].astype(str)
@@ -1597,6 +1768,70 @@ class SpectronautReader(ResultReader):
         leading_protein_entries = df["PG.ProteinAccessions"].str.split(";").tolist()
         return leading_protein_entries
+    def _add_peptide_modification_entries(self, df: pd.DataFrame) -> pd.DataFrame:
+        """Adds standardized "Modified sequence" and "Modifications" columns.
+        "Modified sequence" entries contain modifications within square brackets.
+        "Modifications" entries are strings in the form of "position:modification_text",
+        multiple modifications are joined by ";". An example for a modified sequence and
+        a modification entry: "PEPT[Phospho]IDO[Oxidation]", "4:Phospho;7:Oxidation".
+        Requires the columns "Peptide sequence" and "Modified sequence" from the
+        software output.
+        Args:
+            df: Dataframe containing "Peptide sequence" and "Modified sequence" columns.
+        Returns:
+            A copy of the input dataframe in which "Modified sequence" is replaced and
+            "Modifications" is added.
+        """
+        # TODO: not tested
+        mod_sequences = df["Modified sequence"].str[1:-1]  # Remove surrounding "_"
+        mod_entries = _generate_modification_entries(
+            df["Peptide sequence"], mod_sequences, "[", "]"
+        )
+        new_df = df.copy()
+        new_df["Modified sequence"] = mod_entries["Modified sequence"]
+        new_df["Modifications"] = mod_entries["Modifications"]
+        return new_df
+    def _add_modification_localization_string(self, df: pd.DataFrame) -> pd.DataFrame:
+        """Adds a modification localization string column.
+        Extracts localization probabilities from the "EG.PTMLocalizationProbabilities"
+        column, converts them into the standardized modification localization string
+        format used by msreport, and adds a new column
+        "Modification localization string".
+        Probabilities are written in the format
+        "Mod1@Site1:Probability1,Site2:Probability2;Mod2@Site3:Probability3",
+        e.g. "15.9949@11:1.000;79.9663@3:0.200,4:0.800". Refer to
+        `msreport.peptidoform.make_localization_string` for details.
+        Args:
+            df: Dataframe containing a "EG.PTMLocalizationProbabilities" column.
+        Returns:
+            A copy of the input dataframe with the added column
+            "Modification localization string".
+        """
+        # TODO: not tested
+        new_df = df.copy()
+        localization_strings = []
+        for localization_entry in new_df["EG.PTMLocalizationProbabilities"]:
+            # Entries without localization information yield an empty string.
+            # NOTE(review): a missing value (NaN) does not compare equal to "" and
+            # would be passed on to the extraction below - confirm that the column is
+            # always read as strings with empty entries.
+            if localization_entry == "":
+                localization_strings.append("")
+                continue
+            localization_probabilities = extract_spectronaut_localization_probabilities(
+                localization_entry
+            )
+            localization_string = msreport.peptidoform.make_localization_string(
+                localization_probabilities
+            )
+            localization_strings.append(localization_string)
+        new_df["Modification localization string"] = localization_strings
+        return new_df
 def sort_leading_proteins(
     table: pd.DataFrame,
@@ -1639,7 +1874,7 @@ def sort_leading_proteins(
     db_origins_present = "Leading proteins database origin" in table
     if database_order is not None:
-        database_encoding = defaultdict(lambda: 999)
+        database_encoding: dict[str, int] = defaultdict(lambda: 999)
         database_encoding.update({db: i for i, db in enumerate(database_order)})
     if penalize_contaminants is not None:
         contaminant_encoding = {"False": 0, "True": 1, False: 0, True: 1}
@@ -1647,7 +1882,7 @@ def sort_leading_proteins(
     for _, row in table.iterrows():
         protein_ids = row["Leading proteins"].split(";")
-        sorting_info = [[] for _ in protein_ids]
+        sorting_info: list[list] = [[] for _ in protein_ids]
         if special_proteins is not None:
             for i, _id in enumerate(protein_ids):
                 sorting_info[i].append(_id not in special_proteins)
@@ -1787,7 +2022,7 @@ def add_protein_site_annotation(
     protein_db: ProteinDatabase,
     protein_column: str = "Representative protein",
     site_column: str = "Protein site",
-):
+) -> pd.DataFrame:
     """Uses a FASTA protein database to add protein site annotation columns.
     Adds the columns "Modified residue", which corresponds to the amino acid at the
@@ -1925,6 +2160,61 @@ def add_leading_proteins_annotation(
     return table
+def add_protein_site_identifiers(
+    table: pd.DataFrame,
+    protein_db: ProteinDatabase,
+    site_column: str,
+    protein_name_column: str,
+) -> None:
+    """Adds a "Protein site identifier" column to the 'table'.
+    The "Protein site identifier" is generated by concatenating the protein name
+    with the amino acid and position of the protein site or sites, e.g. "P12345 - S123"
+    or "P12345 - S123 / T125". The amino acid is extracted from the protein sequence at
+    the position of the site. If the protein name is not available (empty or missing
+    value), the "Representative protein" entry is used instead.
+    The 'table' is modified in place and nothing is returned.
+    Args:
+        table: Dataframe to which the protein site identifiers are added.
+        protein_db: A protein database containing entries from one or multiple FASTA
+            files. Protein identifiers in the 'table' column "Representative protein"
+            are used to look up entries in the 'protein_db'.
+        site_column: Column in 'table' that contains protein site positions. Positions
+            are one-indexed, meaning the first amino acid of the protein is position 1.
+            Multiple sites in a single entry should be separated by ";".
+        protein_name_column: Column in 'table' that contains protein names, which will
+            be used to generate the identifier. If no name is available, the accession
+            is used instead.
+    Raises:
+        ValueError: If the "Representative protein", 'protein_name_column' or
+            'site_column' is not found in the 'table'.
+    """
+    if site_column not in table.columns:
+        raise ValueError(f"Column '{site_column}' not found in the table.")
+    if protein_name_column not in table.columns:
+        raise ValueError(f"Column '{protein_name_column}' not found in the table.")
+    if "Representative protein" not in table.columns:
+        raise ValueError("Column 'Representative protein' not found in the table.")
+    site_identifiers = []
+    for accession, sites, name in zip(
+        table["Representative protein"],
+        table[site_column].astype(str),
+        table[protein_name_column],
+    ):
+        protein_sequence = protein_db[accession].sequence
+        # A missing name read as NaN is truthy and would otherwise end up as "nan" in
+        # the identifier, so missing values are tested for explicitly.
+        name_is_missing = pd.isna(name) or not name
+        protein_identifier = accession if name_is_missing else name
+        # Site positions are one-indexed, the sequence is zero-indexed.
+        aa_sites = [
+            f"{protein_sequence[int(site) - 1]}{site}" for site in sites.split(";")
+        ]
+        aa_site_tag = " / ".join(aa_sites)
+        site_identifiers.append(f"{protein_identifier} - {aa_site_tag}")
+    table["Protein site identifier"] = site_identifiers
 def add_sequence_coverage(
     protein_table: pd.DataFrame,
     peptide_table: pd.DataFrame,
@@ -2384,7 +2674,9 @@ def _extract_fragpipe_assigned_modifications(
     return modifications
-def extract_maxquant_localization_probabilities(localization_entry: str) -> dict:
+def extract_maxquant_localization_probabilities(
+    localization_entry: str,
+) -> dict[int, float]:
     """Extract localization probabilities from a MaxQuant "Probabilities" entry.
     Args:
@@ -2441,6 +2733,39 @@ def extract_fragpipe_localization_probabilities(localization_entry: str) -> dict
     return modification_probabilities
+def extract_spectronaut_localization_probabilities(
+    localization_entry: str,
+) -> dict[str, dict[int, float]]:
+    """Extract localization probabilities from a Spectronaut localization entry.
+    Args:
+        localization_entry: Entry from the "EG.PTMLocalizationProbabilities" column of a
+            spectronaut elution group (EG) output table.
+    Returns:
+        A dictionary of modifications containing a dictionary of {position: probability}
+        mappings. Positions are one-indexed, which means that the first amino acid
+        position is 1. Probabilities are converted from percent to fractions.
+    Example:
+    >>> extract_spectronaut_localization_probabilities(
+    ...     "_HM[Oxidation (M): 100%]S[Phospho (STY): 45.5%]GS[Phospho (STY): 54.5%]PG_"
+    ... )
+    {'Oxidation (M)': {2: 1.0}, 'Phospho (STY)': {3: 0.455, 5: 0.545}}
+    """
+    modification_probabilities: dict[str, dict[int, float]] = {}
+    localization_entry = localization_entry.strip("_")
+    _, raw_probability_entries = msreport.peptidoform.parse_modified_sequence(
+        localization_entry, "[", "]"
+    )
+    for site, mod_probability_entry in raw_probability_entries:
+        # Split at the last ": " only, so that a modification name which itself
+        # contains ": " does not break the unpacking.
+        modification, probability_entry = mod_probability_entry.rsplit(": ", 1)
+        probability = float(probability_entry.replace("%", "")) / 100.0
+        modification_probabilities.setdefault(modification, {})[site] = probability
+    return modification_probabilities
 def _extract_protein_ids(entries: list[str]) -> list[str]:
     """Returns a list of protein IDs, extracted from protein entries.
@@ -2554,8 +2879,8 @@ def _create_multi_protein_annotations_from_db(
                 query_result.append(query_function(db_entry, default_value))
             else:
                 query_result.append(default_value)
-        query_result = ";".join(map(str, query_result))
-        annotation_values.append(query_result)
+        annotation_value = ";".join(map(str, query_result))
+        annotation_values.append(annotation_value)
     return annotation_values

msreport/rinterface/limma.py CHANGED Viewed

@@ -1,4 +1,4 @@
-"""Python interface to custome R scripts."""
+"""Python interface to the 'limma.R' script."""
 import os

{msreport-0.0.30.dist-info → msreport-0.0.32.dist-info}/METADATA RENAMED Viewed

@@ -1,10 +1,11 @@
 Metadata-Version: 2.4
 Name: msreport
-Version: 0.0.30
+Version: 0.0.32
 Summary: Post processing and analysis of quantitative proteomics data
 Author-email: "David M. Hollenstein" <hollenstein.david@gmail.com>
 License-Expression: Apache-2.0
 Project-URL: homepage, https://github.com/hollenstein/msreport
+Project-URL: documentation, https://hollenstein.github.io/msreport/
 Project-URL: changelog, https://github.com/hollenstein/msreport/blob/main/CHANGELOG.md
 Keywords: mass spectrometry,proteomics,post processing,data analysis
 Classifier: Development Status :: 4 - Beta
@@ -33,6 +34,13 @@ Requires-Dist: rpy2<3.5.13,>=3.5.3; extra == "r"
 Provides-Extra: dev
 Requires-Dist: mypy>=1.15.0; extra == "dev"
 Requires-Dist: pytest>=8.3.5; extra == "dev"
+Provides-Extra: docs
+Requires-Dist: mkdocs-awesome-nav>=3.1.2; extra == "docs"
+Requires-Dist: mkdocs-macros-plugin>=1.3.7; extra == "docs"
+Requires-Dist: mkdocs-material>=9.6.15; extra == "docs"
+Requires-Dist: mkdocs-roamlinks-plugin>=0.3.2; extra == "docs"
+Requires-Dist: mkdocstrings-python>=1.16.12; extra == "docs"
+Requires-Dist: ruff>=0.12.2; extra == "docs"
 Provides-Extra: test
 Requires-Dist: pytest>=8.3.5; extra == "test"
 Dynamic: license-file
@@ -64,6 +72,8 @@ MsReport is a Python library designed to simplify the post-processing and analys
 The library supports importing protein and peptide-level quantification results from MaxQuant, FragPipe, and Spectronaut, as well as post-translational modification (PTM) data from MaxQuant and FragPipe. MsReport provides tools for data annotation, normalization and transformation, statistical testing, and data visualization.
+The [documentation](https://hollenstein.github.io/msreport/) provides an overview of the library's public API.
 ### Key features of MsReport
 #### Data Import and Standardization

msreport-0.0.32.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,38 @@
+msreport/__init__.py,sha256=hmq4---v9oHxQm9gidnxGryrWB8HqPfMPHaPryBS_Oc,339
+msreport/analyze.py,sha256=T6ORhBYP3Qnil0r7qF5CkwS2KHUsedpU5P-0paqUmaA,33838
+msreport/errors.py,sha256=X9yFxMiIOCWQdxuqBGr8L7O3vRV2KElXdX1uHbFcZMk,421
+msreport/export.py,sha256=wXQfaVd5UHlGKyKdrt2UWbhzNf-VyJy2Up5qfrPzO2M,20229
+msreport/fasta.py,sha256=hPz4xlkjeTV-2YCrtWMsQQJSkJSmH1ZzNZBxHI89Nqk,1489
+msreport/impute.py,sha256=q21cFKnpENE4GHUPz-R5FipkvagWjX4fa31qeb8uaxc,10782
+msreport/isobar.py,sha256=nh2Wem1wheqJ6wAJYm8be9FuK21c7T1k7nectJjPw7o,6729
+msreport/normalize.py,sha256=73n344jBQ9u-Ube_wOxF5Svi2ltKMnBKaw8M36hEaQM,23441
+msreport/peptidoform.py,sha256=mJhqoolFL6ZzwnmQkWhgJn8zIBoxv_GdYVSb-6gw37g,12615
+msreport/qtable.py,sha256=RhfGdij7cIVO5JiUC-xSQkd7zV-Q8KmC94daA9JotHc,28203
+msreport/reader.py,sha256=02cst1NRyBoeBaspfM67BM_KsTR9pt1NZQX49J_Wev0,131276
+msreport/aggregate/__init__.py,sha256=Y5HnN9C2PRjWfq4epJAoNqyp4Pv6WQfguAcSYKIhRuw,609
+msreport/aggregate/condense.py,sha256=fspY8osQfjzzehw3v4Up2QSihNiixhQpAiCiwXLIpCQ,6301
+msreport/aggregate/pivot.py,sha256=Myk9QhOmQWge7MvGlFYwdD4u7pdqYaAaFZ0uxZH4d28,5491
+msreport/aggregate/summarize.py,sha256=_KbSuLS3rRxIMpoIXfPyC2--5sACV9NsivbS0BPFr9o,12736
+msreport/helper/__init__.py,sha256=IG4xaP_iIugqBLUpDHMj-SbD2_elL5on_V4whLIQTbM,1003
+msreport/helper/calc.py,sha256=J4XltEnMrFR9IQlPtrZhyxlSTj15072huHCMA_nqQ6E,4245
+msreport/helper/maxlfq.py,sha256=kFm3hRNWntM067EuoSrO_x-i5YNXphBfrvssMA3OM1g,14947
+msreport/helper/table.py,sha256=x-Wo8mTENsUxc_gtF-wgOyQa9g7W2fK6tuRiEX7bda0,11430
+msreport/helper/temp.py,sha256=jNulgDATf9sKXEFWMXAhjflciOZPAqlxg_7QZS7IkW8,3736
+msreport/plot/__init__.py,sha256=p-oLxmZIvfC--xkjB0ka321xddW-lst19PmokJq9lTk,1457
+msreport/plot/_partial_plots.py,sha256=tqZTSXEPuruMgVakaGR2tUQl5OrHgo2cROJ0S4cqkR0,5598
+msreport/plot/comparison.py,sha256=Y2KOuakj-TxqdT2XNt7lnVZwimKSszvFQI-K9Pm80k8,18770
+msreport/plot/distribution.py,sha256=QNFL5vG9p-vqhwEk5WcCSXa2B8u5QgySZlAQIPys0-0,10248
+msreport/plot/multivariate.py,sha256=v79gcb-8s5bZVpaJn13MOmqsNA0ZvrV25JlXmHmp4WA,14046
+msreport/plot/quality.py,sha256=ZZKMkghmVESjA49Qg-iukVFBoDIgI2iWLlFa7vJWX7M,15869
+msreport/plot/style.py,sha256=67jWf4uA1ub9RJDu4xhuSoXAW0lbLj6SMP4QXQO76Pc,10591
+msreport/plot/style_sheets/msreport-notebook.mplstyle,sha256=SPYO_7vYT8Ha7tQ0KCTLtykiRQ13-_igAm7kyvsZj1I,1266
+msreport/plot/style_sheets/seaborn-whitegrid.mplstyle,sha256=eC8Zboy8R7ybBwbHPKvKbMIHACystN6X6I0lqm7B80U,833
+msreport/rinterface/__init__.py,sha256=Zs6STvbDqaVZVPRM6iU0kKjq0TWz_2p2ChvNAveRdTA,616
+msreport/rinterface/limma.py,sha256=P-Fs8HARSXz60rO_vLc--of1hafk_IgGgPaNXnS_aKg,5424
+msreport/rinterface/rinstaller.py,sha256=AGs6NFMSwTLrzrIJz1E5BE5jFUz8eQBHlpM_MWVChzA,1370
+msreport/rinterface/rscripts/limma.R,sha256=gr_yjMm_YoG45irDhWOo6gkRQSTwj_7uU_p3NBRHPm8,4331
+msreport-0.0.32.dist-info/licenses/LICENSE.txt,sha256=Pd-b5cKP4n2tFDpdx27qJSIq0d1ok0oEcGTlbtL6QMU,11560
+msreport-0.0.32.dist-info/METADATA,sha256=_OI-LkqJoperzDBo6KjAir7Xq6jANqyjpqyxUfu9T-4,8998
+msreport-0.0.32.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+msreport-0.0.32.dist-info/top_level.txt,sha256=Drl8mCckJHFIw-Ovh5AnyjKnqvLJltDOBUr1JAcHAlI,9
+msreport-0.0.32.dist-info/RECORD,,

msreport 0.0.30__py3-none-any.whl → 0.0.32__py3-none-any.whl

msreport 0.0.30py3-none-any.whl → 0.0.32py3-none-any.whl