PyPI - msreport - Versions diffs - 0.0.26__py3-none-any.whl → 0.0.28__py3-none-any.whl - Mend

msreport 0.0.26__py3-none-any.whl → 0.0.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

msreport/__init__.py +4 -6
msreport/aggregate/condense.py +1 -1
msreport/aggregate/pivot.py +1 -0
msreport/aggregate/summarize.py +2 -2
msreport/analyze.py +171 -38
msreport/errors.py +1 -2
msreport/export.py +16 -13
msreport/fasta.py +2 -1
msreport/helper/__init__.py +7 -7
msreport/helper/calc.py +29 -24
msreport/helper/maxlfq.py +2 -2
msreport/helper/table.py +5 -6
msreport/impute.py +7 -8
msreport/isobar.py +10 -9
msreport/normalize.py +54 -36
msreport/peptidoform.py +6 -4
msreport/plot/__init__.py +41 -0
msreport/plot/_partial_plots.py +159 -0
msreport/plot/comparison.py +490 -0
msreport/plot/distribution.py +253 -0
msreport/plot/multivariate.py +355 -0
msreport/plot/quality.py +431 -0
msreport/plot/style.py +286 -0
msreport/plot/style_sheets/msreport-notebook.mplstyle +57 -0
msreport/plot/style_sheets/seaborn-whitegrid.mplstyle +45 -0
msreport/qtable.py +109 -17
msreport/reader.py +73 -79
msreport/rinterface/__init__.py +2 -1
msreport/rinterface/limma.py +2 -1
msreport/rinterface/rinstaller.py +3 -3
{msreport-0.0.26.dist-info → msreport-0.0.28.dist-info}/METADATA +7 -3
msreport-0.0.28.dist-info/RECORD +38 -0
msreport/plot.py +0 -1132
msreport-0.0.26.dist-info/RECORD +0 -30
{msreport-0.0.26.dist-info → msreport-0.0.28.dist-info}/WHEEL +0 -0
{msreport-0.0.26.dist-info → msreport-0.0.28.dist-info}/licenses/LICENSE.txt +0 -0
{msreport-0.0.26.dist-info → msreport-0.0.28.dist-info}/top_level.txt +0 -0

msreport/reader.py CHANGED Viewed

@@ -1,4 +1,4 @@
-""" Module for reading result tables from various MS analysis tools and converting them
+"""Module for reading result tables from various MS analysis tools and converting them
 to a standardized format following the MsReport convention.
 Currently for MaxQuant and FragPipe protein, peptide, and ion tables are supported, and
@@ -20,19 +20,19 @@ Unified column names:
 - iBAQ intensity "sample name"
 """
-from collections import OrderedDict, defaultdict
 import os
-from typing import Any, Callable, Iterable, Optional, Protocol
 import pathlib
 import warnings
+from collections import OrderedDict, defaultdict
+from typing import Any, Callable, Iterable, Optional, Protocol
 import numpy as np
 import pandas as pd
 import msreport.helper as helper
-from msreport.helper.temp import extract_window_around_position
-from msreport.errors import ProteinsNotInFastaWarning
 import msreport.peptidoform
+from msreport.errors import ProteinsNotInFastaWarning
+from msreport.helper.temp import extract_window_around_position
 class Protein(Protocol):
@@ -54,6 +54,8 @@ class ProteinDatabase(Protocol):
 class ResultReader:
     """Base Reader class, is by itself not functional."""
+    data_directory: str
+    filenames: dict[str, str]
     default_filenames: dict[str, str]
     protected_columns: list[str]
     column_mapping: dict[str, str]
@@ -61,8 +63,8 @@ class ResultReader:
     sample_column_tags: list[str]
     def __init__(self):
-        self.data_directory: str = ""
-        self.filenames: dict[str, str] = {}
+        self.data_directory = ""
+        self.filenames = {}
     def _read_file(self, which: str, sep: str = "\t") -> pd.DataFrame:
         """Read a result table.
@@ -183,18 +185,16 @@ class MaxQuantReader(ResultReader):
         "MS/MS count",
         "Sequence coverage",
     ]
-    column_mapping: dict[str, str] = dict(
-        [
-            ("Peptides", "Total peptides"),
-            ("Sequence coverage [%]", "Sequence coverage"),
-            ("MS/MS count", "Spectral count Combined"),  # proteinGroups, evidence
-            ("MS/MS Count", "Spectral count Combined"),  # peptides
-            ("Sequence", "Peptide sequence"),  # peptides, evidence
-            ("Sequence length", "Protein length"),
-            ("Mol. weight [kDa]", "Molecular weight [kDa]"),
-            ("Experiment", "Sample"),
-        ]
-    )
+    column_mapping: dict[str, str] = {
+        "Peptides": "Total peptides",
+        "Sequence coverage [%]": "Sequence coverage",
+        "MS/MS count": "Spectral count Combined",  # proteinGroups, evidence
+        "MS/MS Count": "Spectral count Combined",  # peptides
+        "Sequence": "Peptide sequence",  # peptides, evidence
+        "Sequence length": "Protein length",
+        "Mol. weight [kDa]": "Molecular weight [kDa]",
+        "Experiment": "Sample",
+    }
     column_tag_mapping: OrderedDict[str, str] = OrderedDict(
         [("MS/MS count", "Spectral count"), ("iBAQ", "iBAQ intensity")]
     )
@@ -590,20 +590,18 @@ class FragPipeReader(ResultReader):
         "Intensity",
         "MaxLFQ Intensity",
     ]
-    column_mapping: dict[str, str] = dict(
-        [
-            ("Peptide Sequence", "Peptide sequence"),  # Peptide and ion
-            ("Modified Sequence", "Modified sequence"),  # Modified peptide and ion
-            ("Start", "Start position"),  # Peptide and ion
-            ("End", "End position"),  # Peptide and ion
-            ("Combined Total Peptides", "Total peptides"),  # From LFQ
-            ("Total Peptides", "Total peptides"),  # From TMT
-            ("Description", "Protein name"),
-            ("Protein Length", "Protein length"),
-            ("Entry Name", "Protein entry name"),
-            ("Gene", "Gene name"),
-        ]
-    )
+    column_mapping: dict[str, str] = {
+        "Peptide Sequence": "Peptide sequence",  # Peptide and ion
+        "Modified Sequence": "Modified sequence",  # Modified peptide and ion
+        "Start": "Start position",  # Peptide and ion
+        "End": "End position",  # Peptide and ion
+        "Combined Total Peptides": "Total peptides",  # From LFQ
+        "Total Peptides": "Total peptides",  # From TMT
+        "Description": "Protein name",
+        "Protein Length": "Protein length",
+        "Entry Name": "Protein entry name",
+        "Gene": "Gene name",
+    }
     column_tag_mapping: OrderedDict[str, str] = OrderedDict(
         [
             ("MaxLFQ Intensity", "LFQ intensity"),
@@ -1038,40 +1036,32 @@ class SpectronautReader(ResultReader):
         "design": "conditionsetup",
     }
     protected_columns: list[str] = []
-    column_mapping: dict[str, str] = dict(
-        [
-            ("R.FileName", "Filename"),
-            ("R.Label", "Sample"),
-            ("PG.Qvalue", "Protein qvalue"),
-            ("PG.Cscore", "Protein cscore"),
-            ("PG.NrOfStrippedSequencesIdentified (Experiment-wide)", "Total peptides"),
-            ("PG.NrOfPrecursorsIdentified (Experiment-wide)", "Total ions"),
-            ("PG.Cscore", "Cscore"),
-            ("PEP.StrippedSequence", "Peptide sequence"),
-            ("PEP.AllOccurringProteinAccessions", "Mapped proteins"),
-            ("EG.ModifiedSequence", "Modified sequence"),
-            ("EG.CompensationVoltage", "Compensation voltage"),
-            ("EG.Qvalue", "Qvalue"),
-            ("EG.ApexRT", "Apex retention time"),
-            ("EG.DatapointsPerPeak", "Datapoints per peak"),
-            ("EG.FWHM", "FWHM"),
-            ("EG.SignalToNoise", "Signal to noise"),
-            ("FG.FragmentCount", "Fragment count"),
-            ("FG.Charge", "Charge"),
-            ("FG.MS1Quantity", "MS1 intensity"),
-            ("FG.MS1RawQuantity", "MS1 raw intensity"),
-            ("FG.MS2Quantity", "MS2 intensity"),
-            ("FG.MS2RawQuantity", "MS2 raw intensity"),
-            ("FG.MeasuredMz", "Observed m/z"),
-            ("FG.TheoreticalMz", "Theoretical m/z"),
-            ("FG.CalibratedMz", "Calibrated m/z"),
-            # ("PG.ProteinAccessions", ""),
-            # ("EG.HasLocalizationInformation", ""),
-            # ("EG.PTMLocalizationProbabilities", ""),
-            # ("EG.UsedForProteinGroupQuantity", ""),
-            # Modified peptides need to be parsed and rewritten
-        ]
-    )
+    column_mapping: dict[str, str] = {
+        "R.FileName": "Filename",
+        "R.Label": "Sample",
+        "PG.Qvalue": "Protein qvalue",
+        "PG.Cscore": "Protein cscore",
+        "PG.NrOfStrippedSequencesIdentified (Experiment-wide)": "Total peptides",
+        "PG.NrOfPrecursorsIdentified (Experiment-wide)": "Total ions",
+        "PEP.StrippedSequence": "Peptide sequence",
+        "PEP.AllOccurringProteinAccessions": "Mapped proteins",
+        "EG.ModifiedSequence": "Modified sequence",
+        "EG.CompensationVoltage": "Compensation voltage",
+        "EG.Qvalue": "Qvalue",
+        "EG.ApexRT": "Apex retention time",
+        "EG.DatapointsPerPeak": "Datapoints per peak",
+        "EG.FWHM": "FWHM",
+        "EG.SignalToNoise": "Signal to noise",
+        "FG.FragmentCount": "Fragment count",
+        "FG.Charge": "Charge",
+        "FG.MS1Quantity": "MS1 intensity",
+        "FG.MS1RawQuantity": "MS1 raw intensity",
+        "FG.MS2Quantity": "MS2 intensity",
+        "FG.MS2RawQuantity": "MS2 raw intensity",
+        "FG.MeasuredMz": "Observed m/z",
+        "FG.TheoreticalMz": "Theoretical m/z",
+        "FG.CalibratedMz": "Calibrated m/z",
+    }
     sample_column_tags: list[str] = [
         ".PG.NrOfPrecursorsIdentified",
         ".PG.IBAQ",
@@ -1324,7 +1314,7 @@ class SpectronautReader(ResultReader):
         filename: Optional[str] = None,
         filetag: Optional[str] = None,
         rename_columns: bool = True,
-    ) -> None:
+    ) -> pd.DataFrame:
         """Reads an ion evidence file (long format) and returns a processed dataframe.
         Adds new columns to comply with the MsReport convention. "Protein reported
@@ -1462,7 +1452,7 @@ def sort_leading_proteins(
     if penalize_contaminants is not None:
         contaminant_encoding = {"False": 0, "True": 1, False: 0, True: 1}
-    for idx, row in table.iterrows():
+    for _, row in table.iterrows():
         protein_ids = row["Leading proteins"].split(";")
         sorting_info = [[] for _ in protein_ids]
@@ -1559,6 +1549,7 @@ def add_protein_annotation(
         warnings.warn(
             f"Some proteins could not be annotated: {repr(proteins_not_in_db)}",
             ProteinsNotInFastaWarning,
+            stacklevel=2,
         )
     annotations = {}
@@ -1636,9 +1627,10 @@ def add_protein_site_annotation(
         warnings.warn(
             f"Some proteins could not be annotated: {repr(proteins_not_in_db)}",
             ProteinsNotInFastaWarning,
+            stacklevel=2,
         )
-    annotations = {
+    annotations: dict[str, list[str]] = {
         "Modified residue": [],
         "Sequence window": [],
     }
@@ -1702,6 +1694,7 @@ def add_leading_proteins_annotation(
         warnings.warn(
             f"Some proteins could not be annotated: {repr(proteins_not_in_db)}",
             ProteinsNotInFastaWarning,
+            stacklevel=2,
         )
     annotations = {}
@@ -1853,7 +1846,7 @@ def add_peptide_positions(
             find matching entries in the FASTA files.
     """
     # not tested #
-    peptide_positions = {"Start position": [], "End position": []}
+    peptide_positions: dict[str, list[int]] = {"Start position": [], "End position": []}
     proteins_not_in_db = []
     for peptide, protein_id in zip(table[peptide_column], table[protein_column]):
         if protein_id in protein_db:
@@ -1875,6 +1868,7 @@ def add_peptide_positions(
         warnings.warn(
             f"Some peptides could not be annotated: {repr(proteins_not_in_db)}",
             ProteinsNotInFastaWarning,
+            stacklevel=2,
         )
@@ -1894,10 +1888,10 @@ def add_protein_modifications(table: pd.DataFrame):
             for peptide_site, mod in [m.split(":") for m in mod_entry.split(";")]:
                 protein_site = int(peptide_site) + start_pos - 1
                 protein_mods.append([str(protein_site), mod])
-            protein_mods = ";".join([f"{pos}:{mod}" for pos, mod in protein_mods])
+            protein_mod_string = ";".join([f"{pos}:{mod}" for pos, mod in protein_mods])
         else:
-            protein_mods = ""
-        protein_modification_entries.append(protein_mods)
+            protein_mod_string = ""
+        protein_modification_entries.append(protein_mod_string)
     table["Protein modifications"] = protein_modification_entries
@@ -2074,7 +2068,7 @@ def _process_protein_entries(
         A dataframe containing the columns "Protein reported by software",
         "Leading proteins", "Representative protein", and "Potential contaminant".
     """
-    new_entries = {
+    new_entries: dict[str, list[str | bool]] = {
         "Protein reported by software": [],
         "Representative protein": [],
         "Potential contaminant": [],
@@ -2189,7 +2183,7 @@ def extract_fragpipe_localization_probabilities(localization_entry: str) -> dict
     ... )
     {'15.9949': {3: 1.0}, '79.9663': {4: 0.334, 6: 0.666}}
     """
-    modification_probabilities = {}
+    modification_probabilities: dict[str, dict[int, float]] = {}
     for modification_entry in filter(None, localization_entry.split(";")):
         specified_modification, probability_sequence = modification_entry.split("@")
         _, modification = specified_modification.split(":")
@@ -2247,7 +2241,7 @@ def _create_protein_annotations_from_db(
     protein_db: ProteinDatabase,
     query_function: Callable,
     default_value: Any,
-) -> list[str]:
+) -> list[Any]:
     """Returns a list of multi protein entry annotations.
     Used to generate protein annotations for protein entries. For each protein id an
@@ -2274,9 +2268,9 @@ def _create_protein_annotations_from_db(
         if protein_id in protein_db:
             db_entry = protein_db[protein_id]
             query_result = query_function(db_entry, default_value)
+            annotation_values.append(query_result)
         else:
-            query_result = default_value
-        annotation_values.append(query_result)
+            annotation_values.append(default_value)
     return annotation_values

msreport/rinterface/__init__.py CHANGED Viewed

@@ -1,3 +1,4 @@
-""" Python interface to custome R scripts. """
+"""Python interface to custome R scripts."""
 from .limma import multi_group_limma, two_group_limma
 from .rinstaller import r_package_version

msreport/rinterface/limma.py CHANGED Viewed

@@ -1,4 +1,5 @@
-""" Python interface to custome R scripts. """
+"""Python interface to custome R scripts."""
 import os
 import pandas as pd

msreport/rinterface/rinstaller.py CHANGED Viewed

@@ -1,9 +1,9 @@
-from rpy2.robjects.packages import importr
-import rpy2.robjects.packages as rpackages
 import rpy2.robjects as robjects
+import rpy2.robjects.packages as rpackages
+from rpy2.robjects.packages import importr
-def r_package_version(package_name: str) -> (str, str):
+def r_package_version(package_name: str) -> str:
     """Returns the version number of an installed R package."""
     with robjects.conversion.localconverter(robjects.default_converter):
         utils = importr("utils")

{msreport-0.0.26.dist-info → msreport-0.0.28.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: msreport
-Version: 0.0.26
+Version: 0.0.28
 Summary: Post processing and analysis of quantitative proteomics data
 Author-email: "David M. Hollenstein" <hollenstein.david@gmail.com>
 License: Apache-2.0
@@ -19,11 +19,15 @@ Requires-Dist: pandas>=1.4.4
 Requires-Dist: profasta>=0.0.4
 Requires-Dist: pyteomics>=4.6.0
 Requires-Dist: pyyaml>=6.0.0
-Requires-Dist: rpy2>=3.5.3
+Requires-Dist: rpy2!=3.5.13,>=3.5.3
 Requires-Dist: scikit-learn>=1.0.0
 Requires-Dist: scipy>=1.9.1
 Requires-Dist: seaborn>=0.12.0
 Requires-Dist: statsmodels>=0.13.2
+Requires-Dist: typing_extensions>=4
+Provides-Extra: dev
+Requires-Dist: mypy>=1.15.0; extra == "dev"
+Requires-Dist: pytest>=8.3.5; extra == "dev"
 Dynamic: license-file
 [![Project Status: WIP – Initial development is in progress, but there has not yet been a stable, usable release suitable for the public.](https://www.repostatus.org/badges/latest/wip.svg)](https://www.repostatus.org/#wip)
@@ -117,7 +121,7 @@ command as described above.
 ### Additional requirements
 MsReport provides an interface to the R package LIMMA for differential expression
-analysis, which requires a local installation of R (R version 3.4 or higher) and the
+analysis, which requires a local installation of R (R version 4.0 or higher) and the
 system environment variable "R_HOME" to be set to the R home directory. Note that it
 might be necessary to restart the computer after adding the "R_HOME" variable. The R
 home directory can also be found from within R by using the command below, and might

msreport-0.0.28.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,38 @@
+msreport/__init__.py,sha256=5-d_i-t9A3MV7hC-3z_vcWzaSAJSGY5T6McCBr4UGfc,339
+msreport/analyze.py,sha256=zNs0Vc2ODTfdiX6rSr79jXLJIh-6N11WH-vZpQzKDTE,30889
+msreport/errors.py,sha256=algGlR5iD9Q0U6Q3m25IwZryl9smtlPHsfhAL35PChc,295
+msreport/export.py,sha256=YvY3Nly5JC2CUM-JY1gydU1g2eqnennzToZfQQ5phO0,20156
+msreport/fasta.py,sha256=eXTmA4WGX4dT9wcTw7AdrvybLWG47p7ur48CxIjxjfg,1161
+msreport/impute.py,sha256=bf2Zy8VQNJ0Oh1sKn84Xp9iV5svi_Hp7iHxwRrFBwsI,10327
+msreport/isobar.py,sha256=m6NhLaKBiItIXuBhly_z2wEslxQGFC2f3-e1bzYXB78,6575
+msreport/normalize.py,sha256=K1x3DjL5Rep3t_eDIKIghMr0sAJiROnX6skHnOMPZ_k,20160
+msreport/peptidoform.py,sha256=26USj6WPrMgMIc7LttQ2n6Oq5jo1o7ayUQLR6gsRmZY,12015
+msreport/qtable.py,sha256=0e-TXmuiKBU6W5TL3tz06nNrjtEyT-CI9bvUq8W6qME,26768
+msreport/reader.py,sha256=ja4q8XtOHR_A6RL8ho-c6aGCVu1kzyhvil8ymiPx3PY,104612
+msreport/aggregate/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+msreport/aggregate/condense.py,sha256=eIh5A3RUvXrmoFUjRXagiPl0m-ucuRwYD8kDBI7voVs,5862
+msreport/aggregate/pivot.py,sha256=rn8li-FrtOZS4oWA8COk0uV2m71GCEbNu1ALNoMuHOA,5081
+msreport/aggregate/summarize.py,sha256=aYXi_i7MkqjA8k9WWpOgn029TeJ3H5Qo899msDVw89M,12165
+msreport/helper/__init__.py,sha256=UbBHKMcapSXCyNmfQm6rg-2OgS303txkgILtboE05KI,535
+msreport/helper/calc.py,sha256=J4XltEnMrFR9IQlPtrZhyxlSTj15072huHCMA_nqQ6E,4245
+msreport/helper/maxlfq.py,sha256=EP1UjV3IAz4NSpGOQSsWGbuxtGLmtw92dvXUwgBYmF0,14943
+msreport/helper/table.py,sha256=x-Wo8mTENsUxc_gtF-wgOyQa9g7W2fK6tuRiEX7bda0,11430
+msreport/helper/temp.py,sha256=jNulgDATf9sKXEFWMXAhjflciOZPAqlxg_7QZS7IkW8,3736
+msreport/plot/__init__.py,sha256=SnoQORfrjgz9SmqPZ-1J1aeVC5xu-cFfZINP4aYVCmY,1488
+msreport/plot/_partial_plots.py,sha256=tqZTSXEPuruMgVakaGR2tUQl5OrHgo2cROJ0S4cqkR0,5598
+msreport/plot/comparison.py,sha256=J8zWyQrzx7rxDLxeZQkfAlcSmLY3e_7wwPG-cGuWo2M,18564
+msreport/plot/distribution.py,sha256=a2Rw6HxQwGfDwRSy8dwpT7zvEQ968wYHjcVPOdXI3l8,10150
+msreport/plot/multivariate.py,sha256=0xzxggqbIGQYOfgiij93DTRWfG6GvvhqI9u1GNPHarY,13111
+msreport/plot/quality.py,sha256=dIo_dpdexEN_vp35WpUTt626E-QJ2qNbJmjUai_8uck,15861
+msreport/plot/style.py,sha256=67jWf4uA1ub9RJDu4xhuSoXAW0lbLj6SMP4QXQO76Pc,10591
+msreport/plot/style_sheets/msreport-notebook.mplstyle,sha256=SPYO_7vYT8Ha7tQ0KCTLtykiRQ13-_igAm7kyvsZj1I,1266
+msreport/plot/style_sheets/seaborn-whitegrid.mplstyle,sha256=eC8Zboy8R7ybBwbHPKvKbMIHACystN6X6I0lqm7B80U,833
+msreport/rinterface/__init__.py,sha256=g29j2cIrc71qBdF4Zys51feoXlC0dP6YcTIscPTqPdI,146
+msreport/rinterface/limma.py,sha256=fxYRUkkJKI-JpDvivjWj8bUS0ug7RRTMnaf2UOgRsXQ,5421
+msreport/rinterface/rinstaller.py,sha256=AGs6NFMSwTLrzrIJz1E5BE5jFUz8eQBHlpM_MWVChzA,1370
+msreport/rinterface/rscripts/limma.R,sha256=gr_yjMm_YoG45irDhWOo6gkRQSTwj_7uU_p3NBRHPm8,4331
+msreport-0.0.28.dist-info/licenses/LICENSE.txt,sha256=Pd-b5cKP4n2tFDpdx27qJSIq0d1ok0oEcGTlbtL6QMU,11560
+msreport-0.0.28.dist-info/METADATA,sha256=IVyUd3ZATwccffCWbgYYmUmPe8Y4vJvwZC6oMFuBBfw,5497
+msreport-0.0.28.dist-info/WHEEL,sha256=ck4Vq1_RXyvS4Jt6SI0Vz6fyVs4GWg7AINwpsaGEgPE,91
+msreport-0.0.28.dist-info/top_level.txt,sha256=Drl8mCckJHFIw-Ovh5AnyjKnqvLJltDOBUr1JAcHAlI,9
+msreport-0.0.28.dist-info/RECORD,,

msreport 0.0.26__py3-none-any.whl → 0.0.28__py3-none-any.whl

msreport 0.0.26__py3-none-any.whl → 0.0.28__py3-none-any.whl