PyPI - msreport - Versions diffs - 0.0.24__tar.gz → 0.0.26__tar.gz - Mend

msreport 0.0.24.tar.gz → 0.0.26.tar.gz

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (44) hide show

{msreport-0.0.24 → msreport-0.0.26}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: msreport
-Version: 0.0.24
+Version: 0.0.26
 Summary: Post processing and analysis of quantitative proteomics data
 Author-email: "David M. Hollenstein" <hollenstein.david@gmail.com>
 License: Apache-2.0

{msreport-0.0.24 → msreport-0.0.26}/msreport/__init__.py RENAMED Viewed

@@ -10,4 +10,4 @@ import msreport.normalize
 import msreport.plot
 import msreport.reader
-__version__ = "0.0.24"
+__version__ = "0.0.26"

{msreport-0.0.24 → msreport-0.0.26}/msreport/helper/table.py RENAMED Viewed

@@ -176,12 +176,22 @@ def find_sample_columns(
             columns.
     Returns:
-        A list of sample column names.
+        A list of column names containing the substring and any entry of 'samples'.
+        Columns are returned in the order of entries in 'samples'.
     """
+    WHITESPACE_CHARS = " ."
     matched_columns = []
-    for column in find_columns(table, substring):
-        if any([sample in column for sample in samples]):
-            matched_columns.append(column)
+    substring_columns = find_columns(table, substring)
+    for sample in samples:
+        sample_columns = [c for c in substring_columns if sample in c]
+        for col in sample_columns:
+            column_remainder = (
+                col.replace(substring, "").replace(sample, "").strip(WHITESPACE_CHARS)
+            )
+            if column_remainder == "":
+                matched_columns.append(col)
+                break
     return matched_columns

{msreport-0.0.24 → msreport-0.0.26}/msreport/plot.py RENAMED Viewed

@@ -718,7 +718,10 @@ def volcano_ma(
         )
         ax.set_xlabel(x_variable)
-        ax.set_ylabel(f"{y_variable} [-log10]")
+        if y_variable == pvalue_tag:
+            ax.set_ylabel(f"{y_variable} [-log10]")
+        else:
+            ax.set_ylabel(f"{y_variable} [log2]")
     fig.tight_layout()
     return fig, axes
@@ -1075,13 +1078,13 @@ def pvalue_histogram(
         )
         # Adjust x- and y-axis
-        ax.set_xlabel(None)
         ax.set_xticks(np.arange(0, 1.01, 0.5))
         ax.tick_params(labelsize=9)
         if plot_number > 0:
             ax.tick_params(axis="y", color="none")
-        # Add second label
+        # Add x-label and second y-label
+        ax.set_xlabel(pvalue_tag, fontsize=9)
         ax2 = ax.twinx()
         ax2.set_yticks([])
         ax2.set_ylabel(comparison_group, fontsize=9)

{msreport-0.0.24 → msreport-0.0.26}/msreport/qtable.py RENAMED Viewed

@@ -1,5 +1,5 @@
 from __future__ import annotations
-from typing import Any, Optional
+from typing import Any, Iterable, Optional
 import os
 import warnings
@@ -196,8 +196,9 @@ class Qtable:
         columns = helper.find_sample_columns(self.data, tag, samples)
         table = self.get_data(exclude_invalid=exclude_invalid)[columns]
         if samples_as_columns:
-            mapping = _str_to_substr_mapping(columns, samples)
-            table.rename(columns=mapping, inplace=True)
+            sample_to_columns = _match_samples_to_tag_columns(samples, columns, tag)
+            columns_to_samples = {v: k for k, v in sample_to_columns.items()}
+            table.rename(columns=columns_to_samples, inplace=True)
         return table
     def make_expression_table(
@@ -502,15 +503,30 @@ def _exclude_invalid(df: pd.DataFrame) -> pd.DataFrame:
     return df[df["Valid"]].copy()
-def _str_to_substr_mapping(strings, substrings) -> dict[str, str]:
-    """Mapping of strings to substrings.
+def _match_samples_to_tag_columns(
+    samples: Iterable[str],
+    columns: Iterable[str],
+    tag: str,
+) -> dict:
+    """Mapping of samples to columns which contain the sample and the tag.
-    Strings point to a matching substring. If multiple substrings are found in a string,
-    only one is reported.
+    Args:
+        samples: A list of sample names.
+        columns: A list of column names.
+        tag: A string that must be present in the column names.
+    Returns:
+        A dictionary that maps sample names to column names that contain the sample
+        name and the tag.
     """
+    WHITESPACE_CHARS = " ."
     mapping = dict()
-    for sub in substrings:
-        mapping.update({s: sub for s in strings if sub in s})
+    for sample in samples:
+        for col in columns:
+            if col.replace(tag, "").replace(sample, "").strip(WHITESPACE_CHARS) == "":
+                mapping[sample] = col
+                break
     return mapping

{msreport-0.0.24 → msreport-0.0.26}/msreport/reader.py RENAMED Viewed

@@ -2196,9 +2196,11 @@ def extract_fragpipe_localization_probabilities(localization_entry: str) -> dict
         _, probabilities = msreport.peptidoform.parse_modified_sequence(
             probability_sequence, "(", ")"
         )
-        modification_probabilities[modification] = {
-            site: float(probability) for site, probability in probabilities
-        }
+        if modification not in modification_probabilities:
+            modification_probabilities[modification] = {}
+        modification_probabilities[modification].update(
+            {site: float(probability) for site, probability in probabilities}
+        )
     return modification_probabilities

{msreport-0.0.24 → msreport-0.0.26}/msreport.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: msreport
-Version: 0.0.24
+Version: 0.0.26
 Summary: Post processing and analysis of quantitative proteomics data
 Author-email: "David M. Hollenstein" <hollenstein.david@gmail.com>
 License: Apache-2.0

{msreport-0.0.24 → msreport-0.0.26}/tests/test_helper.py RENAMED Viewed

@@ -19,20 +19,35 @@ class TestFindColumns:
         assert columns == ["Test A", "Test B"]
-def test_find_sample_columns():
-    df = pd.DataFrame(
-        columns=[
-            "Test",
-            "Test Not_a_sample",
-            "Test Sample_A",
-            "Test Sample_B",
-            "Something else",
-        ]
-    )
-    samples = ["Sample_A", "Sample_B"]
-    tag = "Test"
-    columns = msreport.helper.find_sample_columns(df, tag, samples)
-    assert columns == ["Test Sample_A", "Test Sample_B"]
+class TestFindSampleColumns:
+    def test_find_sample_columns(self):
+        df = pd.DataFrame(
+            columns=[
+                "Tag",
+                "Tag Not_a_sample",
+                "Tag Sample_A",
+                "Tag Sample_B",
+                "Something else",
+            ]
+        )
+        samples = ["Sample_A", "Sample_B"]
+        tag = "Tag"
+        columns = msreport.helper.find_sample_columns(df, tag, samples)
+        assert columns == ["Tag Sample_A", "Tag Sample_B"]
+    def test_columns_are_returned_in_order_of_samples(self):
+        df = pd.DataFrame(columns=["Tag Sample_B", "Tag Sample_A"])
+        samples = ["Sample_A", "Sample_B"]
+        tag = "Tag"
+        columns = msreport.helper.find_sample_columns(df, tag, samples)
+        assert columns == ["Tag Sample_A", "Tag Sample_B"]
+    def test_correct_mapping_with_samples_that_are_substrings(self):
+        df = pd.DataFrame(columns=["Tag SampleB_1", "Tag B_1"])
+        samples = ["B_1"]
+        tag = "Tag"
+        columns = msreport.helper.find_sample_columns(df, tag, samples)
+        assert columns == ["Tag B_1"]
 class TestKeepRowsByPartialMatch:

{msreport-0.0.24 → msreport-0.0.26}/tests/test_qtable.py RENAMED Viewed

@@ -84,15 +84,24 @@ class TestExcludeInvalid:
             msreport.qtable._exclude_invalid(df)
-def test_str_to_substr_mapping():
-    strings = ["Tag SampleB_1", "Tag SampleA_1", "Tag SampleA_2"]
-    substrs = ["SampleA_1", "SampleB_1", "SampleA_2"]
-    true_mapping = {
-        "Tag SampleA_1": "SampleA_1",
-        "Tag SampleA_2": "SampleA_2",
-        "Tag SampleB_1": "SampleB_1",
-    }
-    assert msreport.qtable._str_to_substr_mapping(strings, substrs) == true_mapping
+class TestMatchSamplesToTagColumns:
+    def test_match_samples_to_tag_columns(self):
+        samples = ["SampleA_1", "SampleA_2", "SampleB_1"]
+        columns = ["Tag SampleA_1", "Tag SampleA_2", "Tag SampleB_1"]
+        true_mapping = {
+            "SampleA_1": "Tag SampleA_1",
+            "SampleA_2": "Tag SampleA_2",
+            "SampleB_1": "Tag SampleB_1",
+        }
+        observed_mapping = msreport.qtable._match_samples_to_tag_columns(samples, columns, "Tag")  # fmt:skip
+        assert observed_mapping == true_mapping
+    def test_correct_mapping_with_samples_that_are_substrings(self):
+        samples = ["B_1", "SampleB_1"]
+        columns = ["Tag B_1", "Tag SampleB_1"]
+        true_mapping = {"B_1": "Tag B_1", "SampleB_1": "Tag SampleB_1"}
+        observed_mapping = msreport.qtable._match_samples_to_tag_columns(samples, columns, "Tag")  # fmt:skip
+        assert observed_mapping == true_mapping
 def test_qtable_setup():