msreport 0.0.24__tar.gz → 0.0.26__tar.gz

This diff compares the contents of two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (44)
  1. {msreport-0.0.24 → msreport-0.0.26}/PKG-INFO +1 -1
  2. {msreport-0.0.24 → msreport-0.0.26}/msreport/__init__.py +1 -1
  3. {msreport-0.0.24 → msreport-0.0.26}/msreport/helper/table.py +14 -4
  4. {msreport-0.0.24 → msreport-0.0.26}/msreport/plot.py +6 -3
  5. {msreport-0.0.24 → msreport-0.0.26}/msreport/qtable.py +25 -9
  6. {msreport-0.0.24 → msreport-0.0.26}/msreport/reader.py +5 -3
  7. {msreport-0.0.24 → msreport-0.0.26}/msreport.egg-info/PKG-INFO +1 -1
  8. {msreport-0.0.24 → msreport-0.0.26}/tests/test_helper.py +29 -14
  9. {msreport-0.0.24 → msreport-0.0.26}/tests/test_qtable.py +18 -9
  10. {msreport-0.0.24 → msreport-0.0.26}/LICENSE.txt +0 -0
  11. {msreport-0.0.24 → msreport-0.0.26}/README.md +0 -0
  12. {msreport-0.0.24 → msreport-0.0.26}/msreport/aggregate/__init__.py +0 -0
  13. {msreport-0.0.24 → msreport-0.0.26}/msreport/aggregate/condense.py +0 -0
  14. {msreport-0.0.24 → msreport-0.0.26}/msreport/aggregate/pivot.py +0 -0
  15. {msreport-0.0.24 → msreport-0.0.26}/msreport/aggregate/summarize.py +0 -0
  16. {msreport-0.0.24 → msreport-0.0.26}/msreport/analyze.py +0 -0
  17. {msreport-0.0.24 → msreport-0.0.26}/msreport/errors.py +0 -0
  18. {msreport-0.0.24 → msreport-0.0.26}/msreport/export.py +0 -0
  19. {msreport-0.0.24 → msreport-0.0.26}/msreport/fasta.py +0 -0
  20. {msreport-0.0.24 → msreport-0.0.26}/msreport/helper/__init__.py +0 -0
  21. {msreport-0.0.24 → msreport-0.0.26}/msreport/helper/calc.py +0 -0
  22. {msreport-0.0.24 → msreport-0.0.26}/msreport/helper/maxlfq.py +0 -0
  23. {msreport-0.0.24 → msreport-0.0.26}/msreport/helper/temp.py +0 -0
  24. {msreport-0.0.24 → msreport-0.0.26}/msreport/impute.py +0 -0
  25. {msreport-0.0.24 → msreport-0.0.26}/msreport/isobar.py +0 -0
  26. {msreport-0.0.24 → msreport-0.0.26}/msreport/normalize.py +0 -0
  27. {msreport-0.0.24 → msreport-0.0.26}/msreport/peptidoform.py +0 -0
  28. {msreport-0.0.24 → msreport-0.0.26}/msreport/rinterface/__init__.py +0 -0
  29. {msreport-0.0.24 → msreport-0.0.26}/msreport/rinterface/limma.py +0 -0
  30. {msreport-0.0.24 → msreport-0.0.26}/msreport/rinterface/rinstaller.py +0 -0
  31. {msreport-0.0.24 → msreport-0.0.26}/msreport/rinterface/rscripts/limma.R +0 -0
  32. {msreport-0.0.24 → msreport-0.0.26}/msreport.egg-info/SOURCES.txt +0 -0
  33. {msreport-0.0.24 → msreport-0.0.26}/msreport.egg-info/dependency_links.txt +0 -0
  34. {msreport-0.0.24 → msreport-0.0.26}/msreport.egg-info/requires.txt +0 -0
  35. {msreport-0.0.24 → msreport-0.0.26}/msreport.egg-info/top_level.txt +0 -0
  36. {msreport-0.0.24 → msreport-0.0.26}/pyproject.toml +0 -0
  37. {msreport-0.0.24 → msreport-0.0.26}/setup.cfg +0 -0
  38. {msreport-0.0.24 → msreport-0.0.26}/setup.py +0 -0
  39. {msreport-0.0.24 → msreport-0.0.26}/tests/test_analyze.py +0 -0
  40. {msreport-0.0.24 → msreport-0.0.26}/tests/test_export.py +0 -0
  41. {msreport-0.0.24 → msreport-0.0.26}/tests/test_impute.py +0 -0
  42. {msreport-0.0.24 → msreport-0.0.26}/tests/test_isobar.py +0 -0
  43. {msreport-0.0.24 → msreport-0.0.26}/tests/test_maxlfq.py +0 -0
  44. {msreport-0.0.24 → msreport-0.0.26}/tests/test_peptidoform.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: msreport
3
- Version: 0.0.24
3
+ Version: 0.0.26
4
4
  Summary: Post processing and analysis of quantitative proteomics data
5
5
  Author-email: "David M. Hollenstein" <hollenstein.david@gmail.com>
6
6
  License: Apache-2.0
@@ -10,4 +10,4 @@ import msreport.normalize
10
10
  import msreport.plot
11
11
  import msreport.reader
12
12
 
13
- __version__ = "0.0.24"
13
+ __version__ = "0.0.26"
@@ -176,12 +176,22 @@ def find_sample_columns(
176
176
  columns.
177
177
 
178
178
  Returns:
179
- A list of sample column names.
179
+ A list of column names containing the substring and any entry of 'samples'.
180
+ Columns are returned in the order of entries in 'samples'.
180
181
  """
182
+ WHITESPACE_CHARS = " ."
183
+
181
184
  matched_columns = []
182
- for column in find_columns(table, substring):
183
- if any([sample in column for sample in samples]):
184
- matched_columns.append(column)
185
+ substring_columns = find_columns(table, substring)
186
+ for sample in samples:
187
+ sample_columns = [c for c in substring_columns if sample in c]
188
+ for col in sample_columns:
189
+ column_remainder = (
190
+ col.replace(substring, "").replace(sample, "").strip(WHITESPACE_CHARS)
191
+ )
192
+ if column_remainder == "":
193
+ matched_columns.append(col)
194
+ break
185
195
  return matched_columns
186
196
 
187
197
 
@@ -718,7 +718,10 @@ def volcano_ma(
718
718
  )
719
719
 
720
720
  ax.set_xlabel(x_variable)
721
- ax.set_ylabel(f"{y_variable} [-log10]")
721
+ if y_variable == pvalue_tag:
722
+ ax.set_ylabel(f"{y_variable} [-log10]")
723
+ else:
724
+ ax.set_ylabel(f"{y_variable} [log2]")
722
725
 
723
726
  fig.tight_layout()
724
727
  return fig, axes
@@ -1075,13 +1078,13 @@ def pvalue_histogram(
1075
1078
  )
1076
1079
 
1077
1080
  # Adjust x- and y-axis
1078
- ax.set_xlabel(None)
1079
1081
  ax.set_xticks(np.arange(0, 1.01, 0.5))
1080
1082
  ax.tick_params(labelsize=9)
1081
1083
  if plot_number > 0:
1082
1084
  ax.tick_params(axis="y", color="none")
1083
1085
 
1084
- # Add second label
1086
+ # Add x-label and second y-label
1087
+ ax.set_xlabel(pvalue_tag, fontsize=9)
1085
1088
  ax2 = ax.twinx()
1086
1089
  ax2.set_yticks([])
1087
1090
  ax2.set_ylabel(comparison_group, fontsize=9)
@@ -1,5 +1,5 @@
1
1
  from __future__ import annotations
2
- from typing import Any, Optional
2
+ from typing import Any, Iterable, Optional
3
3
  import os
4
4
  import warnings
5
5
 
@@ -196,8 +196,9 @@ class Qtable:
196
196
  columns = helper.find_sample_columns(self.data, tag, samples)
197
197
  table = self.get_data(exclude_invalid=exclude_invalid)[columns]
198
198
  if samples_as_columns:
199
- mapping = _str_to_substr_mapping(columns, samples)
200
- table.rename(columns=mapping, inplace=True)
199
+ sample_to_columns = _match_samples_to_tag_columns(samples, columns, tag)
200
+ columns_to_samples = {v: k for k, v in sample_to_columns.items()}
201
+ table.rename(columns=columns_to_samples, inplace=True)
201
202
  return table
202
203
 
203
204
  def make_expression_table(
@@ -502,15 +503,30 @@ def _exclude_invalid(df: pd.DataFrame) -> pd.DataFrame:
502
503
  return df[df["Valid"]].copy()
503
504
 
504
505
 
505
- def _str_to_substr_mapping(strings, substrings) -> dict[str, str]:
506
- """Mapping of strings to substrings.
506
+ def _match_samples_to_tag_columns(
507
+ samples: Iterable[str],
508
+ columns: Iterable[str],
509
+ tag: str,
510
+ ) -> dict:
511
+ """Mapping of samples to columns which contain the sample and the tag.
507
512
 
508
- Strings point to a matching substring. If multiple substrings are found in a string,
509
- only one is reported.
513
+ Args:
514
+ samples: A list of sample names.
515
+ columns: A list of column names.
516
+ tag: A string that must be present in the column names.
517
+
518
+ Returns:
519
+ A dictionary that maps sample names to column names that contain the sample
520
+ name and the tag.
510
521
  """
522
+ WHITESPACE_CHARS = " ."
523
+
511
524
  mapping = dict()
512
- for sub in substrings:
513
- mapping.update({s: sub for s in strings if sub in s})
525
+ for sample in samples:
526
+ for col in columns:
527
+ if col.replace(tag, "").replace(sample, "").strip(WHITESPACE_CHARS) == "":
528
+ mapping[sample] = col
529
+ break
514
530
  return mapping
515
531
 
516
532
 
@@ -2196,9 +2196,11 @@ def extract_fragpipe_localization_probabilities(localization_entry: str) -> dict
2196
2196
  _, probabilities = msreport.peptidoform.parse_modified_sequence(
2197
2197
  probability_sequence, "(", ")"
2198
2198
  )
2199
- modification_probabilities[modification] = {
2200
- site: float(probability) for site, probability in probabilities
2201
- }
2199
+ if modification not in modification_probabilities:
2200
+ modification_probabilities[modification] = {}
2201
+ modification_probabilities[modification].update(
2202
+ {site: float(probability) for site, probability in probabilities}
2203
+ )
2202
2204
  return modification_probabilities
2203
2205
 
2204
2206
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: msreport
3
- Version: 0.0.24
3
+ Version: 0.0.26
4
4
  Summary: Post processing and analysis of quantitative proteomics data
5
5
  Author-email: "David M. Hollenstein" <hollenstein.david@gmail.com>
6
6
  License: Apache-2.0
@@ -19,20 +19,35 @@ class TestFindColumns:
19
19
  assert columns == ["Test A", "Test B"]
20
20
 
21
21
 
22
- def test_find_sample_columns():
23
- df = pd.DataFrame(
24
- columns=[
25
- "Test",
26
- "Test Not_a_sample",
27
- "Test Sample_A",
28
- "Test Sample_B",
29
- "Something else",
30
- ]
31
- )
32
- samples = ["Sample_A", "Sample_B"]
33
- tag = "Test"
34
- columns = msreport.helper.find_sample_columns(df, tag, samples)
35
- assert columns == ["Test Sample_A", "Test Sample_B"]
22
+ class TestFindSampleColumns:
23
+ def test_find_sample_columns(self):
24
+ df = pd.DataFrame(
25
+ columns=[
26
+ "Tag",
27
+ "Tag Not_a_sample",
28
+ "Tag Sample_A",
29
+ "Tag Sample_B",
30
+ "Something else",
31
+ ]
32
+ )
33
+ samples = ["Sample_A", "Sample_B"]
34
+ tag = "Tag"
35
+ columns = msreport.helper.find_sample_columns(df, tag, samples)
36
+ assert columns == ["Tag Sample_A", "Tag Sample_B"]
37
+
38
+ def test_columns_are_returned_in_order_of_samples(self):
39
+ df = pd.DataFrame(columns=["Tag Sample_B", "Tag Sample_A"])
40
+ samples = ["Sample_A", "Sample_B"]
41
+ tag = "Tag"
42
+ columns = msreport.helper.find_sample_columns(df, tag, samples)
43
+ assert columns == ["Tag Sample_A", "Tag Sample_B"]
44
+
45
+ def test_correct_mapping_with_samples_that_are_substrings(self):
46
+ df = pd.DataFrame(columns=["Tag SampleB_1", "Tag B_1"])
47
+ samples = ["B_1"]
48
+ tag = "Tag"
49
+ columns = msreport.helper.find_sample_columns(df, tag, samples)
50
+ assert columns == ["Tag B_1"]
36
51
 
37
52
 
38
53
  class TestKeepRowsByPartialMatch:
@@ -84,15 +84,24 @@ class TestExcludeInvalid:
84
84
  msreport.qtable._exclude_invalid(df)
85
85
 
86
86
 
87
- def test_str_to_substr_mapping():
88
- strings = ["Tag SampleB_1", "Tag SampleA_1", "Tag SampleA_2"]
89
- substrs = ["SampleA_1", "SampleB_1", "SampleA_2"]
90
- true_mapping = {
91
- "Tag SampleA_1": "SampleA_1",
92
- "Tag SampleA_2": "SampleA_2",
93
- "Tag SampleB_1": "SampleB_1",
94
- }
95
- assert msreport.qtable._str_to_substr_mapping(strings, substrs) == true_mapping
87
+ class TestMatchSamplesToTagColumns:
88
+ def test_match_samples_to_tag_columns(self):
89
+ samples = ["SampleA_1", "SampleA_2", "SampleB_1"]
90
+ columns = ["Tag SampleA_1", "Tag SampleA_2", "Tag SampleB_1"]
91
+ true_mapping = {
92
+ "SampleA_1": "Tag SampleA_1",
93
+ "SampleA_2": "Tag SampleA_2",
94
+ "SampleB_1": "Tag SampleB_1",
95
+ }
96
+ observed_mapping = msreport.qtable._match_samples_to_tag_columns(samples, columns, "Tag") # fmt:skip
97
+ assert observed_mapping == true_mapping
98
+
99
+ def test_correct_mapping_with_samples_that_are_substrings(self):
100
+ samples = ["B_1", "SampleB_1"]
101
+ columns = ["Tag B_1", "Tag SampleB_1"]
102
+ true_mapping = {"B_1": "Tag B_1", "SampleB_1": "Tag SampleB_1"}
103
+ observed_mapping = msreport.qtable._match_samples_to_tag_columns(samples, columns, "Tag") # fmt:skip
104
+ assert observed_mapping == true_mapping
96
105
 
97
106
 
98
107
  def test_qtable_setup():
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes