msreport 0.0.26__tar.gz → 0.0.27__tar.gz

This diff shows the changes between two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (45)
  1. {msreport-0.0.26 → msreport-0.0.27}/PKG-INFO +2 -1
  2. {msreport-0.0.26 → msreport-0.0.27}/msreport/__init__.py +1 -1
  3. {msreport-0.0.26 → msreport-0.0.27}/msreport/analyze.py +79 -14
  4. {msreport-0.0.26 → msreport-0.0.27}/msreport/helper/calc.py +19 -13
  5. {msreport-0.0.26 → msreport-0.0.27}/msreport/impute.py +4 -6
  6. {msreport-0.0.26 → msreport-0.0.27}/msreport/normalize.py +53 -36
  7. {msreport-0.0.26 → msreport-0.0.27}/msreport/plot.py +9 -7
  8. {msreport-0.0.26 → msreport-0.0.27}/msreport.egg-info/PKG-INFO +2 -1
  9. {msreport-0.0.26 → msreport-0.0.27}/msreport.egg-info/SOURCES.txt +1 -0
  10. {msreport-0.0.26 → msreport-0.0.27}/msreport.egg-info/requires.txt +1 -0
  11. {msreport-0.0.26 → msreport-0.0.27}/pyproject.toml +1 -0
  12. {msreport-0.0.26 → msreport-0.0.27}/tests/test_analyze.py +43 -1
  13. msreport-0.0.27/tests/test_plot.py +121 -0
  14. {msreport-0.0.26 → msreport-0.0.27}/LICENSE.txt +0 -0
  15. {msreport-0.0.26 → msreport-0.0.27}/README.md +0 -0
  16. {msreport-0.0.26 → msreport-0.0.27}/msreport/aggregate/__init__.py +0 -0
  17. {msreport-0.0.26 → msreport-0.0.27}/msreport/aggregate/condense.py +0 -0
  18. {msreport-0.0.26 → msreport-0.0.27}/msreport/aggregate/pivot.py +0 -0
  19. {msreport-0.0.26 → msreport-0.0.27}/msreport/aggregate/summarize.py +0 -0
  20. {msreport-0.0.26 → msreport-0.0.27}/msreport/errors.py +0 -0
  21. {msreport-0.0.26 → msreport-0.0.27}/msreport/export.py +0 -0
  22. {msreport-0.0.26 → msreport-0.0.27}/msreport/fasta.py +0 -0
  23. {msreport-0.0.26 → msreport-0.0.27}/msreport/helper/__init__.py +0 -0
  24. {msreport-0.0.26 → msreport-0.0.27}/msreport/helper/maxlfq.py +0 -0
  25. {msreport-0.0.26 → msreport-0.0.27}/msreport/helper/table.py +0 -0
  26. {msreport-0.0.26 → msreport-0.0.27}/msreport/helper/temp.py +0 -0
  27. {msreport-0.0.26 → msreport-0.0.27}/msreport/isobar.py +0 -0
  28. {msreport-0.0.26 → msreport-0.0.27}/msreport/peptidoform.py +0 -0
  29. {msreport-0.0.26 → msreport-0.0.27}/msreport/qtable.py +0 -0
  30. {msreport-0.0.26 → msreport-0.0.27}/msreport/reader.py +0 -0
  31. {msreport-0.0.26 → msreport-0.0.27}/msreport/rinterface/__init__.py +0 -0
  32. {msreport-0.0.26 → msreport-0.0.27}/msreport/rinterface/limma.py +0 -0
  33. {msreport-0.0.26 → msreport-0.0.27}/msreport/rinterface/rinstaller.py +0 -0
  34. {msreport-0.0.26 → msreport-0.0.27}/msreport/rinterface/rscripts/limma.R +0 -0
  35. {msreport-0.0.26 → msreport-0.0.27}/msreport.egg-info/dependency_links.txt +0 -0
  36. {msreport-0.0.26 → msreport-0.0.27}/msreport.egg-info/top_level.txt +0 -0
  37. {msreport-0.0.26 → msreport-0.0.27}/setup.cfg +0 -0
  38. {msreport-0.0.26 → msreport-0.0.27}/setup.py +0 -0
  39. {msreport-0.0.26 → msreport-0.0.27}/tests/test_export.py +0 -0
  40. {msreport-0.0.26 → msreport-0.0.27}/tests/test_helper.py +0 -0
  41. {msreport-0.0.26 → msreport-0.0.27}/tests/test_impute.py +0 -0
  42. {msreport-0.0.26 → msreport-0.0.27}/tests/test_isobar.py +0 -0
  43. {msreport-0.0.26 → msreport-0.0.27}/tests/test_maxlfq.py +0 -0
  44. {msreport-0.0.26 → msreport-0.0.27}/tests/test_peptidoform.py +0 -0
  45. {msreport-0.0.26 → msreport-0.0.27}/tests/test_qtable.py +0 -0
{msreport-0.0.26 → msreport-0.0.27}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: msreport
- Version: 0.0.26
+ Version: 0.0.27
  Summary: Post processing and analysis of quantitative proteomics data
  Author-email: "David M. Hollenstein" <hollenstein.david@gmail.com>
  License: Apache-2.0
@@ -24,6 +24,7 @@ Requires-Dist: scikit-learn>=1.0.0
  Requires-Dist: scipy>=1.9.1
  Requires-Dist: seaborn>=0.12.0
  Requires-Dist: statsmodels>=0.13.2
+ Requires-Dist: typing_extensions>=4
  Dynamic: license-file
 
  [![Project Status: WIP – Initial development is in progress, but there has not yet been a stable, usable release suitable for the public.](https://www.repostatus.org/badges/latest/wip.svg)](https://www.repostatus.org/#wip)
{msreport-0.0.26 → msreport-0.0.27}/msreport/__init__.py
@@ -10,4 +10,4 @@ import msreport.normalize
  import msreport.plot
  import msreport.reader
 
- __version__ = "0.0.26"
+ __version__ = "0.0.27"
{msreport-0.0.26 → msreport-0.0.27}/msreport/analyze.py
@@ -9,7 +9,7 @@ import pandas as pd
 
  import msreport.normalize
  import msreport.rinterface
- from msreport.qtable import Qtable
+ from msreport.helper import find_sample_columns
 
 
  class Transformer(Protocol):
@@ -33,8 +33,8 @@ class CategoryTransformer(Protocol):
  def transform(self, table: pd.DataFrame) -> pd.DataFrame:
  """Transform values in 'table'."""
 
- def get_category_column(self, table: pd.DataFrame) -> pd.DataFrame:
- """Returns the specified category column."""
+ def get_category_column(self) -> str:
+ """Returns the name of the category column."""
 
 
  def analyze_missingness(qtable: Qtable) -> None:
@@ -75,6 +75,7 @@ def analyze_missingness(qtable: Qtable) -> None:
  def validate_proteins(
  qtable: Qtable,
  min_peptides: int = 0,
+ min_spectral_counts: int = 0,
  remove_contaminants: bool = True,
  min_events: Optional[int] = None,
  max_missing: Optional[int] = None,
@@ -84,12 +85,13 @@ def validate_proteins(
  Adds an additional column "Valid" to the qtable, containing Boolean values.
 
  Requires expression columns to be set. Depending on the arguments requires the
- columns "Total peptides", "Potential contaminant", and the experiment columns
- "Missing experiment_name" and "Events experiment_name".
+ columns "Total peptides", "Spectral count Combined", "Potential contaminant", and
+ the experiment columns "Missing experiment_name" and "Events experiment_name".
 
  Args:
  qtable: A Qtable instance.
  min_peptides: Minimum number of unique peptides, default 0.
+ min_spectral_counts: Minimum number of combined spectral counts, default 0.
  remove_contaminants: If true, the "Potential contaminant" column is used to
  remove invalid entries, default True. If no "Potential contaminant" column
  is present 'remove_contaminants' is ignored.
@@ -107,6 +109,16 @@ def validate_proteins(
  [valid_entries, qtable["Total peptides"] >= min_peptides], axis=0
  )
 
+ if min_spectral_counts > 0:
+ if "Spectral count Combined" not in qtable:
+ raise KeyError(
+ "'Spectral count Combined' column not present in qtable.data"
+ )
+ valid_entries = np.all(
+ [valid_entries, qtable["Spectral count Combined"] >= min_spectral_counts],
+ axis=0,
+ )
+
  # TODO: not tested from here #
  if remove_contaminants:
  if "Potential contaminant" not in qtable:
@@ -138,6 +150,50 @@ def validate_proteins(
  qtable["Valid"] = valid_entries
 
 
+ def apply_transformer(
+ qtable: msreport.Qtable,
+ transformer: Transformer,
+ tag: str,
+ exclude_invalid: bool,
+ remove_invalid: bool,
+ new_tag: Optional[str] = None,
+ ) -> None:
+ """Applies a transformer to the values of a Qtable selected with the tag parameter.
+
+ Args:
+ qtable: A Qtable instance, to which the transformer is applied.
+ transformer: The transformer to apply.
+ tag: The tag used to identify the columns for applying the transformer.
+ exclude_invalid: Exclude invalid values from the transformation.
+ remove_invalid: Remove invalid values from the table after the transformation.
+ new_tag: Optional, if specified than the tag is replaced with this value in the
+ column names and the transformed data is stored to these new columns.
+ """
+ valid = qtable.data["Valid"]
+ samples = qtable.get_samples()
+ sample_columns = find_sample_columns(qtable.data, tag, samples)
+
+ if not sample_columns:
+ raise ValueError(f"No sample columns found for tag '{tag}'.")
+
+ if new_tag is not None:
+ sample_columns = [c.replace(tag, new_tag) for c in sample_columns]
+ column_mapping = dict(zip(samples, sample_columns))
+
+ data_table = qtable.make_sample_table(tag, samples_as_columns=True)
+
+ if exclude_invalid:
+ data_table[valid] = transformer.transform(data_table[valid])
+ else:
+ data_table = transformer.transform(data_table)
+
+ if remove_invalid:
+ data_table[~valid] = np.nan
+
+ data_table.columns = [column_mapping[s] for s in data_table.columns]
+ qtable.data[data_table.columns] = data_table
+
+
  def normalize_expression(
  qtable: Qtable,
  normalizer: Transformer,
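The new tests in tests/test_analyze.py (further down in this diff) exercise apply_transformer with a small duck-typed transformer. A hedged usage sketch along the same lines; the Qtable construction mirrors the fixture in tests/test_plot.py, and any additional setup the real Qtable may require is not shown by this diff:

import pandas as pd

import msreport.analyze
import msreport.qtable


class ShiftTransformer:
    """Duck-typed transformer with the fit/is_fitted/transform methods apply_transformer expects."""

    def fit(self, table: pd.DataFrame):
        return self

    def is_fitted(self) -> bool:
        return True

    def transform(self, table: pd.DataFrame) -> pd.DataFrame:
        return table + 1.0  # shift every selected sample column by one


design = pd.DataFrame(
    [("Sample_A1", "Experiment_A", "1"), ("Sample_B1", "Experiment_B", "1")],
    columns=["Sample", "Experiment", "Replicate"],
)
data = pd.DataFrame(
    {
        "Representative protein": ["P1", "P2"],
        "Intensity Sample_A1": [10.0, 11.0],
        "Intensity Sample_B1": [11.0, 12.0],
        "Valid": [True, True],
    }
)
qtable = msreport.qtable.Qtable(data, design=design)

# Transform the "Intensity <sample>" columns and write the results to new
# "Shifted intensity <sample>" columns, leaving the originals untouched.
msreport.analyze.apply_transformer(
    qtable,
    ShiftTransformer(),
    tag="Intensity",
    new_tag="Shifted intensity",
    exclude_invalid=True,
    remove_invalid=False,
)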
@@ -168,11 +224,9 @@ def normalize_expression(
  raw_data = table[sample_columns]
  if not normalizer.is_fitted():
  if exclude_invalid:
- valid_mask = table["Valid"]
+ normalizer.fit(raw_data[table["Valid"]])
  else:
- valid_mask = np.ones_like(table["Valid"], dtype=bool)
- fit_data = raw_data[valid_mask]
- normalizer = normalizer.fit(fit_data)
+ normalizer = normalizer.fit(raw_data)
 
  transformed_data = normalizer.transform(raw_data)
  qtable[expression_columns] = transformed_data[sample_columns]
@@ -421,7 +475,7 @@ def two_group_comparison(
 
  def calculate_multi_group_limma(
  qtable: Qtable,
- experiment_pairs: list[list[str, str]],
+ experiment_pairs: Iterable[Iterable[str]],
  exclude_invalid: bool = True,
  batch: bool = False,
  limma_trend: bool = True,
@@ -454,6 +508,14 @@ def calculate_multi_group_limma(
  limma_trend: If true, an intensity-dependent trend is fitted to the prior
  variance during calculation of the moderated t-statistics, refer to
  limma.eBayes for details; default True.
+
+ Raises:
+ KeyError: If the "Batch" column is not present in the qtable.design when
+ 'batch' is set to True.
+ ValueError: If all values from qtable.design["Batch"] are identical when 'batch'
+ is set to True.
+ ValueError: If the same experiment pair has been specified multiple times in
+ 'experiment_pairs'.
  """
  # TODO: not tested #
  if batch and "Batch" not in qtable.get_design():
@@ -466,6 +528,11 @@ def calculate_multi_group_limma(
  "When using calculate_multi_group_limma(batch=True), not all values from"
  ' qtable.design["Batch"] are allowed to be identical.'
  )
+ if len(list(experiment_pairs)) != len(set(experiment_pairs)):
+ raise ValueError(
+ "The same experiment pair has been specified multiple times."
+ " Each entry in the `experiment_pairs` argument must be unique."
+ )
 
  design = qtable.get_design()
  table = qtable.make_expression_table(
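The new uniqueness check builds a set from experiment_pairs, which only works when each pair is hashable (for example a tuple rather than a list). A small standalone sketch of the same check under that assumption:

from typing import Iterable, Tuple


def check_unique_pairs(experiment_pairs: Iterable[Tuple[str, str]]) -> None:
    # set() requires hashable entries, so the pairs are assumed to be tuples here.
    pairs = list(experiment_pairs)
    if len(pairs) != len(set(pairs)):
        raise ValueError(
            "The same experiment pair has been specified multiple times."
            " Each entry in the `experiment_pairs` argument must be unique."
        )


check_unique_pairs([("Experiment_A", "Experiment_B"), ("Experiment_A", "Experiment_C")])  # passes
# check_unique_pairs([("Experiment_A", "Experiment_B")] * 2)  # would raise ValueError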
@@ -504,7 +571,7 @@ def calculate_multi_group_limma(
  limma_result.rename(columns=mapping, inplace=True)
 
  limma_table = pd.DataFrame(index=table.index)
- limma_table = limma_table.join(limma_results.values())
+ limma_table = limma_table.join(list(limma_results.values()))
  limma_table.fillna(np.nan, inplace=True)
  qtable.add_expression_features(limma_table)
 
@@ -516,7 +583,7 @@
 
  def calculate_two_group_limma(
  qtable: Qtable,
- experiment_pair: list[str, str],
+ experiment_pair: list[str],
  exclude_invalid: bool = True,
  limma_trend: bool = True,
  ) -> None:
@@ -582,5 +649,3 @@ def calculate_two_group_limma(
  mapping = {col: f"{col} {comparison_group}" for col in limma_table.columns}
  limma_table.rename(columns=mapping, inplace=True)
  qtable.add_expression_features(limma_table)
-
- return limma_result
{msreport-0.0.26 → msreport-0.0.27}/msreport/helper/calc.py
@@ -19,22 +19,28 @@ def mode(values: Iterable) -> float:
  Returns:
  The estimated mode. If no finite values are present, returns nan.
  """
- values = np.array(values)
+ values = np.asarray(values)
  finite_values = values[np.isfinite(values)]
  if len(finite_values) == 0:
- mode = np.nan
+ return np.nan
  elif len(np.unique(finite_values)) == 1:
- mode = np.unique(finite_values)[0]
- else:
- median = np.median(finite_values)
- bounds = (median - 1.5, median + 1.5)
- kde = scipy.stats.gaussian_kde(finite_values)
- optimize_result = scipy.optimize.minimize_scalar(
- lambda x: -kde(x)[0], method="Bounded", bounds=bounds
- )
- mode = optimize_result.x
- # Maybe add fallback function if optimize was not successful
- return mode
+ return np.unique(finite_values)[0]
+
+ kde = scipy.stats.gaussian_kde(finite_values)
+ minimum_function = lambda x: -kde(x)[0]
+
+ min_slice, max_sclice = np.percentile(finite_values, (2, 98))
+ slice_step = 0.2
+ brute_optimize_result = scipy.optimize.brute(
+ minimum_function, [slice(min_slice, max_sclice + slice_step, slice_step)]
+ )
+ rough_minimum = brute_optimize_result[0]
+
+ local_optimize_result = scipy.optimize.minimize(
+ minimum_function, x0=rough_minimum, method="BFGS"
+ )
+ fine_minimum = local_optimize_result.x[0]
+ return fine_minimum
 
 
  def calculate_tryptic_ibaq_peptides(protein_sequence: str) -> int:
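The reworked mode() swaps the narrow bounded search around the median for a two-stage approach: a coarse brute-force grid search across the 2nd to 98th percentile of the data, followed by an unconstrained local refinement of the best grid point. A standalone sketch of the same idea (grid step and percentile bounds chosen here for illustration):

import numpy as np
import scipy.optimize
import scipy.stats


def kde_mode(values: np.ndarray) -> float:
    """Estimate the mode of a 1D sample via a KDE: coarse grid search, then local refinement."""
    finite = values[np.isfinite(values)]
    kde = scipy.stats.gaussian_kde(finite)
    neg_density = lambda x: -kde(x)[0]

    # Stage 1: brute-force grid search between the 2nd and 98th percentile.
    lower, upper = np.percentile(finite, (2, 98))
    rough = scipy.optimize.brute(neg_density, [slice(lower, upper + 0.2, 0.2)])[0]

    # Stage 2: refine the grid minimum with an unconstrained local optimizer.
    refined = scipy.optimize.minimize(neg_density, x0=rough, method="BFGS")
    return refined.x[0]


rng = np.random.default_rng(0)
sample = rng.normal(loc=25.0, scale=1.5, size=500)
print(kde_mode(sample))  # expected to land near 25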
{msreport-0.0.26 → msreport-0.0.27}/msreport/impute.py
@@ -1,5 +1,5 @@
  from __future__ import annotations
- from typing import Optional
+ from typing import Optional, Any
 
  import numpy as np
  import pandas as pd
@@ -18,7 +18,7 @@ class FixedValueImputer:
  def __init__(
  self,
  strategy: str,
- fill_value: Optional[float] = None,
+ fill_value: float = 0.0,
  column_wise: bool = True,
  ):
  """Initializes the FixedValueImputer.
@@ -51,13 +51,11 @@ class FixedValueImputer:
  Returns the fitted FixedValueImputer instance.
  """
  if self.strategy == "constant":
- # if not isinstance(self.fill_value, (float, int)):
- # raise Excpetion()
  fill_values = {column: self.fill_value for column in table.columns}
  elif self.strategy == "below":
  if self.column_wise:
  fill_values = {}
- for column in table:
+ for column in table.columns:
  fill_values[column] = _calculate_integer_below_min(table[column])
  else:
  int_below_min = _calculate_integer_below_min(table)
@@ -240,7 +238,7 @@ class PerseusImputer:
  return _table
 
 
- def confirm_is_fitted(imputer: any, msg: Optional[str] = None) -> None:
+ def confirm_is_fitted(imputer: Any, msg: Optional[str] = None) -> None:
  """Perform is_fitted validation for imputer instances.
 
  Checks if the imputer is fitted by verifying the presence of fitted attributes
{msreport-0.0.26 → msreport-0.0.27}/msreport/normalize.py
@@ -1,7 +1,6 @@
  from __future__ import annotations
- import abc
- import itertools
- from typing import Callable, Iterable, Optional
+ from typing import Callable, Iterable, Optional, Protocol
+ from typing_extensions import Self
 
  import numpy as np
  import pandas as pd
@@ -12,27 +11,18 @@ import msreport.helper.maxlfq as MAXLFQ
  from msreport.errors import NotFittedError
 
 
- class BaseSampleNormalizer(abc.ABC):
- """Base class for all sample normalizers."""
+ class AbstractTransformer(Protocol):
+ def fit(self, table: pd.DataFrame) -> Self:
+ """Fits the Transformer and returns a fitted Transformer instance."""
 
- @abc.abstractmethod
- def fit(self, table: pd.DataFrame) -> BaseSampleNormalizer:
- ...
-
- @abc.abstractmethod
  def is_fitted(self) -> bool:
- ...
-
- @abc.abstractmethod
- def get_fits(self) -> dict[...]:
- ...
+ """Returns True if the Transformer has been fitted."""
 
- @abc.abstractmethod
  def transform(self, table: pd.DataFrame) -> pd.DataFrame:
- ...
+ """Transform values in table."""
 
 
- class FixedValueNormalizer(BaseSampleNormalizer):
+ class FixedValueNormalizer:
  """Normalization by a constant normalization factor for each sample.
 
  Expects log transformed intensity values.
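Replacing the abc base class with a Protocol makes conformance structural: any object with matching fit/is_fitted/transform methods can be passed where an AbstractTransformer is expected, without inheriting from anything. A minimal sketch of a conforming normalizer using the newly added typing_extensions.Self return annotation; the class name and median-centering logic are illustrative, not part of msreport:

from __future__ import annotations

import pandas as pd
from typing_extensions import Self


class MedianShiftNormalizer:
    """Structurally satisfies AbstractTransformer: fit, is_fitted, transform."""

    def __init__(self) -> None:
        self._shifts: dict[str, float] = {}

    def fit(self, table: pd.DataFrame) -> Self:
        # Store each column's median so transform() can center columns at zero.
        self._shifts = table.median().to_dict()
        return self

    def is_fitted(self) -> bool:
        return bool(self._shifts)

    def transform(self, table: pd.DataFrame) -> pd.DataFrame:
        return table - pd.Series(self._shifts)


normalizer = MedianShiftNormalizer().fit(
    pd.DataFrame({"Sample_A": [10.0, 12.0], "Sample_B": [11.0, 15.0]})
)
print(normalizer.transform(pd.DataFrame({"Sample_A": [11.0], "Sample_B": [13.0]})))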
@@ -58,11 +48,11 @@ class FixedValueNormalizer(BaseSampleNormalizer):
  f'"comparison" = {comparison} not allowed. '
  'Must be either "paired" or "reference".'
  )
- self._comparison_mode = comparison
- self._fit_function = center_function
- self._sample_fits = None
+ self._comparison_mode: str = comparison
+ self._fit_function: Callable = center_function
+ self._sample_fits: dict[str, float] = {}
 
- def fit(self, table: pd.DataFrame) -> BaseSampleNormalizer:
+ def fit(self, table: pd.DataFrame) -> Self:
  """Fits the FixedValueNormalizer.
 
  Args:
@@ -80,7 +70,7 @@ class FixedValueNormalizer(BaseSampleNormalizer):
 
  def is_fitted(self) -> bool:
  """Returns True if the FixedValueNormalizer has been fitted."""
- return self._sample_fits is not None
+ return True if self._sample_fits else False
 
  def get_fits(self) -> dict[str, float]:
  """Returns a dictionary containing the fitted center values per sample.
@@ -159,13 +149,13 @@ class FixedValueNormalizer(BaseSampleNormalizer):
  self._sample_fits[sample] = sample_fit
 
 
- class ValueDependentNormalizer(BaseSampleNormalizer):
+ class ValueDependentNormalizer:
  """Normalization with a value dependent fit for each sample.
 
  Expects log transformed intensity values.
  """
 
- def __init__(self, fit_function: Callable):
+ def __init__(self, fit_function: Callable[[Iterable, Iterable], np.ndarray]):
  """Initializes the ValueDependentNormalizer.
 
  Args:
@@ -175,10 +165,10 @@ class ValueDependentNormalizer(BaseSampleNormalizer):
  with two columns. The first column contains the values and the second
  column the fitted deviations.
  """
- self._sample_fits = None
+ self._sample_fits: dict[str, np.ndarray] = {}
  self._fit_function = fit_function
 
- def fit(self, table: pd.DataFrame) -> BaseSampleNormalizer:
+ def fit(self, table: pd.DataFrame) -> Self:
  """Fits the ValueDependentNormalizer.
 
  Args:
@@ -192,9 +182,9 @@ class ValueDependentNormalizer(BaseSampleNormalizer):
 
  def is_fitted(self) -> bool:
  """Returns True if the ValueDependentNormalizer has been fitted."""
- return self._sample_fits is not None
+ return True if self._sample_fits else False
 
- def get_fits(self) -> dict[str, Iterable[float, float]]:
+ def get_fits(self) -> dict[str, np.ndarray]:
  """Returns a dictionary containing lists of fitting data per sample.
 
  Returns:
@@ -324,14 +314,14 @@ class CategoricalNormalizer:
  column must be present in the reference table and the table to be
  transformed.
  """
- self._fitted_table = None
- self._category_column = category_column
+ self._fitted_table: pd.DataFrame = pd.DataFrame()
+ self._category_column: str = category_column
 
  def is_fitted(self) -> bool:
  """Returns True if the CategoricalNormalizer has been fitted."""
- return self._fitted_table is not None
+ return not self._fitted_table.empty
 
- def fit(self, reference_table: pd.DataFrame) -> BaseSampleNormalizer:
+ def fit(self, reference_table: pd.DataFrame) -> Self:
  """Fits the CategoricalNormalizer to a reference table.
 
  Args:
@@ -397,7 +387,34 @@ class CategoricalNormalizer:
  return transformed_table
 
 
- class ZscoreScaler(BaseSampleNormalizer):
+ class PercentageScaler:
+ """Transform column values to percentages by dividing them with the column sum."""
+
+ def fit(self, table: pd.DataFrame) -> Self:
+ """Returns the instance itself."""
+ return self
+
+ def is_fitted(self) -> bool:
+ """Always returns True because the ZscoreScaler does not need to be fitted."""
+ return True
+
+ def get_fits(self) -> dict:
+ """Returns a dictionary containing the parameters 'with_mean' and 'with_std'."""
+ return {}
+
+ def transform(self, table: pd.DataFrame) -> pd.DataFrame:
+ """Transforms column values into percentages by devision with the column sum.
+
+ Args:
+ table: The table used to scale row values.
+
+ Returns:
+ A copy of the table containing the scaled values.
+ """
+ return table.divide(table.sum(axis=0), axis=1)
+
+
+ class ZscoreScaler:
  """Normalize samples by z-score scaling."""
 
  def __init__(self, with_mean: bool = True, with_std: bool = True):
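The new PercentageScaler is stateless: fit() just returns the instance and transform() divides each column by its column sum. A short usage sketch (the sample column names are made up):

import pandas as pd

import msreport.normalize

table = pd.DataFrame({"Sample_A1": [2.0, 6.0, 2.0], "Sample_B1": [1.0, 1.0, 2.0]})
scaler = msreport.normalize.PercentageScaler()
print(scaler.transform(table))
# Each column now sums to 1.0, e.g. Sample_A1 becomes [0.2, 0.6, 0.2].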
@@ -410,7 +427,7 @@ class ZscoreScaler(BaseSampleNormalizer):
  self._with_mean = with_mean
  self._with_std = with_std
 
- def fit(self, table: pd.DataFrame) -> BaseSampleNormalizer:
+ def fit(self, table: pd.DataFrame) -> Self:
  """Returns the instance itself."""
  return self
 
@@ -440,7 +457,7 @@ class ZscoreScaler(BaseSampleNormalizer):
 
 
  def confirm_is_fitted(
- normalizer: BaseSampleNormalizer, msg: Optional[str] = None
+ normalizer: AbstractTransformer, msg: Optional[str] = None
  ) -> None:
  """Perform is_fitted validation for normalizer instances.
 
{msreport-0.0.26 → msreport-0.0.27}/msreport/plot.py
@@ -703,16 +703,18 @@ def volcano_ma(
  y_col = " ".join([y_variable, comparison_group])
  x_values = data[x_col]
  y_values = data[y_col]
- ax.grid(axis="both", linestyle="dotted", linewidth=1)
+ xy_labels = data[annotation_column]
 
- mask = masks["default"]
- ax.scatter(x_values[mask], y_values[mask], **params["default"])
+ valid_values = np.isfinite(x_values) & np.isfinite(y_values)
+ mask_default = masks["default"] & valid_values
+ mask_special = masks["highlight"] & valid_values
 
- mask = masks["highlight"]
+ ax.grid(axis="both", linestyle="dotted", linewidth=1)
+ ax.scatter(x_values[mask_default], y_values[mask_default], **params["default"])
  _annotated_scatter(
- x_values=data[x_col][mask],
- y_values=data[y_col][mask],
- labels=data[annotation_column][mask],
+ x_values=x_values[mask_special],
+ y_values=y_values[mask_special],
+ labels=xy_labels[mask_special],
  ax=ax,
  scatter_kws=params["highlight"],
  )
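The plot fix restricts both the default and the highlighted scatter points to rows whose x and y values are finite, so NaN ratios or p-values no longer reach the annotation step. The masking pattern in isolation, with illustrative column names:

import numpy as np
import pandas as pd

data = pd.DataFrame(
    {
        "Ratio [log2]": [-3.0, np.nan, 0.0],
        "P-value": [0.0001, np.nan, 0.1],
        "highlight": [True, True, False],
    }
)

valid_values = np.isfinite(data["Ratio [log2]"]) & np.isfinite(data["P-value"])
mask_special = data["highlight"] & valid_values
print(data.loc[mask_special])  # only the first row survives; the NaN row is dropped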
{msreport-0.0.26 → msreport-0.0.27}/msreport.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: msreport
- Version: 0.0.26
+ Version: 0.0.27
  Summary: Post processing and analysis of quantitative proteomics data
  Author-email: "David M. Hollenstein" <hollenstein.david@gmail.com>
  License: Apache-2.0
@@ -24,6 +24,7 @@ Requires-Dist: scikit-learn>=1.0.0
  Requires-Dist: scipy>=1.9.1
  Requires-Dist: seaborn>=0.12.0
  Requires-Dist: statsmodels>=0.13.2
+ Requires-Dist: typing_extensions>=4
  Dynamic: license-file
 
  [![Project Status: WIP – Initial development is in progress, but there has not yet been a stable, usable release suitable for the public.](https://www.repostatus.org/badges/latest/wip.svg)](https://www.repostatus.org/#wip)
{msreport-0.0.26 → msreport-0.0.27}/msreport.egg-info/SOURCES.txt
@@ -39,4 +39,5 @@ tests/test_impute.py
  tests/test_isobar.py
  tests/test_maxlfq.py
  tests/test_peptidoform.py
+ tests/test_plot.py
  tests/test_qtable.py
{msreport-0.0.26 → msreport-0.0.27}/msreport.egg-info/requires.txt
@@ -10,3 +10,4 @@ scikit-learn>=1.0.0
  scipy>=1.9.1
  seaborn>=0.12.0
  statsmodels>=0.13.2
+ typing_extensions>=4
{msreport-0.0.26 → msreport-0.0.27}/pyproject.toml
@@ -36,6 +36,7 @@ dependencies = [
  "scipy >= 1.9.1",
  "seaborn >= 0.12.0",
  "statsmodels >= 0.13.2",
+ "typing_extensions >= 4",
  ]
  dynamic = ["version"]
 
{msreport-0.0.26 → msreport-0.0.27}/tests/test_analyze.py
@@ -110,6 +110,48 @@ class TestValidateProteins:
  assert expected_valid == self.qtable.data["Valid"].sum()
 
 
+ class TestApplyTransformer:
+ @pytest.fixture(autouse=True)
+ def _init_imputer(self, example_qtable):
+ class MockTransformer:
+ def fit(self, table: pd.DataFrame):
+ return self
+
+ def is_fitted(self):
+ return True
+
+ def transform(self, table: pd.DataFrame):
+ _table = table.copy()
+ _table[_table.columns] = 1.0
+ return _table
+
+ self.transformer = MockTransformer()
+
+ def test_transformation_applied_to_all_values_with_no_exclusion_and_removal(self, example_qtable): # fmt: skip
+ msreport.analyze.apply_transformer(example_qtable, self.transformer, "Expression", exclude_invalid=False, remove_invalid=False) # fmt: skip
+ table = example_qtable.make_expression_table()
+ assert table.eq(1.0).all().all()
+
+ def test_invalid_values_are_set_to_nan_with_remove_invalid(self, example_qtable):
+ example_qtable.data.loc[0, "Valid"] = False
+ msreport.analyze.apply_transformer(example_qtable, self.transformer, "Expression", exclude_invalid=False, remove_invalid=True) # fmt: skip
+ table = example_qtable.make_expression_table()
+ assert table.loc[0, :].isna().all()
+
+ def test_invalid_values_are_not_transformed_with_exclude_invalid(self, example_qtable): # fmt: skip
+ example_qtable.data.loc[0, "Valid"] = False
+ msreport.analyze.apply_transformer(example_qtable, self.transformer, "Expression", exclude_invalid=True, remove_invalid=False) # fmt: skip
+ table = example_qtable.make_expression_table()
+ assert not table.loc[0, :].eq(1.0).all().all()
+ assert table.loc[1:, :].eq(1.0).all().all()
+
+ # Further test if the transformer creates a new set of columns and leaves the old set untouched
+ def test_new_columns_are_created_with_new_tag_parameter(self, example_qtable):
+ msreport.analyze.apply_transformer(example_qtable, self.transformer, "Expression", new_tag="New", exclude_invalid=False, remove_invalid=False) # fmt: skip
+ new_column_samples = example_qtable.make_sample_table("New", samples_as_columns=True).columns.tolist() # fmt: skip
+ assert new_column_samples == example_qtable.get_samples()
+
+
  class TestNormalizeExpression:
  def test_normalization_with_fitted_normalizer(self, example_qtable):
  shift = 1
@@ -315,7 +357,7 @@ class TestNormalizeExpressionByCategory:
 
  def transform(self, table: pd.DataFrame):
  table = table.copy()
- table.loc[:, :] = 0
+ table[table.columns] = 0
  return table
 
  def get_category_column(self):
msreport-0.0.27/tests/test_plot.py
@@ -0,0 +1,121 @@
+ import numpy as np
+ import pandas as pd
+ import pytest
+
+ import msreport.qtable
+ import msreport.plot
+
+
+ @pytest.fixture
+ def example_data():
+ design = pd.DataFrame(
+ [
+ ("Sample_A1", "Experiment_A", "1"),
+ ("Sample_A2", "Experiment_A", "1"),
+ ("Sample_B1", "Experiment_B", "1"),
+ ("Sample_B2", "Experiment_B", "1"),
+ ],
+ columns=["Sample", "Experiment", "Replicate"],
+ )
+ data = pd.DataFrame(
+ {
+ "Representative protein": ["A", "B", "C"],
+ "Intensity Sample_A1": [10, 11, 10.3],
+ "Intensity Sample_A2": [10, np.nan, 10.3],
+ "Intensity Sample_B1": [11, 11, np.nan],
+ "Intensity Sample_B2": [15, np.nan, 10.3],
+ "Expression Experiment_A": [10, 11, 10.3], # <- Adjust to Sample_A1/A2
+ "Expression Experiment_B": [13, 11, 10.3], # <- Adjust to Sample_A1/A2
+ "Ratio [log2] Experiment_A vs Experiment_B": [-3, np.nan, 0],
+ "P-value Experiment_A vs Experiment_B": [0.0001, np.nan, 0.1],
+ "Average expression Experiment_A vs Experiment_B": [11.5, 11, 10.3],
+ "Valid": [True, False, True],
+ }
+ )
+ missing_values = pd.DataFrame(
+ {
+ "Missing total": [0, 4, 1],
+ "Missing Experiment_A": [0, 2, 0],
+ "Missing Experiment_B": [0, 2, 1],
+ "Events total": [4, 0, 3],
+ "Events Experiment_A": [2, 0, 2],
+ "Events Experiment_B": [2, 0, 1],
+ }
+ )
+ data = data.join(missing_values)
+ example_data = {"data": data, "design": design}
+ return example_data
+
+
+ @pytest.fixture
+ def example_qtable(example_data):
+ qtable = msreport.qtable.Qtable(example_data["data"], design=example_data["design"])
+ qtable.set_expression_by_tag("Intensity")
+ return qtable
+
+
+ class TestVolcanoMa:
+ @pytest.fixture(autouse=True)
+ def _init_qtable(self, example_qtable):
+ self.qtable = example_qtable
+
+ def test_no_error_without_missing_values_due_to_exclude_invalid(self):
+ fig, axes = msreport.plot.volcano_ma(
+ self.qtable,
+ ["Experiment_A", "Experiment_B"],
+ comparison_tag=" vs ",
+ pvalue_tag="P-value",
+ special_proteins=["A", "B", "C"],
+ exclude_invalid=True,
+ )
+
+ def test_no_error_with_missing_values_but_no_special_protein_labeling(self):
+ fig, axes = msreport.plot.volcano_ma(
+ self.qtable,
+ ["Experiment_A", "Experiment_B"],
+ comparison_tag=" vs ",
+ pvalue_tag="P-value",
+ exclude_invalid=False,
+ )
+
+ def test_no_error_with_missing_values_of_special_proteins(self):
+ fig, axes = msreport.plot.volcano_ma(
+ self.qtable,
+ ["Experiment_A", "Experiment_B"],
+ comparison_tag=" vs ",
+ pvalue_tag="P-value",
+ special_proteins=["A", "B", "C"],
+ exclude_invalid=False,
+ )
+
+
+ class TestExpressionComparison:
+ @pytest.fixture(autouse=True)
+ def _init_qtable(self, example_qtable):
+ self.qtable = example_qtable
+
+ def test_no_error_without_missing_values_due_to_exclude_invalid(self):
+ fig, axes = msreport.plot.expression_comparison(
+ self.qtable,
+ ["Experiment_A", "Experiment_B"],
+ comparison_tag=" vs ",
+ special_proteins=["A", "B", "C"],
+ exclude_invalid=True,
+ )
+
+ def test_no_error_with_missing_values_but_no_special_protein_labeling(self):
+ fig, axes = msreport.plot.expression_comparison(
+ self.qtable,
+ ["Experiment_A", "Experiment_B"],
+ comparison_tag=" vs ",
+ exclude_invalid=False,
+ )
+
+ def test_no_error_with_missing_values_of_special_proteins(self):
+ fig, axes = msreport.plot.expression_comparison(
+ self.qtable,
+ ["Experiment_A", "Experiment_B"],
+ comparison_tag=" vs ",
+ special_proteins=["A", "B", "C"],
+ exclude_invalid=False,
+ )