msreport 0.0.30__py3-none-any.whl → 0.0.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
msreport/peptidoform.py CHANGED
@@ -1,5 +1,14 @@
1
+ """Defines the `Peptide` class and associated utilities for handling peptidoforms.
2
+
3
+ This module provides a `Peptide` class for representing modified peptide sequences,
4
+ and their site localization probabilities. It offers methods to access and manipulate
5
+ peptide information, summarize isoform probabilities, and retrieve modification sites.
6
+ Additionally, it includes utility functions for parsing modified sequence strings and
7
+ converting site localization probabilities to and from a standardized string format.
8
+ """
9
+
1
10
  from collections import defaultdict as ddict
2
- from typing import Optional, Union
11
+ from typing import Optional
3
12
 
4
13
  import numpy as np
5
14
 
@@ -10,7 +19,7 @@ class Peptide:
10
19
  def __init__(
11
20
  self,
12
21
  modified_sequence: str,
13
- localization_probabilities: Optional[dict] = None,
22
+ localization_probabilities: Optional[dict[str, dict[int, float]]] = None,
14
23
  protein_position: Optional[int] = None,
15
24
  ):
16
25
  plain_sequence, modifications = parse_modified_sequence(
@@ -28,7 +37,7 @@ class Peptide:
28
37
  self.modification_positions[mod_tag].append(position)
29
38
  self.modified_residues[position] = mod_tag
30
39
 
31
- def make_modified_sequence(self, include: Optional[list] = None) -> str:
40
+ def make_modified_sequence(self, include: Optional[list[str]] = None) -> str:
32
41
  """Returns a modified sequence string.
33
42
 
34
43
  Args:
@@ -55,7 +64,7 @@ class Peptide:
55
64
  return 0
56
65
  return len(self.modification_positions[modification])
57
66
 
58
- def isoform_probability(self, modification: str) -> Union[float, None]:
67
+ def isoform_probability(self, modification: str) -> float | None:
59
68
  """Calculates the isoform probability for a given modification.
60
69
 
61
70
  Returns:
@@ -66,12 +75,13 @@ class Peptide:
66
75
  """
67
76
  probabilities = []
68
77
  for site in self.list_modified_peptide_sites(modification):
69
- probabilities.append(self.get_peptide_site_probability(site))
70
- if None in probabilities:
71
- return None
78
+ probability = self.get_peptide_site_probability(site)
79
+ if probability is None:
80
+ return None
81
+ probabilities.append(probability)
72
82
  return float(np.prod(probabilities))
73
83
 
74
- def get_peptide_site_probability(self, position: int) -> Optional[float]:
84
+ def get_peptide_site_probability(self, position: int) -> float | None:
75
85
  """Return the modification localization probability of the peptide position.
76
86
 
77
87
  Args:
@@ -85,7 +95,7 @@ class Peptide:
85
95
  """
86
96
  return self._get_site_probability(position, is_protein_position=False)
87
97
 
88
- def get_protein_site_probability(self, position: int) -> Optional[float]:
98
+ def get_protein_site_probability(self, position: int) -> float | None:
89
99
  """Return the modification localization probability of the protein position.
90
100
 
91
101
  Args:
@@ -109,7 +119,7 @@ class Peptide:
109
119
 
110
120
  def _get_site_probability(
111
121
  self, position: int, is_protein_position: bool
112
- ) -> Optional[float]:
122
+ ) -> float | None:
113
123
  """Return the modification localization probability of the peptide position.
114
124
 
115
125
  Args:
@@ -224,7 +234,7 @@ def modify_peptide(
224
234
 
225
235
 
226
236
  def make_localization_string(
227
- localization_probabilities: dict, decimal_places: int = 3
237
+ localization_probabilities: dict[str, dict[int, float]], decimal_places: int = 3
228
238
  ) -> str:
229
239
  """Generates a site localization probability string.
230
240
 
msreport/plot/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- """This module provides various plotting functions for visualizing data within a Qtable.
1
+ """Plotting functions for visualizing proteomics data from `Qtable`.
2
2
 
3
3
  The functions in this module generate a wide range of plots, including heatmaps, PCA
4
4
  plots, volcano plots, and histograms, to analyze and compare expression values,
@@ -6,8 +6,8 @@ missingness, contaminants, and other features in proteomics datasets. The plots
6
6
  designed to work with the Qtable class as input, which provides structured access to
7
7
  proteomics data and experimental design information.
8
8
 
9
- The style of the plots can be customized using the `set_active_style` function, which
10
- allows applying style sheets from the msreport library or those available in matplotlib.
9
+ Users can customize plot styles via the `set_active_style` function, which allows
10
+ applying style sheets from the msreport library or those available in matplotlib.
11
11
  """
12
12
 
13
13
  from .comparison import expression_comparison, pvalue_histogram, volcano_ma
@@ -77,10 +77,15 @@ def volcano_ma(
77
77
  )
78
78
  special_entries = list(special_entries) + list(special_proteins)
79
79
 
80
- data = qtable.get_data(exclude_invalid=exclude_invalid)
81
- if annotation_column not in data.columns:
80
+ if annotation_column not in qtable.data.columns:
82
81
  annotation_column = qtable.id_column
83
82
 
83
+ data = qtable.get_data(exclude_invalid=exclude_invalid)
84
+ mask = np.ones(data.shape[0], dtype=bool)
85
+ for tag in [ratio_tag, expression_tag, pvalue_tag]:
86
+ mask = mask & np.isfinite(data[f"{tag} {comparison_group}"])
87
+ data = data[mask]
88
+
84
89
  scatter_size = 2 / (max(min(data.shape[0], 10000), 1000) / 1000)
85
90
 
86
91
  masks = {
@@ -21,6 +21,7 @@ def sample_pca(
21
21
  pc_x: str = "PC1",
22
22
  pc_y: str = "PC2",
23
23
  exclude_invalid: bool = True,
24
+ exclude_missing: bool = False,
24
25
  ) -> tuple[plt.Figure, list[plt.Axes]]:
25
26
  """Figure to compare sample similarities with a principal component analysis.
26
27
 
@@ -44,11 +45,14 @@ def sample_pca(
44
45
  samples.
45
46
  exclude_invalid: If True, rows are filtered according to the Boolean entries of
46
47
  the "Valid" column.
48
+ exclude_missing: If True, only rows without any missing values are used.
47
49
 
48
50
  Returns:
49
51
  A matplotlib Figure and a list of Axes objects, containing the PCA plots.
50
52
  """
51
53
  design = qtable.get_design()
54
+ samples = qtable.get_samples()
55
+
52
56
  if design.shape[0] < 3:
53
57
  fig, ax = plt.subplots(1, 1, figsize=(2, 1.3))
54
58
  fig.suptitle(f'PCA of "{tag}" values', y=1.1)
@@ -65,13 +69,22 @@ def sample_pca(
65
69
  return fig, np.array([ax])
66
70
 
67
71
  table = qtable.make_sample_table(
68
- tag, samples_as_columns=True, exclude_invalid=exclude_invalid
72
+ tag, samples_as_columns=True, exclude_invalid=False
69
73
  )
74
+
75
+ inclusion_mask = np.ones(qtable.data.shape[0], dtype=bool)
76
+ if exclude_invalid:
77
+ inclusion_mask = inclusion_mask & qtable["Valid"]
78
+ if exclude_missing:
79
+ _non_missing_masks = [(qtable[f"Missing {s}"] == 0) for s in samples]
80
+ inclusion_mask = inclusion_mask & (np.all(_non_missing_masks, axis=0))
81
+ table = table[inclusion_mask]
82
+
70
83
  table = table.replace({0: np.nan})
71
84
  table = table[np.isfinite(table).sum(axis=1) > 0]
72
85
  if not msreport.helper.intensities_in_logspace(table):
73
86
  table = np.log2(table)
74
- table[table.isna()] = 0
87
+ table = table.fillna(0)
75
88
 
76
89
  table = table.transpose()
77
90
  sample_index = table.index.tolist()
@@ -203,6 +216,7 @@ def sample_pca(
203
216
  def expression_clustermap(
204
217
  qtable: Qtable,
205
218
  exclude_invalid: bool = True,
219
+ exclude_missing: bool = False,
206
220
  remove_imputation: bool = True,
207
221
  mean_center: bool = False,
208
222
  cluster_samples: bool = True,
@@ -218,6 +232,7 @@ def expression_clustermap(
218
232
  qtable: A `Qtable` instance, whose data is used for plotting.
219
233
  exclude_invalid: If True, rows are filtered according to the Boolean entries of
220
234
  the "Valid" column.
235
+ exclude_missing: If True, only rows without any missing values are used.
221
236
  remove_imputation: If True, imputed values are set to 0 before clustering.
222
237
  Defaults to True.
223
238
  mean_center: If True, the data is mean-centered before clustering. Defaults to
@@ -242,25 +257,29 @@ def expression_clustermap(
242
257
  if len(samples) < 2:
243
258
  raise ValueError("At least two samples are required to generate a clustermap.")
244
259
 
245
- data = qtable.make_expression_table(samples_as_columns=True)
260
+ data = qtable.make_expression_table(samples_as_columns=True, exclude_invalid=False)
246
261
  data = data[samples]
262
+ data = data.fillna(0)
247
263
 
248
- for sample in samples:
249
- if remove_imputation:
250
- data.loc[qtable.data[f"Missing {sample}"], sample] = 0
251
- data[sample] = data[sample].fillna(0)
252
-
253
- if not mean_center:
254
- # Hide missing values in the heatmap, making them appear white
255
- mask_values = qtable.data[
264
+ if not mean_center: # Hide missing values in the heatmap, making them appear white
265
+ hide_values_mask = qtable.data[
256
266
  [f"Missing {sample}" for sample in samples]
257
267
  ].to_numpy()
258
268
  else:
259
- mask_values = np.zeros(data.shape, dtype=bool)
269
+ hide_values_mask = np.zeros(data.shape, dtype=bool)
270
+
271
+ if remove_imputation:
272
+ for sample in samples:
273
+ data.loc[qtable.data[f"Missing {sample}"], sample] = 0
260
274
 
275
+ inclusion_mask = np.ones(data.shape[0], dtype=bool)
261
276
  if exclude_invalid:
262
- data = data[qtable.data["Valid"]]
263
- mask_values = mask_values[qtable.data["Valid"]]
277
+ inclusion_mask = inclusion_mask & qtable["Valid"]
278
+ if exclude_missing:
279
+ _non_missing_masks = [(qtable[f"Missing {s}"] == 0) for s in samples]
280
+ inclusion_mask = inclusion_mask & (np.all(_non_missing_masks, axis=0))
281
+ hide_values_mask = hide_values_mask[inclusion_mask]
282
+ data = data[inclusion_mask]
264
283
 
265
284
  color_wheel = ColorWheelDict()
266
285
  for exp in experiments:
@@ -314,7 +333,7 @@ def expression_clustermap(
314
333
  col_cluster=cluster_samples,
315
334
  col_colors=sample_colors,
316
335
  row_colors=["#000000" for _ in range(len(data))],
317
- mask=mask_values,
336
+ mask=hide_values_mask,
318
337
  method=cluster_method,
319
338
  metric="euclidean",
320
339
  **heatmap_args,
msreport/plot/quality.py CHANGED
@@ -314,7 +314,7 @@ def sample_intensities(
314
314
 
315
315
  @with_active_style
316
316
  def sample_correlation(
317
- qtable, exclude_invalid: bool = True, labels: bool = False
317
+ qtable: Qtable, exclude_invalid: bool = True, labels: bool = False
318
318
  ) -> tuple[plt.Figure, list[plt.Axes]]:
319
319
  """Generates a pair-wise correlation matrix of samples 'Expression' values.
320
320
 
msreport/qtable.py CHANGED
@@ -1,14 +1,28 @@
1
- from __future__ import annotations
1
+ """Defines the `Qtable` class, the central container for quantitative proteomics data.
2
+
3
+ The `Qtable` class serves as the standardized data structure for `msreport`,
4
+ storing a main table with quantitative values and associated metadata for its entries;
5
+ it also maintains the name of the unique ID column for the main table. Additionally,
6
+ it stores an experimental design table that links sample names to experimental
7
+ conditions and replicate information.
8
+
9
+ `Qtable` provides convenience methods for creating subtables and accessing design
10
+ related information (e.g., samples per experiment), and instances of `Qtable` can be
11
+ easily saved to disk and loaded back. As the central data container, the `Qtable`
12
+ facilitates seamless integration with the high-level modules `analyze`, `plot` and
13
+ `export`, which all directly operate on `Qtable` instances.
14
+ """
2
15
 
3
16
  import copy
4
17
  import os
5
18
  import warnings
6
19
  from contextlib import contextmanager
7
- from typing import Any, Iterable, Optional
20
+ from typing import Any, Generator, Iterable, Optional
8
21
 
9
22
  import numpy as np
10
23
  import pandas as pd
11
24
  import yaml
25
+ from typing_extensions import Self
12
26
 
13
27
  import msreport.helper as helper
14
28
 
@@ -359,7 +373,7 @@ class Qtable:
359
373
  keep_experiments: Optional[Iterable[str]] = None,
360
374
  exclude_samples: Optional[Iterable[str]] = None,
361
375
  keep_samples: Optional[Iterable[str]] = None,
362
- ):
376
+ ) -> Generator[None, None, None]:
363
377
  """Context manager to temporarily modify the design table.
364
378
 
365
379
  Args:
@@ -422,7 +436,7 @@ class Qtable:
422
436
  self.design.to_csv(filepaths["design"], sep="\t", index=True)
423
437
 
424
438
  @classmethod
425
- def load(cls, directory: str, basename: str) -> Qtable:
439
+ def load(cls, directory: str, basename: str) -> Self:
426
440
  """Load a qtable from disk by reading a data, design, and config file.
427
441
 
428
442
  Loading a qtable will first import the three files generated during saving, then
@@ -470,7 +484,7 @@ class Qtable:
470
484
  )
471
485
  id_column = config_data["Unique ID column"]
472
486
 
473
- qtable = Qtable(data, design, id_column)
487
+ qtable = cls(data, design, id_column)
474
488
  qtable._expression_columns = config_data["Expression columns"]
475
489
  qtable._expression_features = config_data["Expression features"]
476
490
  qtable._expression_sample_mapping = config_data["Expression sample mapping"]
@@ -486,11 +500,11 @@ class Qtable:
486
500
  )
487
501
  self.data.to_csv(path, sep="\t", index=index)
488
502
 
489
- def to_clipboard(self, index: bool = False):
503
+ def to_clipboard(self, index: bool = False) -> None:
490
504
  """Writes the data table to the system clipboard."""
491
505
  self.data.to_clipboard(sep="\t", index=index)
492
506
 
493
- def copy(self) -> Qtable:
507
+ def copy(self) -> Self:
494
508
  """Returns a copy of this Qtable instance."""
495
509
  return self.__copy__()
496
510
 
@@ -579,8 +593,8 @@ class Qtable:
579
593
  self._expression_features = []
580
594
  self._expression_sample_mapping = {}
581
595
 
582
- def __copy__(self) -> Qtable:
583
- new_instance = Qtable(self.data, self.design, self.id_column)
596
+ def __copy__(self) -> Self:
597
+ new_instance = type(self)(self.data, self.design, self.id_column)
584
598
  # Copy all private attributes
585
599
  for attr in dir(self):
586
600
  if (
@@ -609,7 +623,7 @@ def _match_samples_to_tag_columns(
609
623
  samples: Iterable[str],
610
624
  columns: Iterable[str],
611
625
  tag: str,
612
- ) -> dict:
626
+ ) -> dict[str, str]:
613
627
  """Mapping of samples to columns which contain the sample and the tag.
614
628
 
615
629
  Args:
@@ -632,7 +646,7 @@ def _match_samples_to_tag_columns(
632
646
  return mapping
633
647
 
634
648
 
635
- def _get_qtable_export_filepaths(directory: str, name: str):
649
+ def _get_qtable_export_filepaths(directory: str, name: str) -> dict[str, str]:
636
650
  """Returns a dictionary of standard filepaths for loading and saving a qtable."""
637
651
  filenames = {
638
652
  "data": f"{name}.data.tsv",