msreport 0.0.29__py3-none-any.whl → 0.0.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
msreport/peptidoform.py CHANGED
@@ -1,5 +1,14 @@
1
+ """Defines the `Peptide` class and associated utilities for handling peptidoforms.
2
+
3
+ This module provides a `Peptide` class for representing modified peptide sequences
4
+ and their site localization probabilities. It offers methods to access and manipulate
5
+ peptide information, summarize isoform probabilities, and retrieve modification sites.
6
+ Additionally, it includes utility functions for parsing modified sequence strings and
7
+ converting site localization probabilities to and from a standardized string format.
8
+ """
9
+
1
10
  from collections import defaultdict as ddict
2
- from typing import Optional, Union
11
+ from typing import Optional
3
12
 
4
13
  import numpy as np
5
14
 
@@ -10,7 +19,7 @@ class Peptide:
10
19
  def __init__(
11
20
  self,
12
21
  modified_sequence: str,
13
- localization_probabilities: Optional[dict] = None,
22
+ localization_probabilities: Optional[dict[str, dict[int, float]]] = None,
14
23
  protein_position: Optional[int] = None,
15
24
  ):
16
25
  plain_sequence, modifications = parse_modified_sequence(
@@ -28,7 +37,7 @@ class Peptide:
28
37
  self.modification_positions[mod_tag].append(position)
29
38
  self.modified_residues[position] = mod_tag
30
39
 
31
- def make_modified_sequence(self, include: Optional[list] = None) -> str:
40
+ def make_modified_sequence(self, include: Optional[list[str]] = None) -> str:
32
41
  """Returns a modified sequence string.
33
42
 
34
43
  Args:
@@ -55,7 +64,7 @@ class Peptide:
55
64
  return 0
56
65
  return len(self.modification_positions[modification])
57
66
 
58
- def isoform_probability(self, modification: str) -> Union[float, None]:
67
+ def isoform_probability(self, modification: str) -> float | None:
59
68
  """Calculates the isoform probability for a given modification.
60
69
 
61
70
  Returns:
@@ -66,12 +75,13 @@ class Peptide:
66
75
  """
67
76
  probabilities = []
68
77
  for site in self.list_modified_peptide_sites(modification):
69
- probabilities.append(self.get_peptide_site_probability(site))
70
- if None in probabilities:
71
- return None
78
+ probability = self.get_peptide_site_probability(site)
79
+ if probability is None:
80
+ return None
81
+ probabilities.append(probability)
72
82
  return float(np.prod(probabilities))
73
83
 
74
- def get_peptide_site_probability(self, position: int) -> Optional[float]:
84
+ def get_peptide_site_probability(self, position: int) -> float | None:
75
85
  """Return the modification localization probability of the peptide position.
76
86
 
77
87
  Args:
@@ -85,7 +95,7 @@ class Peptide:
85
95
  """
86
96
  return self._get_site_probability(position, is_protein_position=False)
87
97
 
88
- def get_protein_site_probability(self, position: int) -> Optional[float]:
98
+ def get_protein_site_probability(self, position: int) -> float | None:
89
99
  """Return the modification localization probability of the protein position.
90
100
 
91
101
  Args:
@@ -109,7 +119,7 @@ class Peptide:
109
119
 
110
120
  def _get_site_probability(
111
121
  self, position: int, is_protein_position: bool
112
- ) -> Optional[float]:
122
+ ) -> float | None:
113
123
  """Return the modification localization probability of the peptide position.
114
124
 
115
125
  Args:
@@ -224,7 +234,7 @@ def modify_peptide(
224
234
 
225
235
 
226
236
  def make_localization_string(
227
- localization_probabilities: dict, decimal_places: int = 3
237
+ localization_probabilities: dict[str, dict[int, float]], decimal_places: int = 3
228
238
  ) -> str:
229
239
  """Generates a site localization probability string.
230
240
 
msreport/plot/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- """This module provides various plotting functions for visualizing data within a Qtable.
1
+ """Plotting functions for visualizing proteomics data from `Qtable`.
2
2
 
3
3
  The functions in this module generate a wide range of plots, including heatmaps, PCA
4
4
  plots, volcano plots, and histograms, to analyze and compare expression values,
@@ -6,8 +6,8 @@ missingness, contaminants, and other features in proteomics datasets. The plots
6
6
  designed to work with the Qtable class as input, which provides structured access to
7
7
  proteomics data and experimental design information.
8
8
 
9
- The style of the plots can be customized using the `set_active_style` function, which
10
- allows applying style sheets from the msreport library or those available in matplotlib.
9
+ Users can customize plot styles via the `set_active_style` function, which allows
10
+ applying style sheets from the msreport library or those available in matplotlib.
11
11
  """
12
12
 
13
13
  from .comparison import expression_comparison, pvalue_histogram, volcano_ma
@@ -204,7 +204,8 @@ def experiment_ratios(
204
204
  mask = np.all([(qtable.data[f"Events {exp}"] > 0) for exp in experiments], axis=0)
205
205
  if exclude_invalid:
206
206
  mask = mask & qtable["Valid"]
207
- experiment_data = experiment_data[mask]
207
+ # Index with `mask.to_numpy()` so the boolean mask is applied by position; a boolean Series would be aligned on its index, which can differ from the dataframe's
208
+ experiment_data = experiment_data[mask.to_numpy()]
208
209
  pseudo_reference = np.nanmean(experiment_data, axis=1)
209
210
  ratio_data = experiment_data.subtract(pseudo_reference, axis=0)
210
211
 
msreport/plot/quality.py CHANGED
@@ -314,7 +314,7 @@ def sample_intensities(
314
314
 
315
315
  @with_active_style
316
316
  def sample_correlation(
317
- qtable, exclude_invalid: bool = True, labels: bool = False
317
+ qtable: Qtable, exclude_invalid: bool = True, labels: bool = False
318
318
  ) -> tuple[plt.Figure, list[plt.Axes]]:
319
319
  """Generates a pair-wise correlation matrix of samples 'Expression' values.
320
320
 
msreport/qtable.py CHANGED
@@ -1,14 +1,28 @@
1
- from __future__ import annotations
1
+ """Defines the `Qtable` class, the central container for quantitative proteomics data.
2
+
3
+ The `Qtable` class serves as the standardized data structure for `msreport`,
4
+ storing a main table with quantitative values and associated metadata for its entries;
5
+ it also maintains the name of the unique ID column for the main table. Additionally,
6
+ it stores an experimental design table that links sample names to experimental
7
+ conditions and replicate information.
8
+
9
+ `Qtable` provides convenience methods for creating subtables and accessing design
10
+ related information (e.g., samples per experiment), and instances of `Qtable` can be
11
+ easily saved to disk and loaded back. As the central data container, the `Qtable`
12
+ facilitates seamless integration with the high-level modules `analyze`, `plot` and
13
+ `export`, which all directly operate on `Qtable` instances.
14
+ """
2
15
 
3
16
  import copy
4
17
  import os
5
18
  import warnings
6
19
  from contextlib import contextmanager
7
- from typing import Any, Iterable, Optional
20
+ from typing import Any, Generator, Iterable, Optional
8
21
 
9
22
  import numpy as np
10
23
  import pandas as pd
11
24
  import yaml
25
+ from typing_extensions import Self
12
26
 
13
27
  import msreport.helper as helper
14
28
 
@@ -27,13 +41,11 @@ class Qtable:
27
41
  design: A pandas.DataFrame describing the experimental design.
28
42
  """
29
43
 
30
- _default_id_column = "Representative protein"
31
-
32
44
  def __init__(
33
45
  self,
34
46
  data: pd.DataFrame,
35
- design: Optional[pd.DataFrame] = None,
36
- id_column: str = "Representative protein",
47
+ design: pd.DataFrame,
48
+ id_column: str,
37
49
  ):
38
50
  """Initializes the Qtable.
39
51
 
@@ -42,12 +54,13 @@ class Qtable:
42
54
 
43
55
  Args:
44
56
  data: A dataframe containing quantitative proteomics data in a wide format.
57
+ The index of the dataframe must contain unique values.
45
58
  design: A dataframe describing the experimental design that must at least
46
59
  contain the columns "Sample" and "Experiment". The "Sample" entries
47
60
  should correspond to the Sample names present in the quantitative
48
61
  columns of the data.
49
62
  id_column: The name of the column that contains the unique identifiers for
50
- the entries in the data table. Default is "Representative protein".
63
+ the entries in the data table.
51
64
 
52
65
  Raises:
53
66
  KeyError: If the specified id_column is not found in data.
@@ -76,8 +89,7 @@ class Qtable:
76
89
  self._id_column = id_column
77
90
  if "Valid" not in self.data.columns:
78
91
  self.data["Valid"] = True
79
- if design is not None:
80
- self.add_design(design)
92
+ self.add_design(design)
81
93
 
82
94
  self._expression_columns: list[str] = []
83
95
  self._expression_features: list[str] = []
@@ -361,7 +373,7 @@ class Qtable:
361
373
  keep_experiments: Optional[Iterable[str]] = None,
362
374
  exclude_samples: Optional[Iterable[str]] = None,
363
375
  keep_samples: Optional[Iterable[str]] = None,
364
- ):
376
+ ) -> Generator[None, None, None]:
365
377
  """Context manager to temporarily modify the design table.
366
378
 
367
379
  Args:
@@ -424,7 +436,7 @@ class Qtable:
424
436
  self.design.to_csv(filepaths["design"], sep="\t", index=True)
425
437
 
426
438
  @classmethod
427
- def load(cls, directory: str, basename: str) -> Qtable:
439
+ def load(cls, directory: str, basename: str) -> Self:
428
440
  """Load a qtable from disk by reading a data, design, and config file.
429
441
 
430
442
  Loading a qtable will first import the three files generated during saving, then
@@ -438,6 +450,11 @@ class Qtable:
438
450
 
439
451
  Returns:
440
452
  An instance of Qtable loaded from the specified files.
453
+
454
+ Raises:
455
+ ValueError: If the loaded config file does not contain the
456
+ "Unique ID column" key. This is due to the qtable being saved with a
457
+ version of msreport <= 0.0.27.
441
458
  """
442
459
  filepaths = _get_qtable_export_filepaths(directory, basename)
443
460
  with open(filepaths["config"]) as openfile:
@@ -458,13 +475,20 @@ class Qtable:
458
475
  filepaths["design"], sep="\t", index_col=0, keep_default_na=True
459
476
  )
460
477
 
461
- qtable = Qtable(data, design)
478
+ if "Unique ID column" not in config_data:
479
+ # A missing key most likely means the qtable was saved with msreport <= 0.0.27; fail with an explicit error
480
+ raise ValueError(
481
+ "The qtable config file does not contain the 'Unique ID column' key. "
482
+ "This is likely due to the qtable being saved with a version of "
483
+ "msreport <= 0.0.27."
484
+ )
485
+ id_column = config_data["Unique ID column"]
486
+
487
+ qtable = cls(data, design, id_column)
462
488
  qtable._expression_columns = config_data["Expression columns"]
463
489
  qtable._expression_features = config_data["Expression features"]
464
490
  qtable._expression_sample_mapping = config_data["Expression sample mapping"]
465
491
  # This check is required for backwards compatibility with msreport <= 0.0.27
466
- if "Unique ID column" in config_data:
467
- qtable._id_column = config_data["Unique ID column"]
468
492
  return qtable
469
493
 
470
494
  def to_tsv(self, path: str, index: bool = False):
@@ -476,11 +500,11 @@ class Qtable:
476
500
  )
477
501
  self.data.to_csv(path, sep="\t", index=index)
478
502
 
479
- def to_clipboard(self, index: bool = False):
503
+ def to_clipboard(self, index: bool = False) -> None:
480
504
  """Writes the data table to the system clipboard."""
481
505
  self.data.to_clipboard(sep="\t", index=index)
482
506
 
483
- def copy(self) -> Qtable:
507
+ def copy(self) -> Self:
484
508
  """Returns a copy of this Qtable instance."""
485
509
  return self.__copy__()
486
510
 
@@ -569,8 +593,8 @@ class Qtable:
569
593
  self._expression_features = []
570
594
  self._expression_sample_mapping = {}
571
595
 
572
- def __copy__(self) -> Qtable:
573
- new_instance = Qtable(self.data, self.design)
596
+ def __copy__(self) -> Self:
597
+ new_instance = type(self)(self.data, self.design, self.id_column)
574
598
  # Copy all private attributes
575
599
  for attr in dir(self):
576
600
  if (
@@ -599,7 +623,7 @@ def _match_samples_to_tag_columns(
599
623
  samples: Iterable[str],
600
624
  columns: Iterable[str],
601
625
  tag: str,
602
- ) -> dict:
626
+ ) -> dict[str, str]:
603
627
  """Mapping of samples to columns which contain the sample and the tag.
604
628
 
605
629
  Args:
@@ -622,7 +646,7 @@ def _match_samples_to_tag_columns(
622
646
  return mapping
623
647
 
624
648
 
625
- def _get_qtable_export_filepaths(directory: str, name: str):
649
+ def _get_qtable_export_filepaths(directory: str, name: str) -> dict[str, str]:
626
650
  """Returns a dictionary of standard filepaths for loading and saving a qtable."""
627
651
  filenames = {
628
652
  "data": f"{name}.data.tsv",