msreport 0.0.29__py3-none-any.whl → 0.0.31__py3-none-any.whl
This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries and as they appear in their respective public registries. It is provided for informational purposes only.
- msreport/__init__.py +1 -1
- msreport/aggregate/__init__.py +10 -0
- msreport/aggregate/condense.py +9 -0
- msreport/aggregate/pivot.py +14 -5
- msreport/aggregate/summarize.py +14 -4
- msreport/analyze.py +67 -5
- msreport/export.py +9 -15
- msreport/fasta.py +9 -2
- msreport/helper/__init__.py +18 -0
- msreport/impute.py +18 -10
- msreport/isobar.py +11 -14
- msreport/normalize.py +95 -10
- msreport/peptidoform.py +21 -11
- msreport/plot/__init__.py +3 -3
- msreport/plot/distribution.py +2 -1
- msreport/plot/quality.py +1 -1
- msreport/qtable.py +44 -20
- msreport/reader.py +321 -40
- msreport/rinterface/limma.py +1 -1
- {msreport-0.0.29.dist-info → msreport-0.0.31.dist-info}/METADATA +20 -2
- msreport-0.0.31.dist-info/RECORD +38 -0
- {msreport-0.0.29.dist-info → msreport-0.0.31.dist-info}/WHEEL +1 -1
- msreport-0.0.29.dist-info/RECORD +0 -38
- {msreport-0.0.29.dist-info → msreport-0.0.31.dist-info}/licenses/LICENSE.txt +0 -0
- {msreport-0.0.29.dist-info → msreport-0.0.31.dist-info}/top_level.txt +0 -0
msreport/peptidoform.py
CHANGED
|
@@ -1,5 +1,14 @@
|
|
|
1
|
+
"""Defines the `Peptide` class and associated utilities for handling peptidoforms.
|
|
2
|
+
|
|
3
|
+
This module provides a `Peptide` class for representing modified peptide sequences,
|
|
4
|
+
and their site localization probabilities. It offers methods to access and manipulate
|
|
5
|
+
peptide information, summarize isoform probabilities, and retrieve modification sites.
|
|
6
|
+
Additionally, it includes utility functions for parsing modified sequence strings and
|
|
7
|
+
converting site localization probabilities to and from a standardized string format.
|
|
8
|
+
"""
|
|
9
|
+
|
|
1
10
|
from collections import defaultdict as ddict
|
|
2
|
-
from typing import Optional
|
|
11
|
+
from typing import Optional
|
|
3
12
|
|
|
4
13
|
import numpy as np
|
|
5
14
|
|
|
@@ -10,7 +19,7 @@ class Peptide:
|
|
|
10
19
|
def __init__(
|
|
11
20
|
self,
|
|
12
21
|
modified_sequence: str,
|
|
13
|
-
localization_probabilities: Optional[dict] = None,
|
|
22
|
+
localization_probabilities: Optional[dict[str, dict[int, float]]] = None,
|
|
14
23
|
protein_position: Optional[int] = None,
|
|
15
24
|
):
|
|
16
25
|
plain_sequence, modifications = parse_modified_sequence(
|
|
@@ -28,7 +37,7 @@ class Peptide:
|
|
|
28
37
|
self.modification_positions[mod_tag].append(position)
|
|
29
38
|
self.modified_residues[position] = mod_tag
|
|
30
39
|
|
|
31
|
-
def make_modified_sequence(self, include: Optional[list] = None) -> str:
|
|
40
|
+
def make_modified_sequence(self, include: Optional[list[str]] = None) -> str:
|
|
32
41
|
"""Returns a modified sequence string.
|
|
33
42
|
|
|
34
43
|
Args:
|
|
@@ -55,7 +64,7 @@ class Peptide:
|
|
|
55
64
|
return 0
|
|
56
65
|
return len(self.modification_positions[modification])
|
|
57
66
|
|
|
58
|
-
def isoform_probability(self, modification: str) ->
|
|
67
|
+
def isoform_probability(self, modification: str) -> float | None:
|
|
59
68
|
"""Calculates the isoform probability for a given modification.
|
|
60
69
|
|
|
61
70
|
Returns:
|
|
@@ -66,12 +75,13 @@ class Peptide:
|
|
|
66
75
|
"""
|
|
67
76
|
probabilities = []
|
|
68
77
|
for site in self.list_modified_peptide_sites(modification):
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
78
|
+
probability = self.get_peptide_site_probability(site)
|
|
79
|
+
if probability is None:
|
|
80
|
+
return None
|
|
81
|
+
probabilities.append(probability)
|
|
72
82
|
return float(np.prod(probabilities))
|
|
73
83
|
|
|
74
|
-
def get_peptide_site_probability(self, position: int) ->
|
|
84
|
+
def get_peptide_site_probability(self, position: int) -> float | None:
|
|
75
85
|
"""Return the modification localization probability of the peptide position.
|
|
76
86
|
|
|
77
87
|
Args:
|
|
@@ -85,7 +95,7 @@ class Peptide:
|
|
|
85
95
|
"""
|
|
86
96
|
return self._get_site_probability(position, is_protein_position=False)
|
|
87
97
|
|
|
88
|
-
def get_protein_site_probability(self, position: int) ->
|
|
98
|
+
def get_protein_site_probability(self, position: int) -> float | None:
|
|
89
99
|
"""Return the modification localization probability of the protein position.
|
|
90
100
|
|
|
91
101
|
Args:
|
|
@@ -109,7 +119,7 @@ class Peptide:
|
|
|
109
119
|
|
|
110
120
|
def _get_site_probability(
|
|
111
121
|
self, position: int, is_protein_position: bool
|
|
112
|
-
) ->
|
|
122
|
+
) -> float | None:
|
|
113
123
|
"""Return the modification localization probability of the peptide position.
|
|
114
124
|
|
|
115
125
|
Args:
|
|
@@ -224,7 +234,7 @@ def modify_peptide(
|
|
|
224
234
|
|
|
225
235
|
|
|
226
236
|
def make_localization_string(
|
|
227
|
-
localization_probabilities: dict, decimal_places: int = 3
|
|
237
|
+
localization_probabilities: dict[str, dict[int, float]], decimal_places: int = 3
|
|
228
238
|
) -> str:
|
|
229
239
|
"""Generates a site localization probability string.
|
|
230
240
|
|
msreport/plot/__init__.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""
|
|
1
|
+
"""Plotting functions for visualizing proteomics data from `Qtable`.
|
|
2
2
|
|
|
3
3
|
The functions in this module generate a wide range of plots, including heatmaps, PCA
|
|
4
4
|
plots, volcano plots, and histograms, to analyze and compare expression values,
|
|
@@ -6,8 +6,8 @@ missingness, contaminants, and other features in proteomics datasets. The plots
|
|
|
6
6
|
designed to work with the Qtable class as input, which provides structured access to
|
|
7
7
|
proteomics data and experimental design information.
|
|
8
8
|
|
|
9
|
-
|
|
10
|
-
|
|
9
|
+
Users can customize plot styles via the `set_active_style` function, which allows
|
|
10
|
+
applying style sheets from the msreport library or those available in matplotlib.
|
|
11
11
|
"""
|
|
12
12
|
|
|
13
13
|
from .comparison import expression_comparison, pvalue_histogram, volcano_ma
|
msreport/plot/distribution.py
CHANGED
|
@@ -204,7 +204,8 @@ def experiment_ratios(
|
|
|
204
204
|
mask = np.all([(qtable.data[f"Events {exp}"] > 0) for exp in experiments], axis=0)
|
|
205
205
|
if exclude_invalid:
|
|
206
206
|
mask = mask & qtable["Valid"]
|
|
207
|
-
|
|
207
|
+
# Use `mask.to_numpy` to solve issue with different indices of mask and dataframe
|
|
208
|
+
experiment_data = experiment_data[mask.to_numpy()]
|
|
208
209
|
pseudo_reference = np.nanmean(experiment_data, axis=1)
|
|
209
210
|
ratio_data = experiment_data.subtract(pseudo_reference, axis=0)
|
|
210
211
|
|
msreport/plot/quality.py
CHANGED
|
@@ -314,7 +314,7 @@ def sample_intensities(
|
|
|
314
314
|
|
|
315
315
|
@with_active_style
|
|
316
316
|
def sample_correlation(
|
|
317
|
-
qtable, exclude_invalid: bool = True, labels: bool = False
|
|
317
|
+
qtable: Qtable, exclude_invalid: bool = True, labels: bool = False
|
|
318
318
|
) -> tuple[plt.Figure, list[plt.Axes]]:
|
|
319
319
|
"""Generates a pair-wise correlation matrix of samples 'Expression' values.
|
|
320
320
|
|
msreport/qtable.py
CHANGED
|
@@ -1,14 +1,28 @@
|
|
|
1
|
-
|
|
1
|
+
"""Defines the `Qtable` class, the central container for quantitative proteomics data.
|
|
2
|
+
|
|
3
|
+
The `Qtable` class serves as the standardized data structure for `msreport`,
|
|
4
|
+
storing a main table with quantitative values and associated metadata for its entries;
|
|
5
|
+
it also maintains the name of the unique ID column for the main table. Additionally,
|
|
6
|
+
it stores an experimental design table that links sample names to experimental
|
|
7
|
+
conditions and replicate information.
|
|
8
|
+
|
|
9
|
+
`Qtable` provides convenience methods for creating subtables and accessing design
|
|
10
|
+
related information (e.g., samples per experiment), and instances of `Qtable` can be
|
|
11
|
+
easily saved to disk and loaded back. As the central data container, the `Qtable`
|
|
12
|
+
facilitates seamless integration with the high-level modules `analyze`, `plot` and
|
|
13
|
+
`export`, which all directly operate on `Qtable` instances.
|
|
14
|
+
"""
|
|
2
15
|
|
|
3
16
|
import copy
|
|
4
17
|
import os
|
|
5
18
|
import warnings
|
|
6
19
|
from contextlib import contextmanager
|
|
7
|
-
from typing import Any, Iterable, Optional
|
|
20
|
+
from typing import Any, Generator, Iterable, Optional
|
|
8
21
|
|
|
9
22
|
import numpy as np
|
|
10
23
|
import pandas as pd
|
|
11
24
|
import yaml
|
|
25
|
+
from typing_extensions import Self
|
|
12
26
|
|
|
13
27
|
import msreport.helper as helper
|
|
14
28
|
|
|
@@ -27,13 +41,11 @@ class Qtable:
|
|
|
27
41
|
design: A pandas.DataFrame describing the experimental design.
|
|
28
42
|
"""
|
|
29
43
|
|
|
30
|
-
_default_id_column = "Representative protein"
|
|
31
|
-
|
|
32
44
|
def __init__(
|
|
33
45
|
self,
|
|
34
46
|
data: pd.DataFrame,
|
|
35
|
-
design:
|
|
36
|
-
id_column: str
|
|
47
|
+
design: pd.DataFrame,
|
|
48
|
+
id_column: str,
|
|
37
49
|
):
|
|
38
50
|
"""Initializes the Qtable.
|
|
39
51
|
|
|
@@ -42,12 +54,13 @@ class Qtable:
|
|
|
42
54
|
|
|
43
55
|
Args:
|
|
44
56
|
data: A dataframe containing quantitative proteomics data in a wide format.
|
|
57
|
+
The index of the dataframe must contain unique values.
|
|
45
58
|
design: A dataframe describing the experimental design that must at least
|
|
46
59
|
contain the columns "Sample" and "Experiment". The "Sample" entries
|
|
47
60
|
should correspond to the Sample names present in the quantitative
|
|
48
61
|
columns of the data.
|
|
49
62
|
id_column: The name of the column that contains the unique identifiers for
|
|
50
|
-
the entries in the data table.
|
|
63
|
+
the entries in the data table.
|
|
51
64
|
|
|
52
65
|
Raises:
|
|
53
66
|
KeyError: If the specified id_column is not found in data.
|
|
@@ -76,8 +89,7 @@ class Qtable:
|
|
|
76
89
|
self._id_column = id_column
|
|
77
90
|
if "Valid" not in self.data.columns:
|
|
78
91
|
self.data["Valid"] = True
|
|
79
|
-
|
|
80
|
-
self.add_design(design)
|
|
92
|
+
self.add_design(design)
|
|
81
93
|
|
|
82
94
|
self._expression_columns: list[str] = []
|
|
83
95
|
self._expression_features: list[str] = []
|
|
@@ -361,7 +373,7 @@ class Qtable:
|
|
|
361
373
|
keep_experiments: Optional[Iterable[str]] = None,
|
|
362
374
|
exclude_samples: Optional[Iterable[str]] = None,
|
|
363
375
|
keep_samples: Optional[Iterable[str]] = None,
|
|
364
|
-
):
|
|
376
|
+
) -> Generator[None, None, None]:
|
|
365
377
|
"""Context manager to temporarily modify the design table.
|
|
366
378
|
|
|
367
379
|
Args:
|
|
@@ -424,7 +436,7 @@ class Qtable:
|
|
|
424
436
|
self.design.to_csv(filepaths["design"], sep="\t", index=True)
|
|
425
437
|
|
|
426
438
|
@classmethod
|
|
427
|
-
def load(cls, directory: str, basename: str) ->
|
|
439
|
+
def load(cls, directory: str, basename: str) -> Self:
|
|
428
440
|
"""Load a qtable from disk by reading a data, design, and config file.
|
|
429
441
|
|
|
430
442
|
Loading a qtable will first import the three files generated during saving, then
|
|
@@ -438,6 +450,11 @@ class Qtable:
|
|
|
438
450
|
|
|
439
451
|
Returns:
|
|
440
452
|
An instance of Qtable loaded from the specified files.
|
|
453
|
+
|
|
454
|
+
Raises:
|
|
455
|
+
ValueError: If the loaded config file does not contain the
|
|
456
|
+
"Unique ID column" key. This is due to the qtable being saved with a
|
|
457
|
+
version of msreport <= 0.0.27.
|
|
441
458
|
"""
|
|
442
459
|
filepaths = _get_qtable_export_filepaths(directory, basename)
|
|
443
460
|
with open(filepaths["config"]) as openfile:
|
|
@@ -458,13 +475,20 @@ class Qtable:
|
|
|
458
475
|
filepaths["design"], sep="\t", index_col=0, keep_default_na=True
|
|
459
476
|
)
|
|
460
477
|
|
|
461
|
-
|
|
478
|
+
if "Unique ID column" not in config_data:
|
|
479
|
+
# Mention that the qtable was likely saved with a version of msreport <= 0.0.27
|
|
480
|
+
raise ValueError(
|
|
481
|
+
"The qtable config file does not contain the 'Unique ID column' key. "
|
|
482
|
+
"This is likely due to the qtable being saved with a version of "
|
|
483
|
+
"msreport <= 0.0.27."
|
|
484
|
+
)
|
|
485
|
+
id_column = config_data["Unique ID column"]
|
|
486
|
+
|
|
487
|
+
qtable = cls(data, design, id_column)
|
|
462
488
|
qtable._expression_columns = config_data["Expression columns"]
|
|
463
489
|
qtable._expression_features = config_data["Expression features"]
|
|
464
490
|
qtable._expression_sample_mapping = config_data["Expression sample mapping"]
|
|
465
491
|
# This check is required for backwards compatibility with msreport <= 0.0.27
|
|
466
|
-
if "Unique ID column" in config_data:
|
|
467
|
-
qtable._id_column = config_data["Unique ID column"]
|
|
468
492
|
return qtable
|
|
469
493
|
|
|
470
494
|
def to_tsv(self, path: str, index: bool = False):
|
|
@@ -476,11 +500,11 @@ class Qtable:
|
|
|
476
500
|
)
|
|
477
501
|
self.data.to_csv(path, sep="\t", index=index)
|
|
478
502
|
|
|
479
|
-
def to_clipboard(self, index: bool = False):
|
|
503
|
+
def to_clipboard(self, index: bool = False) -> None:
|
|
480
504
|
"""Writes the data table to the system clipboard."""
|
|
481
505
|
self.data.to_clipboard(sep="\t", index=index)
|
|
482
506
|
|
|
483
|
-
def copy(self) ->
|
|
507
|
+
def copy(self) -> Self:
|
|
484
508
|
"""Returns a copy of this Qtable instance."""
|
|
485
509
|
return self.__copy__()
|
|
486
510
|
|
|
@@ -569,8 +593,8 @@ class Qtable:
|
|
|
569
593
|
self._expression_features = []
|
|
570
594
|
self._expression_sample_mapping = {}
|
|
571
595
|
|
|
572
|
-
def __copy__(self) ->
|
|
573
|
-
new_instance =
|
|
596
|
+
def __copy__(self) -> Self:
|
|
597
|
+
new_instance = type(self)(self.data, self.design, self.id_column)
|
|
574
598
|
# Copy all private attributes
|
|
575
599
|
for attr in dir(self):
|
|
576
600
|
if (
|
|
@@ -599,7 +623,7 @@ def _match_samples_to_tag_columns(
|
|
|
599
623
|
samples: Iterable[str],
|
|
600
624
|
columns: Iterable[str],
|
|
601
625
|
tag: str,
|
|
602
|
-
) -> dict:
|
|
626
|
+
) -> dict[str, str]:
|
|
603
627
|
"""Mapping of samples to columns which contain the sample and the tag.
|
|
604
628
|
|
|
605
629
|
Args:
|
|
@@ -622,7 +646,7 @@ def _match_samples_to_tag_columns(
|
|
|
622
646
|
return mapping
|
|
623
647
|
|
|
624
648
|
|
|
625
|
-
def _get_qtable_export_filepaths(directory: str, name: str):
|
|
649
|
+
def _get_qtable_export_filepaths(directory: str, name: str) -> dict[str, str]:
|
|
626
650
|
"""Returns a dictionary of standard filepaths for loading and saving a qtable."""
|
|
627
651
|
filenames = {
|
|
628
652
|
"data": f"{name}.data.tsv",
|