seer-pas-sdk 0.1.2__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- seer_pas_sdk/auth/__init__.py +1 -77
- seer_pas_sdk/auth/auth.py +99 -0
- seer_pas_sdk/common/__init__.py +370 -72
- seer_pas_sdk/common/errors.py +5 -0
- seer_pas_sdk/common/groupanalysis.py +55 -0
- seer_pas_sdk/core/__init__.py +1 -1307
- seer_pas_sdk/core/sdk.py +2632 -0
- seer_pas_sdk/core/unsupported.py +1634 -0
- seer_pas_sdk/objects/__init__.py +3 -129
- seer_pas_sdk/objects/groupanalysis.py +30 -0
- seer_pas_sdk/objects/platemap.py +174 -0
- seer_pas_sdk/objects/volcanoplot.py +290 -0
- seer_pas_sdk-0.2.1.dist-info/METADATA +230 -0
- seer_pas_sdk-0.2.1.dist-info/RECORD +18 -0
- {seer_pas_sdk-0.1.2.dist-info → seer_pas_sdk-0.2.1.dist-info}/WHEEL +1 -1
- {seer_pas_sdk-0.1.2.dist-info → seer_pas_sdk-0.2.1.dist-info}/top_level.txt +0 -1
- seer_pas_sdk-0.1.2.dist-info/METADATA +0 -50
- seer_pas_sdk-0.1.2.dist-info/RECORD +0 -16
- tests/__init__.py +0 -0
- tests/conftest.py +0 -17
- tests/test_auth.py +0 -48
- tests/test_common.py +0 -99
- tests/test_objects.py +0 -91
- tests/test_sdk.py +0 -11
- {seer_pas_sdk-0.1.2.dist-info → seer_pas_sdk-0.2.1.dist-info/licenses}/LICENSE.txt +0 -0
seer_pas_sdk/objects/__init__.py
CHANGED
|
@@ -1,129 +1,3 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
class PlateMap:
    """
    Plate map object containing information about samples and corresponding MS data files.

    Every field is stored on the instance as a list holding one entry per MS
    file. Fields that are omitted (or falsey) become a list of ``None`` and
    fields shorter than ``ms_file_name`` are right-padded with ``None``.
    The sequences passed in by the caller are copied, never modified.
    """

    # (attribute name, output column header) pairs, in output column order.
    _FIELDS = (
        ("ms_file_name", "MS file name"),
        ("sample_name", "Sample name"),
        ("sample_id", "Sample ID"),
        ("well_location", "Well location"),
        ("nanoparticle", "Nanoparticle"),
        ("nanoparticle_id", "Nanoparticle ID"),
        ("control", "Control"),
        ("control_id", "Control ID"),
        ("instrument_name", "Instrument name"),
        ("date_sample_preparation", "Date sample preparation"),
        ("sample_volume", "Sample volume"),
        ("peptide_concentration", "Peptide concentration"),
        ("peptide_mass_sample", "Peptide mass sample"),
        ("dilution_factor", "Dilution factor"),
        ("kit_id", "Kit ID"),
        ("plate_id", "Plate ID"),
        ("plate_name", "Plate Name"),
    )

    def __init__(
        self,
        ms_file_name=None,
        sample_name=None,
        sample_id=None,
        well_location=None,
        nanoparticle=None,
        nanoparticle_id=None,
        control=None,
        control_id=None,
        instrument_name=None,
        date_sample_preparation=None,
        sample_volume=None,
        peptide_concentration=None,
        peptide_mass_sample=None,
        dilution_factor=None,
        kit_id=None,
        plate_id=None,
        plate_name=None,
    ):
        """Build the plate map.

        Args:
            ms_file_name: Sequence of MS file names; required and non-empty.
                Its length fixes the number of rows of the plate map.
            (all other arguments): Optional sequences with at most one entry
                per MS file.

        Raises:
            ValueError: If ``ms_file_name`` is missing/empty, or if any other
                field has more entries than ``ms_file_name``.
        """
        if not ms_file_name:
            raise ValueError("MS file name(s) must be provided.")

        self.length = len(ms_file_name)

        supplied = {
            "ms_file_name": ms_file_name,
            "sample_name": sample_name,
            "sample_id": sample_id,
            "well_location": well_location,
            "nanoparticle": nanoparticle,
            "nanoparticle_id": nanoparticle_id,
            "control": control,
            "control_id": control_id,
            "instrument_name": instrument_name,
            "date_sample_preparation": date_sample_preparation,
            "sample_volume": sample_volume,
            "peptide_concentration": peptide_concentration,
            "peptide_mass_sample": peptide_mass_sample,
            "dilution_factor": dilution_factor,
            "kit_id": kit_id,
            "plate_id": plate_id,
            "plate_name": plate_name,
        }

        self.__attrs = [attr for attr, _ in self._FIELDS]
        self.__cols = [column for _, column in self._FIELDS]

        for attr in self.__attrs:
            setattr(self, attr, self._padded(supplied[attr]))

    def _padded(self, values):
        """Return a new list of exactly ``self.length`` entries built from ``values``."""
        # Copy so that padding never leaks into the caller's own list, and so
        # that tuples (which have no ``append``) are accepted as well.
        column = list(values) if values else []

        if len(column) > self.length:
            raise ValueError(
                "Parameter lengths must not exceed the number of MS files."
            )

        column.extend([None] * (self.length - len(column)))
        return column

    def to_dict(self):
        """Return ``{column header: {row index: value}}`` for every field."""
        return {
            column: dict(enumerate(getattr(self, attr)))
            for attr, column in zip(self.__attrs, self.__cols)
        }

    def to_df(self):
        """Return the plate map as a pandas DataFrame (one row per MS file)."""
        return pd.DataFrame(self.to_dict())

    def to_csv(self, path=None):
        """Serialize the plate map as CSV without the index column.

        With a falsey ``path`` the CSV text is returned; otherwise the CSV is
        written to ``path`` and the result of ``DataFrame.to_csv`` is returned.
        """
        if not path:
            return self.to_df().to_csv(index=False)
        return self.to_df().to_csv(path_or_buf=path, index=False)

    def __repr__(self):
        return str(self.to_dict())
|
|
1
|
+
from .platemap import PlateMap
|
|
2
|
+
from .groupanalysis import GroupAnalysisPostData
|
|
3
|
+
from .volcanoplot import *
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# Filter group analysis data for only the POST
|
|
2
|
+
class GroupAnalysisPostData:
    """DTO for Group Analysis Saved Results

    Picks the feature level ("protein" first, then "peptide") that actually
    has features in the ``"post"`` section of a group analysis result and
    exposes that level's merged statistics and statistical test name.

    Attributes:
        type: ``"protein"`` or ``"peptide"``.
        data: ``data["post"][type]["mergedStats"]``.
        stat_test: ``data["post"][type]["parameters"]["statTest"]``.
    """

    def __init__(self, data):
        """Extract the POST results from ``data``.

        Args:
            data: Mapping with a ``"post"`` key whose value may hold a
                ``"protein"`` and/or ``"peptide"`` section.

        Raises:
            ValueError: If the ``"post"`` key is missing, or neither section
                reports a positive ``"totalFeature"`` count.
            KeyError: If the selected section lacks ``"mergedStats"`` or
                ``"parameters"``/``"statTest"``.
        """
        if "post" not in data:
            raise ValueError('Invalid data format. Missing "post" key')

        post = data["post"]

        # A section (or its counter) may be absent *or* explicitly null;
        # both are treated as "no features of that level".
        feature_counts = {
            level: (post.get(level) or {}).get("totalFeature") or 0
            for level in ("protein", "peptide")
        }

        # Proteins take precedence over peptides when both are present.
        if feature_counts["protein"] > 0:
            self.type = "protein"
        elif feature_counts["peptide"] > 0:
            self.type = "peptide"
        else:
            raise ValueError(
                "Invalid data format. No features found in post data"
            )

        section = post[self.type]
        self.data = section["mergedStats"]
        self.stat_test = section["parameters"]["statTest"]
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class PlateMap:
    """
    Plate map object containing information about samples and corresponding MS data files.

    Every field is stored on the instance as a list holding one entry per MS
    file. Fields that are omitted (or falsey) become a list of ``None`` and
    fields shorter than ``ms_file_name`` are right-padded with ``None``.
    The sequences passed in by the caller are copied, never modified.

    Which fields are exported, and under which column header, depends on
    ``product``: ``"XT"`` uses one column layout, any other value uses the
    alternative layout.
    """

    # Every per-sample attribute, in output column order.
    _ATTRS = (
        "ms_file_name",
        "sample_name",
        "sample_id",
        "well_location",
        "nanoparticle",
        "nanoparticle_id",
        "control",
        "control_id",
        "instrument_name",
        "date_sample_preparation",
        "sample_volume",
        "peptide_concentration",
        "peptide_mass_sample",
        "recon_volume",
        "dilution_factor",
        "kit_id",
        "plate_id",
        "plate_name",
        "assay_version",
        "sample_tube_id",
        "method_set_id",
        "assay_method_id",
    )

    # attribute name -> column header, used when product == "XT".
    _XT_COLUMNS = {
        "ms_file_name": "MS file name",
        "sample_name": "Sample name",
        "sample_id": "Sample ID",
        "well_location": "Well location",
        "nanoparticle": "Nanoparticle",
        "nanoparticle_id": "Nanoparticle ID",
        "control": "Control",
        "control_id": "Control ID",
        "instrument_name": "Instrument name",
        "date_sample_preparation": "Date sample preparation",
        "sample_volume": "Sample volume",
        "peptide_concentration": "Peptide concentration",
        "peptide_mass_sample": "Peptide mass sample",
        "recon_volume": "Recon volume",
        "dilution_factor": "Dilution factor",
        "kit_id": "Kit ID",
        "plate_id": "Plate ID",
        "plate_name": "Plate Name",
        "assay_version": "Assay",
    }

    # attribute name -> column header, used for every other product value.
    _NON_XT_COLUMNS = {
        "ms_file_name": "MS file name",
        "sample_name": "Sample name",
        "sample_id": "Sample ID",
        "well_location": "Well location",
        "nanoparticle": "Nanoparticle set",
        "nanoparticle_id": "Nanoparticle set ID",
        "control_id": "Control ID",
        "instrument_name": "Instrument ID",
        "date_sample_preparation": "Date assay initiated",
        "sample_volume": "Sample volume",
        "peptide_concentration": "Reconstituted peptide concentration",
        "peptide_mass_sample": "Recovered peptide mass",
        "recon_volume": "Reconstitution volume",
        "plate_id": "Plate ID",
        "plate_name": "Plate Name",
        "assay_version": "Assay product",
        "sample_tube_id": "Sample tube ID",
        "method_set_id": "Method set ID",
        "assay_method_id": "Assay method ID",
    }

    def __init__(
        self,
        ms_file_name=None,
        sample_name=None,
        sample_id=None,
        well_location=None,
        nanoparticle=None,
        nanoparticle_id=None,
        control=None,
        control_id=None,
        instrument_name=None,
        date_sample_preparation=None,
        sample_volume=None,
        peptide_concentration=None,
        peptide_mass_sample=None,
        recon_volume=None,
        dilution_factor=None,
        kit_id=None,
        plate_id=None,
        plate_name=None,
        assay_version=None,
        sample_tube_id=None,
        method_set_id=None,
        assay_method_id=None,
        product="XT",
    ):
        """Build the plate map.

        Args:
            ms_file_name: Sequence of MS file names; required and non-empty.
                Its length fixes the number of rows of the plate map.
            product: Selects the exported column layout; defaults to "XT".
            (all other arguments): Optional sequences with at most one entry
                per MS file.

        Raises:
            ValueError: If ``ms_file_name`` is missing/empty, or if any other
                field has more entries than ``ms_file_name``.
        """
        if not ms_file_name:
            raise ValueError("MS file name(s) must be provided.")

        self.length = len(ms_file_name)
        self.product = product

        supplied = {
            "ms_file_name": ms_file_name,
            "sample_name": sample_name,
            "sample_id": sample_id,
            "well_location": well_location,
            "nanoparticle": nanoparticle,
            "nanoparticle_id": nanoparticle_id,
            "control": control,
            "control_id": control_id,
            "instrument_name": instrument_name,
            "date_sample_preparation": date_sample_preparation,
            "sample_volume": sample_volume,
            "peptide_concentration": peptide_concentration,
            "peptide_mass_sample": peptide_mass_sample,
            "recon_volume": recon_volume,
            "dilution_factor": dilution_factor,
            "kit_id": kit_id,
            "plate_id": plate_id,
            "plate_name": plate_name,
            "assay_version": assay_version,
            "sample_tube_id": sample_tube_id,
            "method_set_id": method_set_id,
            "assay_method_id": assay_method_id,
        }

        if self.product == "XT":
            self.__map = dict(self._XT_COLUMNS)
        else:
            self.__map = dict(self._NON_XT_COLUMNS)

        self.__attrs = list(self._ATTRS)

        # Every attribute is normalized, including those that the selected
        # column layout does not export.
        for attr in self.__attrs:
            setattr(self, attr, self._padded(supplied[attr]))

    def _padded(self, values):
        """Return a new list of exactly ``self.length`` entries built from ``values``."""
        # Copy so that padding never leaks into the caller's own list, and so
        # that tuples (which have no ``append``) are accepted as well.
        column = list(values) if values else []

        if len(column) > self.length:
            raise ValueError(
                "Parameter lengths must not exceed the number of MS files."
            )

        column.extend([None] * (self.length - len(column)))
        return column

    def to_dict(self):
        """Return ``{column header: {row index: value}}`` for the exported fields.

        Only attributes present in the column layout selected by ``product``
        are included, in attribute order.
        """
        return {
            self.__map[attr]: dict(enumerate(getattr(self, attr)))
            for attr in self.__attrs
            if attr in self.__map
        }

    def to_df(self):
        """Return the plate map as a pandas DataFrame (one row per MS file)."""
        return pd.DataFrame(self.to_dict())

    def to_csv(self, path=None):
        """Serialize the plate map as CSV without the index column.

        With a falsey ``path`` the CSV text is returned; otherwise the CSV is
        written to ``path`` and the result of ``DataFrame.to_csv`` is returned.
        """
        if not path:
            return self.to_df().to_csv(index=False)
        return self.to_df().to_csv(path_or_buf=path, index=False)

    def __repr__(self):
        return str(self.to_dict())
|
|
@@ -0,0 +1,290 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import json
|
|
3
|
+
import pandas as pd
|
|
4
|
+
from typing import List as _List, Dict as _Dict
|
|
5
|
+
from .groupanalysis import GroupAnalysisPostData
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class VolcanoPlotSettings:
    """Class to hold the settings information for the Volcano Plot"""

    # Single source of truth: accepted label_by option -> row key it maps to.
    _LABEL_BY_MAP = {
        "euclidean": "euclideanDistance",
        "fold_change": "logFD",
        "significance": "negativeLog10P",
    }

    # Names of the settings, in the order reported by get_settings().
    _SETTING_NAMES = (
        "significance_threshold",
        "fold_change_threshold",
        "label_by",
    )

    def __init__(
        self,
        significance_threshold: float = 0.05,
        fold_change_threshold: float = 1,
        label_by: str = "fold_change",
    ):
        """Initialize the VolcanoPlotSettings object

        Args:
            significance_threshold (float, optional): Cutoff value for the p-value to determine significance. Defaults to 0.05.
            fold_change_threshold (float, optional): Cutoff value for the fold change to determine significance. Defaults to 1.
            label_by (str, optional): Metric to sort result data. Defaults to "fold_change".

        Raises:
            ValueError: "Invalid label_by value, must be one of ['euclidean', 'fold_change', 'significance']"
        """
        allowed = list(self._LABEL_BY_MAP)
        # Membership is tested against a list (not the dict) so that an
        # unhashable label_by is still reported as a ValueError.
        if label_by not in allowed:
            raise ValueError(
                f"Invalid label_by value, must be one of {allowed}"
            )
        self.significance_threshold = significance_threshold
        self.fold_change_threshold = fold_change_threshold
        self.label_by = label_by

    @property
    def values(self):
        """Current settings as a ``{setting name: value}`` dict."""
        return {name: getattr(self, name) for name in self._SETTING_NAMES}

    @classmethod
    def get_settings(cls):
        """Return the list of setting names."""
        return list(cls._SETTING_NAMES)

    @classmethod
    def get_label_by_map(cls):
        """Return a fresh dict mapping each label_by option to its row key."""
        return dict(cls._LABEL_BY_MAP)

    def __repr__(self):
        return (
            f"{type(self).__name__}("
            f"significance_threshold={self.significance_threshold!r}, "
            f"fold_change_threshold={self.fold_change_threshold!r}, "
            f"label_by={self.label_by!r})"
        )
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class VolcanoPlotBuilder:
    """
    Builder class for the Volcano Plot
    Can be used to reuse the same GroupAnalysisResults data to build multiple Volcano Plots with different settings.

    The built rows are kept in ``self.volcano_plot``; call ``update`` to
    change the settings and rebuild them from the same data.
    """

    PROTEIN_GROUP_INDEX = "pg"
    PEPTIDE_INDEX = "peptide"

    def __init__(
        self,
        data: _List[_Dict],
        significance_threshold: float = 0.05,
        fold_change_threshold: float = 1,
        label_by: str = "fold_change",
    ):
        """Initialize the VolcanoPlotBuilder object

        Args:
            data (list[dict]): The complete set of group analysis result data.
                It is handed unchanged to GroupAnalysisPostData, which extracts
                the feature type, statistical test and result rows.
            significance_threshold (float, optional): Cutoff value for the p-value to determine significance. Defaults to 0.05.
            fold_change_threshold (float, optional): Cutoff value for the fold change to determine significance. Defaults to 1.
            label_by (str, optional): Metric to sort result data. Defaults to "fold_change".

        Raises:
            ValueError: "Invalid label_by value, must be one of ['euclidean', 'fold_change', 'significance']"

        Returns:
            None
        """
        # Settings are validated first so an invalid label_by fails before
        # any data is parsed.
        self._apply_settings(
            significance_threshold, fold_change_threshold, label_by
        )

        parsed_data = GroupAnalysisPostData(data)

        self.type = parsed_data.type
        self.stat_test = parsed_data.stat_test
        self.data = parsed_data.data
        self.max_logFD, self.max_negative_log10_p = self._get_max_values(
            self.data
        )
        self.protein_gene_map = dict()
        self.feature_type_index = (
            self.PROTEIN_GROUP_INDEX
            if self.type == "protein"
            else self.PEPTIDE_INDEX
        )
        self.volcano_plot = self.build()

    def _apply_settings(
        self, significance_threshold, fold_change_threshold, label_by
    ):
        """Store the settings and derive the significance cutoff and sort key."""
        self.settings = VolcanoPlotSettings(
            significance_threshold=significance_threshold,
            fold_change_threshold=fold_change_threshold,
            label_by=label_by,
        )
        # Rows carry -log10(p), so the p-value cutoff is converted once.
        self.minusLog10PSigValue = -np.log10(
            self.settings.significance_threshold
        )
        self.sort_param = VolcanoPlotSettings.get_label_by_map()[
            self.settings.label_by
        ]

    def build(self):
        """Build the volcano plot

        Returns:
            list[dict]: volcano plot rows sorted in descending order of the
                selected metric (absolute value when sorting by logFD)
        """
        rows = [self.build_row(i, entry) for i, entry in enumerate(self.data)]
        return sorted(
            rows,
            key=lambda row: (
                row[self.sort_param]
                if self.sort_param != "logFD"
                else np.abs(row[self.sort_param])
            ),
            reverse=True,
        )

    def build_row(self, i, data):
        """Build a row for the volcano plot

        Also records the feature -> gene association of the entry in
        ``self.protein_gene_map``.

        Args:
            i (int): The index of the row
            data (dict): a group analysis result entry

        Returns:
            dict: The row data
        """
        feature = data[self.feature_type_index]
        self.protein_gene_map[feature] = data["gene"]

        row = dict(
            logFD=data["logFD"],
            negativeLog10P=data["negativeLog10P"],
            dataIndex=i,
            rowID=json.dumps(data),
            gene=data["gene"],
            group=self.get_contrast_group_string(data),
            significant=self.get_significance_class(data),
            # Both coordinates are scaled by the dataset maxima before the
            # distance from the origin is taken.
            euclideanDistance=self.calculate_euclidean_distance(
                data["logFD"] / self.max_logFD,
                data["negativeLog10P"] / self.max_negative_log10_p,
            ),
        )
        row[self.type] = feature
        return row

    def is_significant_point(self, data):
        """Return True when the entry passes both the p-value and fold change cutoffs."""
        return bool(
            data["negativeLog10P"] >= self.minusLog10PSigValue
            and np.abs(data["logFD"]) >= self.settings.fold_change_threshold
        )

    def get_significance_class(self, data):
        """Get the significance class

        Args:
            data (dict): the row data

        Returns:
            int: 0 if not significant, 1 if significant with logFD >= 0,
                -1 if significant with logFD < 0
        """
        if not self.is_significant_point(data):
            return 0
        # The direction is taken from the sign of logFD rather than from a
        # fixed +/-1 cutoff, so points that are significant under a
        # fold_change_threshold below 1 are still classified.
        return 1 if data["logFD"] >= 0 else -1

    def get_contrast_group_string(self, obj):
        """Get the contrast group string

        Args:
            obj (dict): The row data

        Returns:
            str: "G1/G2" built from obj["contrastGroup"], or None when the
                contrast group or either of its members is missing/empty
        """
        contrast = obj.get("contrastGroup", None) if obj else None
        if contrast and contrast.get("G1", None) and contrast.get("G2", None):
            return "/".join([contrast["G1"], contrast["G2"]])
        return None

    def calculate_euclidean_distance(self, x, y):
        """Calculate the euclidean distance

        Args:
            x (float): The x value
            y (float): The y value

        Returns:
            float: The euclidean distance
        """
        return np.sqrt(x**2 + y**2)

    def _get_max_values(self, data):
        """For euclidean distance, get the max logFD and negativeLog10P values to normalize the data

        Args:
            data (list[dict]): The complete set of group analysis result data

        Returns:
            tuple: The max logFD and negativeLog10P values (both -inf when
                data is empty).
        """
        # NOTE(review): these are signed maxima, not maxima of absolute
        # values; confirm that this is the intended normalization for logFD.
        max_logFD = -np.inf
        max_negative_log10_p = -np.inf
        for row in data:
            max_logFD = max(max_logFD, row["logFD"])
            max_negative_log10_p = max(
                max_negative_log10_p, row["negativeLog10P"]
            )
        return max_logFD, max_negative_log10_p

    def update(
        self,
        significance_threshold=None,
        fold_change_threshold=None,
        label_by=None,
    ):
        """Updates the settings and recalculates the volcano plot

        Arguments left as None keep their current value.

        Args:
            significance_threshold (float, optional): Cutoff value for the p-value to determine significance
            fold_change_threshold (float, optional): Cutoff value for the fold change to determine significance
            label_by (str, optional): Metric to sort result data

        Raises:
            ValueError: "Invalid label_by value, must be one of ['euclidean', 'fold_change', 'significance']"

        Returns:
            None
        """
        # "is None" (not truthiness) so that an explicit threshold of 0 is
        # honoured instead of being mistaken for "not provided".
        if significance_threshold is None:
            significance_threshold = self.settings.significance_threshold
        if fold_change_threshold is None:
            fold_change_threshold = self.settings.fold_change_threshold
        # An empty label_by keeps the current value, as before.
        if not label_by:
            label_by = self.settings.label_by

        self._apply_settings(
            significance_threshold, fold_change_threshold, label_by
        )
        self.volcano_plot = self.build()

    def to_df(self):
        """Convert the volcano plot data to a DataFrame"""
        return pd.DataFrame(self.volcano_plot)

    def get_significant_rows(self):
        """Get the significant features

        Returns:
            List: identifiers (protein group or peptide) of every row whose
                significance class is non-zero, in plot order
        """
        return [
            row[self.type] for row in self.volcano_plot if row["significant"]
        ]
|