python-katlas 0.1.4__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {python_katlas-0.1.4 → python_katlas-0.2.0}/LICENSE +0 -0
- {python_katlas-0.1.4 → python_katlas-0.2.0}/MANIFEST.in +0 -0
- {python_katlas-0.1.4/python_katlas.egg-info → python_katlas-0.2.0}/PKG-INFO +246 -127
- {python_katlas-0.1.4 → python_katlas-0.2.0}/README.md +215 -117
- python_katlas-0.2.0/katlas/__init__.py +1 -0
- python_katlas-0.2.0/katlas/_modidx.py +216 -0
- python_katlas-0.2.0/katlas/clustering.py +142 -0
- python_katlas-0.2.0/katlas/common.py +4 -0
- python_katlas-0.2.0/katlas/core.py +6 -0
- python_katlas-0.2.0/katlas/data.py +455 -0
- python_katlas-0.2.0/katlas/dnn.py +384 -0
- {python_katlas-0.1.4 → python_katlas-0.2.0}/katlas/feature.py +136 -111
- python_katlas-0.2.0/katlas/pathway.py +170 -0
- python_katlas-0.2.0/katlas/plot.py +924 -0
- python_katlas-0.2.0/katlas/pssm.py +844 -0
- python_katlas-0.2.0/katlas/score.py +322 -0
- python_katlas-0.2.0/katlas/statistics.py +102 -0
- {python_katlas-0.1.4 → python_katlas-0.2.0}/katlas/train.py +51 -77
- python_katlas-0.2.0/katlas/utils.py +189 -0
- python_katlas-0.2.0/pyproject.toml +11 -0
- {python_katlas-0.1.4 → python_katlas-0.2.0/python_katlas.egg-info}/PKG-INFO +246 -127
- {python_katlas-0.1.4 → python_katlas-0.2.0}/python_katlas.egg-info/SOURCES.txt +10 -2
- {python_katlas-0.1.4 → python_katlas-0.2.0}/python_katlas.egg-info/dependency_links.txt +0 -0
- {python_katlas-0.1.4 → python_katlas-0.2.0}/python_katlas.egg-info/entry_points.txt +0 -0
- {python_katlas-0.1.4 → python_katlas-0.2.0}/python_katlas.egg-info/not-zip-safe +0 -0
- python_katlas-0.2.0/python_katlas.egg-info/requires.txt +27 -0
- {python_katlas-0.1.4 → python_katlas-0.2.0}/python_katlas.egg-info/top_level.txt +0 -0
- python_katlas-0.2.0/settings.ini +40 -0
- {python_katlas-0.1.4 → python_katlas-0.2.0}/setup.py +0 -0
- python_katlas-0.1.4/katlas/__init__.py +0 -1
- python_katlas-0.1.4/katlas/_modidx.py +0 -109
- python_katlas-0.1.4/katlas/core.py +0 -816
- python_katlas-0.1.4/katlas/dl.py +0 -357
- python_katlas-0.1.4/katlas/imports.py +0 -7
- python_katlas-0.1.4/katlas/plot.py +0 -670
- python_katlas-0.1.4/python_katlas.egg-info/requires.txt +0 -19
- python_katlas-0.1.4/settings.ini +0 -44
- {python_katlas-0.1.4 → python_katlas-0.2.0}/setup.cfg +0 -0
|
File without changes
|
|
File without changes
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: python-katlas
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: tools for predicting kinome specificities
|
|
5
5
|
Home-page: https://github.com/sky1ove/katlas
|
|
6
6
|
Author: lily
|
|
@@ -18,34 +18,52 @@ Classifier: License :: OSI Approved :: Apache Software License
|
|
|
18
18
|
Requires-Python: >=3.7
|
|
19
19
|
Description-Content-Type: text/markdown
|
|
20
20
|
License-File: LICENSE
|
|
21
|
+
Requires-Dist: pandas
|
|
22
|
+
Requires-Dist: gdown
|
|
21
23
|
Requires-Dist: statsmodels
|
|
24
|
+
Requires-Dist: statannotations
|
|
22
25
|
Requires-Dist: fastparquet
|
|
26
|
+
Requires-Dist: pyarrow
|
|
23
27
|
Requires-Dist: tqdm
|
|
28
|
+
Requires-Dist: logomaker-kinase
|
|
29
|
+
Requires-Dist: seaborn
|
|
30
|
+
Requires-Dist: bokeh
|
|
31
|
+
Requires-Dist: reactome2py
|
|
32
|
+
Requires-Dist: adjustText
|
|
33
|
+
Requires-Dist: scikit-learn
|
|
34
|
+
Requires-Dist: umap-learn
|
|
35
|
+
Requires-Dist: ipywidgets
|
|
36
|
+
Requires-Dist: biopython
|
|
24
37
|
Provides-Extra: dev
|
|
25
38
|
Requires-Dist: nbdev; extra == "dev"
|
|
26
39
|
Requires-Dist: pyngrok; extra == "dev"
|
|
27
|
-
Requires-Dist: fastai
|
|
28
|
-
Requires-Dist: fastbook; extra == "dev"
|
|
40
|
+
Requires-Dist: fastai; extra == "dev"
|
|
29
41
|
Requires-Dist: fairscale; extra == "dev"
|
|
30
42
|
Requires-Dist: fair-esm; extra == "dev"
|
|
31
|
-
Requires-Dist: logomaker; extra == "dev"
|
|
32
|
-
Requires-Dist: seaborn; extra == "dev"
|
|
33
43
|
Requires-Dist: rdkit; extra == "dev"
|
|
34
|
-
Requires-Dist: umap-learn; extra == "dev"
|
|
35
|
-
Requires-Dist: adjustText; extra == "dev"
|
|
36
|
-
Requires-Dist: bokeh; extra == "dev"
|
|
37
|
-
Requires-Dist: scikit-learn>=1.3.0; extra == "dev"
|
|
38
44
|
Requires-Dist: openpyxl; extra == "dev"
|
|
45
|
+
Requires-Dist: transformers; extra == "dev"
|
|
46
|
+
Requires-Dist: sentencepiece; extra == "dev"
|
|
47
|
+
Dynamic: author
|
|
48
|
+
Dynamic: author-email
|
|
49
|
+
Dynamic: classifier
|
|
50
|
+
Dynamic: description
|
|
51
|
+
Dynamic: description-content-type
|
|
52
|
+
Dynamic: home-page
|
|
53
|
+
Dynamic: keywords
|
|
54
|
+
Dynamic: license
|
|
55
|
+
Dynamic: license-file
|
|
56
|
+
Dynamic: provides-extra
|
|
57
|
+
Dynamic: requires-dist
|
|
58
|
+
Dynamic: requires-python
|
|
59
|
+
Dynamic: summary
|
|
39
60
|
|
|
40
61
|
# KATLAS
|
|
41
62
|
|
|
42
63
|
|
|
43
64
|
<!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->
|
|
44
65
|
|
|
45
|
-
<img alt="Katlas logo" width="600" caption="Katlas logo" src="https://github.com/sky1ove/katlas/raw/main/
|
|
46
|
-
|
|
47
|
-
<p><a target="_blank" href="https://colab.research.google.com/github/sky1ove/katlas/blob/main/nbs/index.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>
|
|
48
|
-
<a href="https://pypi.org/project/python-katlas/"><img src="https://img.shields.io/pypi/v/python-katlas?link=https%3A%2F%2Fpypi.org%2Fproject%2Fpython-katlas%2F" alt="PyPI"></a></p>
|
|
66
|
+
<img alt="Katlas logo" width="600" caption="Katlas logo" src="https://github.com/sky1ove/katlas/raw/main/logo.png" id="logo"/>
|
|
49
67
|
|
|
50
68
|
KATLAS is a repository containing python tools to predict kinases given
|
|
51
69
|
a substrate sequence. It also contains datasets of kinase substrate
|
|
@@ -81,8 +99,6 @@ helpful to your research.
|
|
|
81
99
|
Follow the instructions in katlas_raw:
|
|
82
100
|
https://github.com/sky1ove/katlas_raw
|
|
83
101
|
|
|
84
|
-
Need to install the package via: `pip install 'python-katlas[dev]' -U`
|
|
85
|
-
|
|
86
102
|
## Web applications
|
|
87
103
|
|
|
88
104
|
Users can now run the analysis directly on the web without needing to
|
|
@@ -91,26 +107,27 @@ code.
|
|
|
91
107
|
Check out our latest web platform:
|
|
92
108
|
[kinase-atlas.com](https://kinase-atlas.com/)
|
|
93
109
|
|
|
94
|
-
##
|
|
110
|
+
## Install
|
|
95
111
|
|
|
96
|
-
|
|
97
|
-
sequence](https://colab.research.google.com/github/sky1ove/katlas/blob/main/nbs/tutorial_01_sinlge_input.ipynb)
|
|
98
|
-
- 2. [High throughput substrate scoring on phosphoproteomics
|
|
99
|
-
dataset](https://colab.research.google.com/github/sky1ove/katlas/blob/main/nbs/tutorial_02_high_throughput.ipynb)
|
|
100
|
-
- 3. [Kinase enrichment analysis for AKT
|
|
101
|
-
inhibitor](https://colab.research.google.com/github/sky1ove/katlas/blob/main/nbs/tutorial_03a_enrichment_AKTi.ipynb)
|
|
112
|
+
UV:
|
|
102
113
|
|
|
103
|
-
|
|
114
|
+
``` bash
|
|
115
|
+
uv add -U python-katlas
|
|
116
|
+
```
|
|
104
117
|
|
|
105
|
-
|
|
118
|
+
pip:
|
|
106
119
|
|
|
107
|
-
|
|
108
|
-
|
|
120
|
+
``` bash
|
|
121
|
+
pip install -U python-katlas
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
If using machine-learning related modules, you need to install the development
|
|
125
|
+
version: `pip install -U "python-katlas[dev]"`
|
|
109
126
|
|
|
110
127
|
## Import
|
|
111
128
|
|
|
112
129
|
``` python
|
|
113
|
-
from katlas.
|
|
130
|
+
from katlas.common import *
|
|
114
131
|
```
|
|
115
132
|
|
|
116
133
|
# Quick start
|
|
@@ -130,93 +147,101 @@ For input sequences, we also consider it in two conditions:
|
|
|
130
147
|
- all capital
|
|
131
148
|
- contains lower cases indicating phosphorylation status
|
|
132
149
|
|
|
133
|
-
##
|
|
150
|
+
## Quick start
|
|
151
|
+
|
|
152
|
+
### Site scoring
|
|
134
153
|
|
|
135
|
-
|
|
154
|
+
CDDM, all capital
|
|
136
155
|
|
|
137
156
|
``` python
|
|
138
|
-
predict_kinase('
|
|
157
|
+
predict_kinase('AAAAAAASGAGSDN',**Params("CDDM_upper"))
|
|
139
158
|
```
|
|
140
159
|
|
|
141
|
-
considering string: ['-7A', '-6A', '-5A', '-4A', '-3A', '-2A', '-1A', '0S', '1G', '
|
|
160
|
+
considering string: ['-7A', '-6A', '-5A', '-4A', '-3A', '-2A', '-1A', '0S', '1G', '2A', '3G', '4S', '5D', '6N']
|
|
142
161
|
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
Length: 289, dtype: float64
|
|
162
|
+
GCN2 4.556
|
|
163
|
+
MPSK1 4.425
|
|
164
|
+
MEKK2 4.253
|
|
165
|
+
WNK3 4.213
|
|
166
|
+
WNK1 4.064
|
|
167
|
+
...
|
|
168
|
+
PDK1 -25.077
|
|
169
|
+
PDHK3 -25.346
|
|
170
|
+
CLK2 -27.251
|
|
171
|
+
ROR2 -27.582
|
|
172
|
+
DDR1 -53.581
|
|
173
|
+
Length: 328, dtype: float64
|
|
156
174
|
|
|
157
|
-
|
|
175
|
+
CDDM, with lower case indicating phosphorylation status
|
|
158
176
|
|
|
159
177
|
``` python
|
|
160
|
-
predict_kinase('AAAAAAAsGGAGsDN',**
|
|
178
|
+
predict_kinase('AAAAAAAsGGAGsDN',**Params("CDDM"))
|
|
161
179
|
```
|
|
162
180
|
|
|
163
181
|
considering string: ['-7A', '-6A', '-5A', '-4A', '-3A', '-2A', '-1A', '0s', '1G', '2G', '3A', '4G', '5s', '6D', '7N']
|
|
164
182
|
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
### PSPA, with lower case indicating phosphorylation status
|
|
183
|
+
ROR1 8.355
|
|
184
|
+
WNK1 4.907
|
|
185
|
+
WNK2 4.782
|
|
186
|
+
ERK5 4.466
|
|
187
|
+
RIPK2 4.045
|
|
188
|
+
...
|
|
189
|
+
DDR1 -29.393
|
|
190
|
+
TNNI3K -29.884
|
|
191
|
+
CHAK1 -31.775
|
|
192
|
+
VRK1 -45.287
|
|
193
|
+
BRAF -49.403
|
|
194
|
+
Length: 328, dtype: float64
|
|
195
|
+
|
|
196
|
+
PSPA, with lower case indicating phosphorylation status
|
|
180
197
|
|
|
181
198
|
``` python
|
|
182
|
-
predict_kinase('AEEKEyHsEGG',**
|
|
199
|
+
predict_kinase('AEEKEyHsEGG',**Params("PSPA"))
|
|
183
200
|
```
|
|
184
201
|
|
|
185
202
|
considering string: ['-5A', '-4E', '-3E', '-2K', '-1E', '0y', '1H', '2s', '3E', '4G', '5G']
|
|
186
203
|
|
|
187
204
|
kinase
|
|
188
|
-
EGFR
|
|
189
|
-
FGFR4
|
|
190
|
-
ZAP70
|
|
191
|
-
CSK
|
|
192
|
-
SYK
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
205
|
+
EGFR 4.013
|
|
206
|
+
FGFR4 3.568
|
|
207
|
+
ZAP70 3.412
|
|
208
|
+
CSK 3.241
|
|
209
|
+
SYK 3.209
|
|
210
|
+
...
|
|
211
|
+
JAK1 -3.837
|
|
212
|
+
DDR2 -4.421
|
|
213
|
+
TNK2 -4.534
|
|
214
|
+
TNNI3K_TYR -4.651
|
|
215
|
+
TNK1 -5.320
|
|
216
|
+
Length: 93, dtype: float64
|
|
217
|
+
|
|
218
|
+
To replicate the results from The Kinase Library (PSPA)
|
|
196
219
|
|
|
197
220
|
Check this link: [The Kinase
|
|
198
|
-
Library](https://kinase-library.
|
|
221
|
+
Library](https://kinase-library.mit.edu/site?s=AEEKEy*HSEGG&pp=false&scp=true),
|
|
199
222
|
and use log2(score) to rank; it shows the same results as below (with
|
|
200
223
|
slight differences due to rounding).
|
|
201
224
|
|
|
202
225
|
``` python
|
|
203
|
-
predict_kinase('AEEKEyHSEGG',**
|
|
226
|
+
out = predict_kinase('AEEKEyHSEGG',**Params("PSPA"))
|
|
227
|
+
out
|
|
204
228
|
```
|
|
205
229
|
|
|
206
230
|
considering string: ['-5A', '-4E', '-3E', '-2K', '-1E', '0y', '1H', '2S', '3E', '4G', '5G']
|
|
207
231
|
|
|
208
232
|
kinase
|
|
209
|
-
EGFR
|
|
210
|
-
FGFR4
|
|
211
|
-
CSK
|
|
212
|
-
ZAP70
|
|
213
|
-
SYK
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
233
|
+
EGFR 3.181
|
|
234
|
+
FGFR4 2.390
|
|
235
|
+
CSK 2.308
|
|
236
|
+
ZAP70 2.068
|
|
237
|
+
SYK 1.998
|
|
238
|
+
...
|
|
239
|
+
EPHA1 -3.501
|
|
240
|
+
FES -3.699
|
|
241
|
+
TNK1 -4.269
|
|
242
|
+
TNK2 -4.577
|
|
243
|
+
DDR2 -4.920
|
|
244
|
+
Length: 93, dtype: float64
|
|
220
245
|
|
|
221
246
|
- So far [The Kinase Library](https://kinase-library.phosphosite.org)
|
|
222
247
|
considers all ***tyr sequences*** in capital regardless of whether or
|
|
@@ -232,13 +257,26 @@ sheet.
|
|
|
232
257
|
``` python
|
|
233
258
|
# Percentile reference sheet
|
|
234
259
|
y_pct = Data.get_pspa_tyr_pct()
|
|
260
|
+
```
|
|
235
261
|
|
|
236
|
-
|
|
262
|
+
``` python
|
|
263
|
+
get_pct('AEEKEyHSEGG',pct_ref = y_pct,**Params("PSPA_y"))
|
|
237
264
|
```
|
|
238
265
|
|
|
239
266
|
considering string: ['-5A', '-4E', '-3E', '-2K', '-1E', '0Y', '1H', '2S', '3E', '4G', '5G']
|
|
240
267
|
|
|
241
|
-
|
|
268
|
+
<div>
|
|
269
|
+
<style scoped>
|
|
270
|
+
.dataframe tbody tr th:only-of-type {
|
|
271
|
+
vertical-align: middle;
|
|
272
|
+
}
|
|
273
|
+
.dataframe tbody tr th {
|
|
274
|
+
vertical-align: top;
|
|
275
|
+
}
|
|
276
|
+
.dataframe thead th {
|
|
277
|
+
text-align: right;
|
|
278
|
+
}
|
|
279
|
+
</style>
|
|
242
280
|
|
|
243
281
|
| | log2(score) | percentile |
|
|
244
282
|
|-------|-------------|------------|
|
|
@@ -255,17 +293,17 @@ get_pct('AEEKEyHSEGG',**param_PSPA_y, pct_ref = y_pct)
|
|
|
255
293
|
| DDR2 | -4.920 | 10.403281 |
|
|
256
294
|
|
|
257
295
|
<p>93 rows × 2 columns</p>
|
|
296
|
+
</div>
|
|
258
297
|
|
|
298
|
+
### Site scoring in a df
|
|
259
299
|
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
### Load your csv
|
|
300
|
+
Load your csv:
|
|
263
301
|
|
|
264
302
|
``` python
|
|
265
303
|
# df = pd.read_csv('your_file.csv')
|
|
266
304
|
```
|
|
267
305
|
|
|
268
|
-
|
|
306
|
+
Or load a demo df
|
|
269
307
|
|
|
270
308
|
``` python
|
|
271
309
|
# Load a demo df with phosphorylation sites
|
|
@@ -273,7 +311,18 @@ df = Data.get_ochoa_site().head()
|
|
|
273
311
|
df.iloc[:,-2:]
|
|
274
312
|
```
|
|
275
313
|
|
|
276
|
-
|
|
314
|
+
<div>
|
|
315
|
+
<style scoped>
|
|
316
|
+
.dataframe tbody tr th:only-of-type {
|
|
317
|
+
vertical-align: middle;
|
|
318
|
+
}
|
|
319
|
+
.dataframe tbody tr th {
|
|
320
|
+
vertical-align: top;
|
|
321
|
+
}
|
|
322
|
+
.dataframe thead th {
|
|
323
|
+
text-align: right;
|
|
324
|
+
}
|
|
325
|
+
</style>
|
|
277
326
|
|
|
278
327
|
| | site_seq | gene_site |
|
|
279
328
|
|-----|-----------------|----------------|
|
|
@@ -283,39 +332,66 @@ df.iloc[:,-2:]
|
|
|
283
332
|
| 3 | KSRFTEYSMTSSVMR | A0A075B6Q4_S68 |
|
|
284
333
|
| 4 | FTEYSMTSSVMRRNE | A0A075B6Q4_S71 |
|
|
285
334
|
|
|
335
|
+
</div>
|
|
286
336
|
|
|
287
|
-
|
|
288
|
-
### Set the column name and param to calculate
|
|
337
|
+
Set the column name and param to calculate
|
|
289
338
|
|
|
290
339
|
Here we choose `Params("CDDM_upper")`, as the sequences in the demo df are all
|
|
291
340
|
in capital. You can also choose other params.
|
|
292
341
|
|
|
293
342
|
``` python
|
|
294
|
-
results = predict_kinase_df(df,'site_seq',**
|
|
343
|
+
results = predict_kinase_df(df,'site_seq',**Params("CDDM_upper"))
|
|
295
344
|
results
|
|
296
345
|
```
|
|
297
346
|
|
|
298
347
|
input dataframe has a length 5
|
|
299
348
|
Preprocessing
|
|
300
349
|
Finish preprocessing
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
350
|
+
Merging reference
|
|
351
|
+
Finish merging
|
|
352
|
+
|
|
353
|
+
<div>
|
|
354
|
+
<style scoped>
|
|
355
|
+
.dataframe tbody tr th:only-of-type {
|
|
356
|
+
vertical-align: middle;
|
|
357
|
+
}
|
|
358
|
+
.dataframe tbody tr th {
|
|
359
|
+
vertical-align: top;
|
|
360
|
+
}
|
|
361
|
+
.dataframe thead th {
|
|
362
|
+
text-align: right;
|
|
363
|
+
}
|
|
364
|
+
</style>
|
|
365
|
+
|
|
366
|
+
| | SRC | EPHA3 | FES | NTRK3 | ALK | ABL1 | FLT3 | EPHA8 | EPHB2 | EPHB1 | ... | VRK1 | PKMYT1 | GRK3 | CAMK1B | CDC7 | SMMLCK | ROR1 | GAK | MAST2 | BRAF |
|
|
308
367
|
|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|
|
|
309
|
-
| 0 |
|
|
310
|
-
| 1 |
|
|
311
|
-
| 2 |
|
|
312
|
-
| 3 |
|
|
313
|
-
| 4 |
|
|
368
|
+
| 0 | -2.440640 | -0.818753 | -1.663990 | -0.738991 | -2.047628 | -3.602344 | -3.200998 | -0.935176 | -1.388444 | -1.859450 | ... | -17.103237 | -113.698143 | -16.848783 | -41.520172 | -41.646187 | 1.284159 | -26.566362 | -69.165062 | -17.706400 | -87.763214 |
|
|
369
|
+
| 1 | -3.838486 | -2.735969 | -2.533986 | -2.150399 | -3.792498 | -4.725527 | -5.711791 | -4.534240 | -3.148449 | -2.511518 | ... | -67.889053 | -68.652641 | -45.833855 | -64.171600 | -39.465572 | -65.061722 | -109.561707 | -85.911224 | -60.105064 | -63.889122 |
|
|
370
|
+
| 2 | -2.610423 | -2.370090 | -3.235637 | -1.508413 | -2.571347 | -3.740941 | -3.025596 | -3.373504 | -2.776297 | -3.060740 | ... | -15.798462 | -45.905319 | -61.440742 | -67.695694 | -55.047962 | -42.135216 | -38.501572 | -62.624382 | -56.119389 | -107.060989 |
|
|
371
|
+
| 3 | -5.180541 | -4.201880 | -5.766463 | -3.038421 | -3.836897 | -4.249900 | -5.029885 | -5.411311 | -4.713308 | -4.827825 | ... | -96.978317 | -83.419777 | -22.559393 | -110.611588 | -63.283070 | -37.240440 | -24.497492 | -112.878151 | -43.538158 | -60.348518 |
|
|
372
|
+
| 4 | -2.844254 | -3.322700 | -3.681745 | -1.766435 | -2.666579 | -3.748774 | -4.083619 | -3.912834 | -3.724181 | -3.948160 | ... | -35.824612 | -87.983566 | -83.312317 | -107.162407 | -61.478374 | -85.793571 | -43.738819 | -47.004211 | -42.281624 | -59.518513 |
|
|
314
373
|
|
|
315
|
-
<p>5 rows ×
|
|
374
|
+
<p>5 rows × 328 columns</p>
|
|
375
|
+
</div>
|
|
316
376
|
|
|
377
|
+
``` python
|
|
378
|
+
results.iloc[0].sort_values(ascending=False)
|
|
379
|
+
```
|
|
317
380
|
|
|
318
|
-
|
|
381
|
+
TLK2 8.264621
|
|
382
|
+
GCN2 8.101542
|
|
383
|
+
TLK1 7.693897
|
|
384
|
+
HRI 6.691402
|
|
385
|
+
PLK3 6.579368
|
|
386
|
+
...
|
|
387
|
+
NIK -64.605148
|
|
388
|
+
SRPK2 -67.300667
|
|
389
|
+
GAK -69.165062
|
|
390
|
+
BRAF -87.763214
|
|
391
|
+
PKMYT1 -113.698143
|
|
392
|
+
Name: 0, Length: 328, dtype: float32
|
|
393
|
+
|
|
394
|
+
## Dataset
|
|
319
395
|
|
|
320
396
|
Besides calculating sequence scores, we also provide multiple datasets
|
|
321
397
|
of phosphorylation sites.
|
|
@@ -327,7 +403,18 @@ df = Data.get_cptac_ensembl_site()
|
|
|
327
403
|
df.head(3)
|
|
328
404
|
```
|
|
329
405
|
|
|
330
|
-
|
|
406
|
+
<div>
|
|
407
|
+
<style scoped>
|
|
408
|
+
.dataframe tbody tr th:only-of-type {
|
|
409
|
+
vertical-align: middle;
|
|
410
|
+
}
|
|
411
|
+
.dataframe tbody tr th {
|
|
412
|
+
vertical-align: top;
|
|
413
|
+
}
|
|
414
|
+
.dataframe thead th {
|
|
415
|
+
text-align: right;
|
|
416
|
+
}
|
|
417
|
+
</style>
|
|
331
418
|
|
|
332
419
|
| | gene | site | site_seq | protein | gene_name | gene_site | protein_site |
|
|
333
420
|
|----|----|----|----|----|----|----|----|
|
|
@@ -335,7 +422,7 @@ df.head(3)
|
|
|
335
422
|
| 1 | ENSG00000003056.8 | S267 | DDQLGEESEERDDHL | ENSP00000440488.2 | M6PR | M6PR_S267 | ENSP00000440488_S267 |
|
|
336
423
|
| 2 | ENSG00000048028.11 | S1053 | PPTIRPNSPYDLCSR | ENSP00000003302.4 | USP28 | USP28_S1053 | ENSP00000003302_S1053 |
|
|
337
424
|
|
|
338
|
-
|
|
425
|
+
</div>
|
|
339
426
|
|
|
340
427
|
### [Ochoa et al. human phosphoproteome](https://www.nature.com/articles/s41587-019-0344-3)
|
|
341
428
|
|
|
@@ -344,15 +431,26 @@ df = Data.get_ochoa_site()
|
|
|
344
431
|
df.head(3)
|
|
345
432
|
```
|
|
346
433
|
|
|
347
|
-
|
|
434
|
+
<div>
|
|
435
|
+
<style scoped>
|
|
436
|
+
.dataframe tbody tr th:only-of-type {
|
|
437
|
+
vertical-align: middle;
|
|
438
|
+
}
|
|
439
|
+
.dataframe tbody tr th {
|
|
440
|
+
vertical-align: top;
|
|
441
|
+
}
|
|
442
|
+
.dataframe thead th {
|
|
443
|
+
text-align: right;
|
|
444
|
+
}
|
|
445
|
+
</style>
|
|
348
446
|
|
|
349
447
|
| | uniprot | position | residue | is_disopred | disopred_score | log10_hotspot_pval_min | isHotspot | uniprot_position | functional_score | current_uniprot | name | gene | Sequence | is_valid | site_seq | gene_site |
|
|
350
448
|
|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|
|
|
351
|
-
| 0 | A0A075B6Q4 | 24 | S |
|
|
352
|
-
| 1 | A0A075B6Q4 | 35 | S |
|
|
353
|
-
| 2 | A0A075B6Q4 | 57 | S |
|
|
354
|
-
|
|
449
|
+
| 0 | A0A075B6Q4 | 24 | S | 1.0 | 0.91 | 6.839384 | 1.0 | A0A075B6Q4_24 | 0.149257 | A0A075B6Q4 | A0A075B6Q4_HUMAN | None | MDIQKSENEDDSEWEDVDDEKGDSNDDYDSAGLLSDEDCMSVPGKT... | True | VDDEKGDSNDDYDSA | A0A075B6Q4_S24 |
|
|
450
|
+
| 1 | A0A075B6Q4 | 35 | S | 1.0 | 0.87 | 9.192622 | 0.0 | A0A075B6Q4_35 | 0.136966 | A0A075B6Q4 | A0A075B6Q4_HUMAN | None | MDIQKSENEDDSEWEDVDDEKGDSNDDYDSAGLLSDEDCMSVPGKT... | True | YDSAGLLSDEDCMSV | A0A075B6Q4_S35 |
|
|
451
|
+
| 2 | A0A075B6Q4 | 57 | S | 0.0 | 0.28 | 0.818834 | 0.0 | A0A075B6Q4_57 | 0.125364 | A0A075B6Q4 | A0A075B6Q4_HUMAN | None | MDIQKSENEDDSEWEDVDDEKGDSNDDYDSAGLLSDEDCMSVPGKT... | True | IADHLFWSEETKSRF | A0A075B6Q4_S57 |
|
|
355
452
|
|
|
453
|
+
</div>
|
|
356
454
|
|
|
357
455
|
### PhosphoSitePlus human phosphorylation site
|
|
358
456
|
|
|
@@ -361,7 +459,18 @@ df = Data.get_psp_human_site()
|
|
|
361
459
|
df.head(3)
|
|
362
460
|
```
|
|
363
461
|
|
|
364
|
-
|
|
462
|
+
<div>
|
|
463
|
+
<style scoped>
|
|
464
|
+
.dataframe tbody tr th:only-of-type {
|
|
465
|
+
vertical-align: middle;
|
|
466
|
+
}
|
|
467
|
+
.dataframe tbody tr th {
|
|
468
|
+
vertical-align: top;
|
|
469
|
+
}
|
|
470
|
+
.dataframe thead th {
|
|
471
|
+
text-align: right;
|
|
472
|
+
}
|
|
473
|
+
</style>
|
|
365
474
|
|
|
366
475
|
| | gene | protein | uniprot | site | gene_site | SITE_GRP_ID | species | site_seq | LT_LIT | MS_LIT | MS_CST | CST_CAT# | Ambiguous_Site |
|
|
367
476
|
|----|----|----|----|----|----|----|----|----|----|----|----|----|----|
|
|
@@ -369,7 +478,7 @@ df.head(3)
|
|
|
369
478
|
| 1 | YWHAB | 14-3-3 beta | P31946 | S6 | YWHAB_S6 | 15718709 | human | \_\_MtMDksELVQkAk | NaN | 8.0 | NaN | None | 0 |
|
|
370
479
|
| 2 | YWHAB | 14-3-3 beta | P31946 | Y21 | YWHAB_Y21 | 3426383 | human | LAEQAERyDDMAAAM | NaN | NaN | 4.0 | None | 0 |
|
|
371
480
|
|
|
372
|
-
|
|
481
|
+
</div>
|
|
373
482
|
|
|
374
483
|
### Unique sites of combined Ochoa & PhosphoSitePlus
|
|
375
484
|
|
|
@@ -378,16 +487,26 @@ df = Data.get_combine_site_psp_ochoa()
|
|
|
378
487
|
df.head(3)
|
|
379
488
|
```
|
|
380
489
|
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
490
|
+
<div>
|
|
491
|
+
<style scoped>
|
|
492
|
+
.dataframe tbody tr th:only-of-type {
|
|
493
|
+
vertical-align: middle;
|
|
494
|
+
}
|
|
495
|
+
.dataframe tbody tr th {
|
|
496
|
+
vertical-align: top;
|
|
497
|
+
}
|
|
498
|
+
.dataframe thead th {
|
|
499
|
+
text-align: right;
|
|
500
|
+
}
|
|
501
|
+
</style>
|
|
502
|
+
|
|
503
|
+
| | uniprot | gene | site | site_seq | source | AM_pathogenicity | CDDM_upper | CDDM_max_score |
|
|
504
|
+
|----|----|----|----|----|----|----|----|----|
|
|
505
|
+
| 0 | A0A024R4G9 | C19orf48 | S20 | ITGSRLLSMVPGPAR | psp | NaN | PRKX,AKT1,PKG1,P90RSK,HIPK4,AKT3,HIPK1,PKACB,H... | 2.407041 |
|
|
506
|
+
| 1 | A0A075B6Q4 | None | S24 | VDDEKGDSNDDYDSA | ochoa | NaN | CK2A2,CK2A1,GRK7,GRK5,CK1G1,CK1A,IKKA,CK1G2,CA... | 2.295654 |
|
|
507
|
+
| 2 | A0A075B6Q4 | None | S35 | YDSAGLLSDEDCMSV | ochoa | NaN | CK2A2,CK2A1,IKKA,ATM,IKKB,CAMK1D,MARK2,GRK7,IK... | 2.488683 |
|
|
508
|
+
|
|
509
|
+
</div>
|
|
391
510
|
|
|
392
511
|
## Phosphorylation site sequence example
|
|
393
512
|
|