python-katlas 0.1.4__tar.gz → 2025.10.20__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {python_katlas-0.1.4 → python_katlas-2025.10.20}/LICENSE +0 -0
- {python_katlas-0.1.4 → python_katlas-2025.10.20}/MANIFEST.in +0 -0
- {python_katlas-0.1.4/python_katlas.egg-info → python_katlas-2025.10.20}/PKG-INFO +242 -125
- {python_katlas-0.1.4 → python_katlas-2025.10.20}/README.md +212 -115
- {python_katlas-0.1.4 → python_katlas-2025.10.20}/katlas/__init__.py +0 -0
- python_katlas-2025.10.20/katlas/_modidx.py +213 -0
- python_katlas-2025.10.20/katlas/clustering.py +142 -0
- python_katlas-2025.10.20/katlas/common.py +3 -0
- python_katlas-2025.10.20/katlas/core.py +6 -0
- python_katlas-2025.10.20/katlas/data.py +446 -0
- python_katlas-2025.10.20/katlas/dnn.py +384 -0
- {python_katlas-0.1.4 → python_katlas-2025.10.20}/katlas/feature.py +136 -111
- python_katlas-2025.10.20/katlas/pathway.py +156 -0
- python_katlas-2025.10.20/katlas/plot.py +879 -0
- python_katlas-2025.10.20/katlas/pssm.py +784 -0
- python_katlas-2025.10.20/katlas/score.py +364 -0
- python_katlas-2025.10.20/katlas/statistics.py +102 -0
- {python_katlas-0.1.4 → python_katlas-2025.10.20}/katlas/train.py +51 -77
- python_katlas-2025.10.20/katlas/utils.py +176 -0
- python_katlas-2025.10.20/pyproject.toml +11 -0
- {python_katlas-0.1.4 → python_katlas-2025.10.20/python_katlas.egg-info}/PKG-INFO +242 -125
- {python_katlas-0.1.4 → python_katlas-2025.10.20}/python_katlas.egg-info/SOURCES.txt +10 -2
- {python_katlas-0.1.4 → python_katlas-2025.10.20}/python_katlas.egg-info/dependency_links.txt +0 -0
- {python_katlas-0.1.4 → python_katlas-2025.10.20}/python_katlas.egg-info/entry_points.txt +0 -0
- {python_katlas-0.1.4 → python_katlas-2025.10.20}/python_katlas.egg-info/not-zip-safe +0 -0
- {python_katlas-0.1.4 → python_katlas-2025.10.20}/python_katlas.egg-info/requires.txt +15 -8
- {python_katlas-0.1.4 → python_katlas-2025.10.20}/python_katlas.egg-info/top_level.txt +0 -0
- {python_katlas-0.1.4 → python_katlas-2025.10.20}/settings.ini +4 -4
- {python_katlas-0.1.4 → python_katlas-2025.10.20}/setup.py +0 -0
- python_katlas-0.1.4/katlas/_modidx.py +0 -109
- python_katlas-0.1.4/katlas/core.py +0 -816
- python_katlas-0.1.4/katlas/dl.py +0 -357
- python_katlas-0.1.4/katlas/imports.py +0 -7
- python_katlas-0.1.4/katlas/plot.py +0 -670
- {python_katlas-0.1.4 → python_katlas-2025.10.20}/setup.cfg +0 -0
|
File without changes
|
|
File without changes
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: python-katlas
|
|
3
|
-
Version:
|
|
3
|
+
Version: 2025.10.20
|
|
4
4
|
Summary: tools for predicting kinome specificities
|
|
5
5
|
Home-page: https://github.com/sky1ove/katlas
|
|
6
6
|
Author: lily
|
|
@@ -18,34 +18,51 @@ Classifier: License :: OSI Approved :: Apache Software License
|
|
|
18
18
|
Requires-Python: >=3.7
|
|
19
19
|
Description-Content-Type: text/markdown
|
|
20
20
|
License-File: LICENSE
|
|
21
|
+
Requires-Dist: pandas
|
|
22
|
+
Requires-Dist: gdown
|
|
21
23
|
Requires-Dist: statsmodels
|
|
22
24
|
Requires-Dist: fastparquet
|
|
25
|
+
Requires-Dist: pyarrow
|
|
23
26
|
Requires-Dist: tqdm
|
|
27
|
+
Requires-Dist: logomaker-kinase
|
|
28
|
+
Requires-Dist: seaborn
|
|
29
|
+
Requires-Dist: bokeh
|
|
30
|
+
Requires-Dist: reactome2py
|
|
31
|
+
Requires-Dist: adjustText
|
|
32
|
+
Requires-Dist: scikit-learn
|
|
33
|
+
Requires-Dist: umap-learn
|
|
34
|
+
Requires-Dist: ipywidgets
|
|
35
|
+
Requires-Dist: biopython
|
|
24
36
|
Provides-Extra: dev
|
|
25
37
|
Requires-Dist: nbdev; extra == "dev"
|
|
26
38
|
Requires-Dist: pyngrok; extra == "dev"
|
|
27
|
-
Requires-Dist: fastai
|
|
28
|
-
Requires-Dist: fastbook; extra == "dev"
|
|
39
|
+
Requires-Dist: fastai; extra == "dev"
|
|
29
40
|
Requires-Dist: fairscale; extra == "dev"
|
|
30
41
|
Requires-Dist: fair-esm; extra == "dev"
|
|
31
|
-
Requires-Dist: logomaker; extra == "dev"
|
|
32
|
-
Requires-Dist: seaborn; extra == "dev"
|
|
33
42
|
Requires-Dist: rdkit; extra == "dev"
|
|
34
|
-
Requires-Dist: umap-learn; extra == "dev"
|
|
35
|
-
Requires-Dist: adjustText; extra == "dev"
|
|
36
|
-
Requires-Dist: bokeh; extra == "dev"
|
|
37
|
-
Requires-Dist: scikit-learn>=1.3.0; extra == "dev"
|
|
38
43
|
Requires-Dist: openpyxl; extra == "dev"
|
|
44
|
+
Requires-Dist: transformers; extra == "dev"
|
|
45
|
+
Requires-Dist: sentencepiece; extra == "dev"
|
|
46
|
+
Dynamic: author
|
|
47
|
+
Dynamic: author-email
|
|
48
|
+
Dynamic: classifier
|
|
49
|
+
Dynamic: description
|
|
50
|
+
Dynamic: description-content-type
|
|
51
|
+
Dynamic: home-page
|
|
52
|
+
Dynamic: keywords
|
|
53
|
+
Dynamic: license
|
|
54
|
+
Dynamic: license-file
|
|
55
|
+
Dynamic: provides-extra
|
|
56
|
+
Dynamic: requires-dist
|
|
57
|
+
Dynamic: requires-python
|
|
58
|
+
Dynamic: summary
|
|
39
59
|
|
|
40
60
|
# KATLAS
|
|
41
61
|
|
|
42
62
|
|
|
43
63
|
<!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->
|
|
44
64
|
|
|
45
|
-
<img alt="Katlas logo" width="600" caption="Katlas logo" src="https://github.com/sky1ove/katlas/raw/main/
|
|
46
|
-
|
|
47
|
-
<p><a target="_blank" href="https://colab.research.google.com/github/sky1ove/katlas/blob/main/nbs/index.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>
|
|
48
|
-
<a href="https://pypi.org/project/python-katlas/"><img src="https://img.shields.io/pypi/v/python-katlas?link=https%3A%2F%2Fpypi.org%2Fproject%2Fpython-katlas%2F" alt="PyPI"></a></p>
|
|
65
|
+
<img alt="Katlas logo" width="600" caption="Katlas logo" src="https://github.com/sky1ove/katlas/raw/main/logo.png" id="logo"/>
|
|
49
66
|
|
|
50
67
|
KATLAS is a repository containing python tools to predict kinases given
|
|
51
68
|
a substrate sequence. It also contains datasets of kinase substrate
|
|
@@ -91,26 +108,24 @@ code.
|
|
|
91
108
|
Check out our latest web platform:
|
|
92
109
|
[kinase-atlas.com](https://kinase-atlas.com/)
|
|
93
110
|
|
|
94
|
-
##
|
|
111
|
+
## Install
|
|
95
112
|
|
|
96
|
-
|
|
97
|
-
sequence](https://colab.research.google.com/github/sky1ove/katlas/blob/main/nbs/tutorial_01_sinlge_input.ipynb)
|
|
98
|
-
- 2. [High throughput substrate scoring on phosphoproteomics
|
|
99
|
-
dataset](https://colab.research.google.com/github/sky1ove/katlas/blob/main/nbs/tutorial_02_high_throughput.ipynb)
|
|
100
|
-
- 3. [Kinase enrichment analysis for AKT
|
|
101
|
-
inhibitor](https://colab.research.google.com/github/sky1ove/katlas/blob/main/nbs/tutorial_03a_enrichment_AKTi.ipynb)
|
|
113
|
+
UV:
|
|
102
114
|
|
|
103
|
-
|
|
115
|
+
``` bash
|
|
116
|
+
uv add -U git+https://github.com/sky1ove/katlas.git
|
|
117
|
+
```
|
|
104
118
|
|
|
105
|
-
|
|
119
|
+
pip:
|
|
106
120
|
|
|
107
|
-
|
|
108
|
-
|
|
121
|
+
``` bash
|
|
122
|
+
pip install -U git+https://github.com/sky1ove/katlas.git
|
|
123
|
+
```
|
|
109
124
|
|
|
110
125
|
## Import
|
|
111
126
|
|
|
112
127
|
``` python
|
|
113
|
-
from katlas.
|
|
128
|
+
from katlas.common import *
|
|
114
129
|
```
|
|
115
130
|
|
|
116
131
|
# Quick start
|
|
@@ -130,93 +145,101 @@ For input sequences, we also consider it in two conditions:
|
|
|
130
145
|
- all capital
|
|
131
146
|
- contains lower cases indicating phosphorylation status
|
|
132
147
|
|
|
133
|
-
##
|
|
148
|
+
## Quick start
|
|
134
149
|
|
|
135
|
-
###
|
|
150
|
+
### Site scoring
|
|
151
|
+
|
|
152
|
+
CDDM, all capital
|
|
136
153
|
|
|
137
154
|
``` python
|
|
138
|
-
predict_kinase('
|
|
155
|
+
predict_kinase('AAAAAAASGAGSDN',**Params("CDDM_upper"))
|
|
139
156
|
```
|
|
140
157
|
|
|
141
|
-
considering string: ['-7A', '-6A', '-5A', '-4A', '-3A', '-2A', '-1A', '0S', '1G', '
|
|
158
|
+
considering string: ['-7A', '-6A', '-5A', '-4A', '-3A', '-2A', '-1A', '0S', '1G', '2A', '3G', '4S', '5D', '6N']
|
|
142
159
|
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
Length: 289, dtype: float64
|
|
160
|
+
GCN2 4.556
|
|
161
|
+
MPSK1 4.425
|
|
162
|
+
MEKK2 4.253
|
|
163
|
+
WNK3 4.213
|
|
164
|
+
WNK1 4.064
|
|
165
|
+
...
|
|
166
|
+
PDK1 -25.077
|
|
167
|
+
PDHK3 -25.346
|
|
168
|
+
CLK2 -27.251
|
|
169
|
+
ROR2 -27.582
|
|
170
|
+
DDR1 -53.581
|
|
171
|
+
Length: 328, dtype: float64
|
|
156
172
|
|
|
157
|
-
|
|
173
|
+
CDDM, with lower case indicating phosphorylation status
|
|
158
174
|
|
|
159
175
|
``` python
|
|
160
|
-
predict_kinase('AAAAAAAsGGAGsDN',**
|
|
176
|
+
predict_kinase('AAAAAAAsGGAGsDN',**Params("CDDM"))
|
|
161
177
|
```
|
|
162
178
|
|
|
163
179
|
considering string: ['-7A', '-6A', '-5A', '-4A', '-3A', '-2A', '-1A', '0s', '1G', '2G', '3A', '4G', '5s', '6D', '7N']
|
|
164
180
|
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
### PSPA, with lower case indicating phosphorylation status
|
|
181
|
+
ROR1 8.355
|
|
182
|
+
WNK1 4.907
|
|
183
|
+
WNK2 4.782
|
|
184
|
+
ERK5 4.466
|
|
185
|
+
RIPK2 4.045
|
|
186
|
+
...
|
|
187
|
+
DDR1 -29.393
|
|
188
|
+
TNNI3K -29.884
|
|
189
|
+
CHAK1 -31.775
|
|
190
|
+
VRK1 -45.287
|
|
191
|
+
BRAF -49.403
|
|
192
|
+
Length: 328, dtype: float64
|
|
193
|
+
|
|
194
|
+
PSPA, with lower case indicating phosphorylation status
|
|
180
195
|
|
|
181
196
|
``` python
|
|
182
|
-
predict_kinase('AEEKEyHsEGG',**
|
|
197
|
+
predict_kinase('AEEKEyHsEGG',**Params("PSPA"))
|
|
183
198
|
```
|
|
184
199
|
|
|
185
200
|
considering string: ['-5A', '-4E', '-3E', '-2K', '-1E', '0y', '1H', '2s', '3E', '4G', '5G']
|
|
186
201
|
|
|
187
202
|
kinase
|
|
188
|
-
EGFR
|
|
189
|
-
FGFR4
|
|
190
|
-
ZAP70
|
|
191
|
-
CSK
|
|
192
|
-
SYK
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
203
|
+
EGFR 4.013
|
|
204
|
+
FGFR4 3.568
|
|
205
|
+
ZAP70 3.412
|
|
206
|
+
CSK 3.241
|
|
207
|
+
SYK 3.209
|
|
208
|
+
...
|
|
209
|
+
JAK1 -3.837
|
|
210
|
+
DDR2 -4.421
|
|
211
|
+
TNK2 -4.534
|
|
212
|
+
TNNI3K_TYR -4.651
|
|
213
|
+
TNK1 -5.320
|
|
214
|
+
Length: 93, dtype: float64
|
|
215
|
+
|
|
216
|
+
To replicate the results from The Kinase Library (PSPA)
|
|
196
217
|
|
|
197
218
|
Check this link: [The Kinase
|
|
198
|
-
Library](https://kinase-library.
|
|
219
|
+
Library](https://kinase-library.mit.edu/site?s=AEEKEy*HSEGG&pp=false&scp=true),
|
|
199
220
|
and use log2(score) to rank, it shows same results with the below (with
|
|
200
221
|
slight differences due to rounding).
|
|
201
222
|
|
|
202
223
|
``` python
|
|
203
|
-
predict_kinase('AEEKEyHSEGG',**
|
|
224
|
+
out = predict_kinase('AEEKEyHSEGG',**Params("PSPA"))
|
|
225
|
+
out
|
|
204
226
|
```
|
|
205
227
|
|
|
206
228
|
considering string: ['-5A', '-4E', '-3E', '-2K', '-1E', '0y', '1H', '2S', '3E', '4G', '5G']
|
|
207
229
|
|
|
208
230
|
kinase
|
|
209
|
-
EGFR
|
|
210
|
-
FGFR4
|
|
211
|
-
CSK
|
|
212
|
-
ZAP70
|
|
213
|
-
SYK
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
231
|
+
EGFR 3.181
|
|
232
|
+
FGFR4 2.390
|
|
233
|
+
CSK 2.308
|
|
234
|
+
ZAP70 2.068
|
|
235
|
+
SYK 1.998
|
|
236
|
+
...
|
|
237
|
+
EPHA1 -3.501
|
|
238
|
+
FES -3.699
|
|
239
|
+
TNK1 -4.269
|
|
240
|
+
TNK2 -4.577
|
|
241
|
+
DDR2 -4.920
|
|
242
|
+
Length: 93, dtype: float64
|
|
220
243
|
|
|
221
244
|
- So far [The kinase Library](https://kinase-library.phosphosite.org)
|
|
222
245
|
considers all ***tyr sequences*** in capital regardless of whether or
|
|
@@ -232,13 +255,26 @@ sheet.
|
|
|
232
255
|
``` python
|
|
233
256
|
# Percentile reference sheet
|
|
234
257
|
y_pct = Data.get_pspa_tyr_pct()
|
|
258
|
+
```
|
|
235
259
|
|
|
236
|
-
|
|
260
|
+
``` python
|
|
261
|
+
get_pct('AEEKEyHSEGG',pct_ref = y_pct,**Params("PSPA_y"))
|
|
237
262
|
```
|
|
238
263
|
|
|
239
264
|
considering string: ['-5A', '-4E', '-3E', '-2K', '-1E', '0Y', '1H', '2S', '3E', '4G', '5G']
|
|
240
265
|
|
|
241
|
-
|
|
266
|
+
<div>
|
|
267
|
+
<style scoped>
|
|
268
|
+
.dataframe tbody tr th:only-of-type {
|
|
269
|
+
vertical-align: middle;
|
|
270
|
+
}
|
|
271
|
+
.dataframe tbody tr th {
|
|
272
|
+
vertical-align: top;
|
|
273
|
+
}
|
|
274
|
+
.dataframe thead th {
|
|
275
|
+
text-align: right;
|
|
276
|
+
}
|
|
277
|
+
</style>
|
|
242
278
|
|
|
243
279
|
| | log2(score) | percentile |
|
|
244
280
|
|-------|-------------|------------|
|
|
@@ -255,17 +291,17 @@ get_pct('AEEKEyHSEGG',**param_PSPA_y, pct_ref = y_pct)
|
|
|
255
291
|
| DDR2 | -4.920 | 10.403281 |
|
|
256
292
|
|
|
257
293
|
<p>93 rows × 2 columns</p>
|
|
294
|
+
</div>
|
|
258
295
|
|
|
296
|
+
### Site scoring in a df
|
|
259
297
|
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
### Load your csv
|
|
298
|
+
Load your csv:
|
|
263
299
|
|
|
264
300
|
``` python
|
|
265
301
|
# df = pd.read_csv('your_file.csv')
|
|
266
302
|
```
|
|
267
303
|
|
|
268
|
-
|
|
304
|
+
Or load a demo df
|
|
269
305
|
|
|
270
306
|
``` python
|
|
271
307
|
# Load a demo df with phosphorylation sites
|
|
@@ -273,7 +309,18 @@ df = Data.get_ochoa_site().head()
|
|
|
273
309
|
df.iloc[:,-2:]
|
|
274
310
|
```
|
|
275
311
|
|
|
276
|
-
|
|
312
|
+
<div>
|
|
313
|
+
<style scoped>
|
|
314
|
+
.dataframe tbody tr th:only-of-type {
|
|
315
|
+
vertical-align: middle;
|
|
316
|
+
}
|
|
317
|
+
.dataframe tbody tr th {
|
|
318
|
+
vertical-align: top;
|
|
319
|
+
}
|
|
320
|
+
.dataframe thead th {
|
|
321
|
+
text-align: right;
|
|
322
|
+
}
|
|
323
|
+
</style>
|
|
277
324
|
|
|
278
325
|
| | site_seq | gene_site |
|
|
279
326
|
|-----|-----------------|----------------|
|
|
@@ -283,39 +330,66 @@ df.iloc[:,-2:]
|
|
|
283
330
|
| 3 | KSRFTEYSMTSSVMR | A0A075B6Q4_S68 |
|
|
284
331
|
| 4 | FTEYSMTSSVMRRNE | A0A075B6Q4_S71 |
|
|
285
332
|
|
|
333
|
+
</div>
|
|
286
334
|
|
|
287
|
-
|
|
288
|
-
### Set the column name and param to calculate
|
|
335
|
+
Set the column name and param to calculate
|
|
289
336
|
|
|
290
337
|
Here we choose param_CDDM_upper, as the sequences in the demo df are all
|
|
291
338
|
in capital. You can also choose other params.
|
|
292
339
|
|
|
293
340
|
``` python
|
|
294
|
-
results = predict_kinase_df(df,'site_seq',**
|
|
341
|
+
results = predict_kinase_df(df,'site_seq',**Params("CDDM_upper"))
|
|
295
342
|
results
|
|
296
343
|
```
|
|
297
344
|
|
|
298
345
|
input dataframe has a length 5
|
|
299
346
|
Preprocessing
|
|
300
347
|
Finish preprocessing
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
348
|
+
Merging reference
|
|
349
|
+
Finish merging
|
|
350
|
+
|
|
351
|
+
<div>
|
|
352
|
+
<style scoped>
|
|
353
|
+
.dataframe tbody tr th:only-of-type {
|
|
354
|
+
vertical-align: middle;
|
|
355
|
+
}
|
|
356
|
+
.dataframe tbody tr th {
|
|
357
|
+
vertical-align: top;
|
|
358
|
+
}
|
|
359
|
+
.dataframe thead th {
|
|
360
|
+
text-align: right;
|
|
361
|
+
}
|
|
362
|
+
</style>
|
|
363
|
+
|
|
364
|
+
| | SRC | EPHA3 | FES | NTRK3 | ALK | ABL1 | FLT3 | EPHA8 | EPHB2 | EPHB1 | ... | VRK1 | PKMYT1 | GRK3 | CAMK1B | CDC7 | SMMLCK | ROR1 | GAK | MAST2 | BRAF |
|
|
308
365
|
|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|
|
|
309
|
-
| 0 |
|
|
310
|
-
| 1 |
|
|
311
|
-
| 2 |
|
|
312
|
-
| 3 |
|
|
313
|
-
| 4 |
|
|
366
|
+
| 0 | -2.440640 | -0.818753 | -1.663990 | -0.738991 | -2.047628 | -3.602344 | -3.200998 | -0.935176 | -1.388444 | -1.859450 | ... | -17.103237 | -113.698143 | -16.848783 | -41.520172 | -41.646187 | 1.284159 | -26.566362 | -69.165062 | -17.706400 | -87.763214 |
|
|
367
|
+
| 1 | -3.838486 | -2.735969 | -2.533986 | -2.150399 | -3.792498 | -4.725527 | -5.711791 | -4.534240 | -3.148449 | -2.511518 | ... | -67.889053 | -68.652641 | -45.833855 | -64.171600 | -39.465572 | -65.061722 | -109.561707 | -85.911224 | -60.105064 | -63.889122 |
|
|
368
|
+
| 2 | -2.610423 | -2.370090 | -3.235637 | -1.508413 | -2.571347 | -3.740941 | -3.025596 | -3.373504 | -2.776297 | -3.060740 | ... | -15.798462 | -45.905319 | -61.440742 | -67.695694 | -55.047962 | -42.135216 | -38.501572 | -62.624382 | -56.119389 | -107.060989 |
|
|
369
|
+
| 3 | -5.180541 | -4.201880 | -5.766463 | -3.038421 | -3.836897 | -4.249900 | -5.029885 | -5.411311 | -4.713308 | -4.827825 | ... | -96.978317 | -83.419777 | -22.559393 | -110.611588 | -63.283070 | -37.240440 | -24.497492 | -112.878151 | -43.538158 | -60.348518 |
|
|
370
|
+
| 4 | -2.844254 | -3.322700 | -3.681745 | -1.766435 | -2.666579 | -3.748774 | -4.083619 | -3.912834 | -3.724181 | -3.948160 | ... | -35.824612 | -87.983566 | -83.312317 | -107.162407 | -61.478374 | -85.793571 | -43.738819 | -47.004211 | -42.281624 | -59.518513 |
|
|
314
371
|
|
|
315
|
-
<p>5 rows ×
|
|
372
|
+
<p>5 rows × 328 columns</p>
|
|
373
|
+
</div>
|
|
316
374
|
|
|
375
|
+
``` python
|
|
376
|
+
results.iloc[0].sort_values(ascending=False)
|
|
377
|
+
```
|
|
317
378
|
|
|
318
|
-
|
|
379
|
+
TLK2 8.264621
|
|
380
|
+
GCN2 8.101542
|
|
381
|
+
TLK1 7.693897
|
|
382
|
+
HRI 6.691402
|
|
383
|
+
PLK3 6.579368
|
|
384
|
+
...
|
|
385
|
+
NIK -64.605148
|
|
386
|
+
SRPK2 -67.300667
|
|
387
|
+
GAK -69.165062
|
|
388
|
+
BRAF -87.763214
|
|
389
|
+
PKMYT1 -113.698143
|
|
390
|
+
Name: 0, Length: 328, dtype: float32
|
|
391
|
+
|
|
392
|
+
## Dataset
|
|
319
393
|
|
|
320
394
|
Besides calculating sequence scores, we also provides multiple datasets
|
|
321
395
|
of phosphorylation sites.
|
|
@@ -327,7 +401,18 @@ df = Data.get_cptac_ensembl_site()
|
|
|
327
401
|
df.head(3)
|
|
328
402
|
```
|
|
329
403
|
|
|
330
|
-
|
|
404
|
+
<div>
|
|
405
|
+
<style scoped>
|
|
406
|
+
.dataframe tbody tr th:only-of-type {
|
|
407
|
+
vertical-align: middle;
|
|
408
|
+
}
|
|
409
|
+
.dataframe tbody tr th {
|
|
410
|
+
vertical-align: top;
|
|
411
|
+
}
|
|
412
|
+
.dataframe thead th {
|
|
413
|
+
text-align: right;
|
|
414
|
+
}
|
|
415
|
+
</style>
|
|
331
416
|
|
|
332
417
|
| | gene | site | site_seq | protein | gene_name | gene_site | protein_site |
|
|
333
418
|
|----|----|----|----|----|----|----|----|
|
|
@@ -335,7 +420,7 @@ df.head(3)
|
|
|
335
420
|
| 1 | ENSG00000003056.8 | S267 | DDQLGEESEERDDHL | ENSP00000440488.2 | M6PR | M6PR_S267 | ENSP00000440488_S267 |
|
|
336
421
|
| 2 | ENSG00000048028.11 | S1053 | PPTIRPNSPYDLCSR | ENSP00000003302.4 | USP28 | USP28_S1053 | ENSP00000003302_S1053 |
|
|
337
422
|
|
|
338
|
-
|
|
423
|
+
</div>
|
|
339
424
|
|
|
340
425
|
### [Ochoa et al. human phosphoproteome](https://www.nature.com/articles/s41587-019-0344-3)
|
|
341
426
|
|
|
@@ -344,15 +429,26 @@ df = Data.get_ochoa_site()
|
|
|
344
429
|
df.head(3)
|
|
345
430
|
```
|
|
346
431
|
|
|
347
|
-
|
|
432
|
+
<div>
|
|
433
|
+
<style scoped>
|
|
434
|
+
.dataframe tbody tr th:only-of-type {
|
|
435
|
+
vertical-align: middle;
|
|
436
|
+
}
|
|
437
|
+
.dataframe tbody tr th {
|
|
438
|
+
vertical-align: top;
|
|
439
|
+
}
|
|
440
|
+
.dataframe thead th {
|
|
441
|
+
text-align: right;
|
|
442
|
+
}
|
|
443
|
+
</style>
|
|
348
444
|
|
|
349
445
|
| | uniprot | position | residue | is_disopred | disopred_score | log10_hotspot_pval_min | isHotspot | uniprot_position | functional_score | current_uniprot | name | gene | Sequence | is_valid | site_seq | gene_site |
|
|
350
446
|
|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|
|
|
351
|
-
| 0 | A0A075B6Q4 | 24 | S |
|
|
352
|
-
| 1 | A0A075B6Q4 | 35 | S |
|
|
353
|
-
| 2 | A0A075B6Q4 | 57 | S |
|
|
354
|
-
|
|
447
|
+
| 0 | A0A075B6Q4 | 24 | S | 1.0 | 0.91 | 6.839384 | 1.0 | A0A075B6Q4_24 | 0.149257 | A0A075B6Q4 | A0A075B6Q4_HUMAN | None | MDIQKSENEDDSEWEDVDDEKGDSNDDYDSAGLLSDEDCMSVPGKT... | True | VDDEKGDSNDDYDSA | A0A075B6Q4_S24 |
|
|
448
|
+
| 1 | A0A075B6Q4 | 35 | S | 1.0 | 0.87 | 9.192622 | 0.0 | A0A075B6Q4_35 | 0.136966 | A0A075B6Q4 | A0A075B6Q4_HUMAN | None | MDIQKSENEDDSEWEDVDDEKGDSNDDYDSAGLLSDEDCMSVPGKT... | True | YDSAGLLSDEDCMSV | A0A075B6Q4_S35 |
|
|
449
|
+
| 2 | A0A075B6Q4 | 57 | S | 0.0 | 0.28 | 0.818834 | 0.0 | A0A075B6Q4_57 | 0.125364 | A0A075B6Q4 | A0A075B6Q4_HUMAN | None | MDIQKSENEDDSEWEDVDDEKGDSNDDYDSAGLLSDEDCMSVPGKT... | True | IADHLFWSEETKSRF | A0A075B6Q4_S57 |
|
|
355
450
|
|
|
451
|
+
</div>
|
|
356
452
|
|
|
357
453
|
### PhosphoSitePlus human phosphorylation site
|
|
358
454
|
|
|
@@ -361,7 +457,18 @@ df = Data.get_psp_human_site()
|
|
|
361
457
|
df.head(3)
|
|
362
458
|
```
|
|
363
459
|
|
|
364
|
-
|
|
460
|
+
<div>
|
|
461
|
+
<style scoped>
|
|
462
|
+
.dataframe tbody tr th:only-of-type {
|
|
463
|
+
vertical-align: middle;
|
|
464
|
+
}
|
|
465
|
+
.dataframe tbody tr th {
|
|
466
|
+
vertical-align: top;
|
|
467
|
+
}
|
|
468
|
+
.dataframe thead th {
|
|
469
|
+
text-align: right;
|
|
470
|
+
}
|
|
471
|
+
</style>
|
|
365
472
|
|
|
366
473
|
| | gene | protein | uniprot | site | gene_site | SITE_GRP_ID | species | site_seq | LT_LIT | MS_LIT | MS_CST | CST_CAT# | Ambiguous_Site |
|
|
367
474
|
|----|----|----|----|----|----|----|----|----|----|----|----|----|----|
|
|
@@ -369,7 +476,7 @@ df.head(3)
|
|
|
369
476
|
| 1 | YWHAB | 14-3-3 beta | P31946 | S6 | YWHAB_S6 | 15718709 | human | \_\_MtMDksELVQkAk | NaN | 8.0 | NaN | None | 0 |
|
|
370
477
|
| 2 | YWHAB | 14-3-3 beta | P31946 | Y21 | YWHAB_Y21 | 3426383 | human | LAEQAERyDDMAAAM | NaN | NaN | 4.0 | None | 0 |
|
|
371
478
|
|
|
372
|
-
|
|
479
|
+
</div>
|
|
373
480
|
|
|
374
481
|
### Unique sites of combined Ochoa & PhosphoSitePlus
|
|
375
482
|
|
|
@@ -378,16 +485,26 @@ df = Data.get_combine_site_psp_ochoa()
|
|
|
378
485
|
df.head(3)
|
|
379
486
|
```
|
|
380
487
|
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
488
|
+
<div>
|
|
489
|
+
<style scoped>
|
|
490
|
+
.dataframe tbody tr th:only-of-type {
|
|
491
|
+
vertical-align: middle;
|
|
492
|
+
}
|
|
493
|
+
.dataframe tbody tr th {
|
|
494
|
+
vertical-align: top;
|
|
495
|
+
}
|
|
496
|
+
.dataframe thead th {
|
|
497
|
+
text-align: right;
|
|
498
|
+
}
|
|
499
|
+
</style>
|
|
500
|
+
|
|
501
|
+
| | uniprot | gene | site | site_seq | source | AM_pathogenicity | CDDM_upper | CDDM_max_score |
|
|
502
|
+
|----|----|----|----|----|----|----|----|----|
|
|
503
|
+
| 0 | A0A024R4G9 | C19orf48 | S20 | ITGSRLLSMVPGPAR | psp | NaN | PRKX,AKT1,PKG1,P90RSK,HIPK4,AKT3,HIPK1,PKACB,H... | 2.407041 |
|
|
504
|
+
| 1 | A0A075B6Q4 | None | S24 | VDDEKGDSNDDYDSA | ochoa | NaN | CK2A2,CK2A1,GRK7,GRK5,CK1G1,CK1A,IKKA,CK1G2,CA... | 2.295654 |
|
|
505
|
+
| 2 | A0A075B6Q4 | None | S35 | YDSAGLLSDEDCMSV | ochoa | NaN | CK2A2,CK2A1,IKKA,ATM,IKKB,CAMK1D,MARK2,GRK7,IK... | 2.488683 |
|
|
506
|
+
|
|
507
|
+
</div>
|
|
391
508
|
|
|
392
509
|
## Phosphorylation site sequence example
|
|
393
510
|
|