python-katlas 0.1.4__tar.gz → 2025.10.20.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {python_katlas-0.1.4 → python_katlas-2025.10.20.2}/LICENSE +0 -0
- {python_katlas-0.1.4 → python_katlas-2025.10.20.2}/MANIFEST.in +0 -0
- {python_katlas-0.1.4/python_katlas.egg-info → python_katlas-2025.10.20.2}/PKG-INFO +245 -127
- {python_katlas-0.1.4 → python_katlas-2025.10.20.2}/README.md +215 -117
- python_katlas-2025.10.20.2/katlas/__init__.py +1 -0
- python_katlas-2025.10.20.2/katlas/_modidx.py +213 -0
- python_katlas-2025.10.20.2/katlas/clustering.py +142 -0
- python_katlas-2025.10.20.2/katlas/common.py +3 -0
- python_katlas-2025.10.20.2/katlas/core.py +6 -0
- python_katlas-2025.10.20.2/katlas/data.py +446 -0
- python_katlas-2025.10.20.2/katlas/dnn.py +384 -0
- {python_katlas-0.1.4 → python_katlas-2025.10.20.2}/katlas/feature.py +136 -111
- python_katlas-2025.10.20.2/katlas/pathway.py +156 -0
- python_katlas-2025.10.20.2/katlas/plot.py +879 -0
- python_katlas-2025.10.20.2/katlas/pssm.py +784 -0
- python_katlas-2025.10.20.2/katlas/score.py +364 -0
- python_katlas-2025.10.20.2/katlas/statistics.py +102 -0
- {python_katlas-0.1.4 → python_katlas-2025.10.20.2}/katlas/train.py +51 -77
- python_katlas-2025.10.20.2/katlas/utils.py +176 -0
- python_katlas-2025.10.20.2/pyproject.toml +11 -0
- {python_katlas-0.1.4 → python_katlas-2025.10.20.2/python_katlas.egg-info}/PKG-INFO +245 -127
- {python_katlas-0.1.4 → python_katlas-2025.10.20.2}/python_katlas.egg-info/SOURCES.txt +10 -2
- {python_katlas-0.1.4 → python_katlas-2025.10.20.2}/python_katlas.egg-info/dependency_links.txt +0 -0
- {python_katlas-0.1.4 → python_katlas-2025.10.20.2}/python_katlas.egg-info/entry_points.txt +0 -0
- {python_katlas-0.1.4 → python_katlas-2025.10.20.2}/python_katlas.egg-info/not-zip-safe +0 -0
- {python_katlas-0.1.4 → python_katlas-2025.10.20.2}/python_katlas.egg-info/requires.txt +15 -8
- {python_katlas-0.1.4 → python_katlas-2025.10.20.2}/python_katlas.egg-info/top_level.txt +0 -0
- {python_katlas-0.1.4 → python_katlas-2025.10.20.2}/settings.ini +4 -4
- {python_katlas-0.1.4 → python_katlas-2025.10.20.2}/setup.py +0 -0
- python_katlas-0.1.4/katlas/__init__.py +0 -1
- python_katlas-0.1.4/katlas/_modidx.py +0 -109
- python_katlas-0.1.4/katlas/core.py +0 -816
- python_katlas-0.1.4/katlas/dl.py +0 -357
- python_katlas-0.1.4/katlas/imports.py +0 -7
- python_katlas-0.1.4/katlas/plot.py +0 -670
- {python_katlas-0.1.4 → python_katlas-2025.10.20.2}/setup.cfg +0 -0
|
File without changes
|
|
File without changes
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: python-katlas
|
|
3
|
-
Version:
|
|
3
|
+
Version: 2025.10.20.2
|
|
4
4
|
Summary: tools for predicting kinome specificities
|
|
5
5
|
Home-page: https://github.com/sky1ove/katlas
|
|
6
6
|
Author: lily
|
|
@@ -18,34 +18,51 @@ Classifier: License :: OSI Approved :: Apache Software License
|
|
|
18
18
|
Requires-Python: >=3.7
|
|
19
19
|
Description-Content-Type: text/markdown
|
|
20
20
|
License-File: LICENSE
|
|
21
|
+
Requires-Dist: pandas
|
|
22
|
+
Requires-Dist: gdown
|
|
21
23
|
Requires-Dist: statsmodels
|
|
22
24
|
Requires-Dist: fastparquet
|
|
25
|
+
Requires-Dist: pyarrow
|
|
23
26
|
Requires-Dist: tqdm
|
|
27
|
+
Requires-Dist: logomaker-kinase
|
|
28
|
+
Requires-Dist: seaborn
|
|
29
|
+
Requires-Dist: bokeh
|
|
30
|
+
Requires-Dist: reactome2py
|
|
31
|
+
Requires-Dist: adjustText
|
|
32
|
+
Requires-Dist: scikit-learn
|
|
33
|
+
Requires-Dist: umap-learn
|
|
34
|
+
Requires-Dist: ipywidgets
|
|
35
|
+
Requires-Dist: biopython
|
|
24
36
|
Provides-Extra: dev
|
|
25
37
|
Requires-Dist: nbdev; extra == "dev"
|
|
26
38
|
Requires-Dist: pyngrok; extra == "dev"
|
|
27
|
-
Requires-Dist: fastai
|
|
28
|
-
Requires-Dist: fastbook; extra == "dev"
|
|
39
|
+
Requires-Dist: fastai; extra == "dev"
|
|
29
40
|
Requires-Dist: fairscale; extra == "dev"
|
|
30
41
|
Requires-Dist: fair-esm; extra == "dev"
|
|
31
|
-
Requires-Dist: logomaker; extra == "dev"
|
|
32
|
-
Requires-Dist: seaborn; extra == "dev"
|
|
33
42
|
Requires-Dist: rdkit; extra == "dev"
|
|
34
|
-
Requires-Dist: umap-learn; extra == "dev"
|
|
35
|
-
Requires-Dist: adjustText; extra == "dev"
|
|
36
|
-
Requires-Dist: bokeh; extra == "dev"
|
|
37
|
-
Requires-Dist: scikit-learn>=1.3.0; extra == "dev"
|
|
38
43
|
Requires-Dist: openpyxl; extra == "dev"
|
|
44
|
+
Requires-Dist: transformers; extra == "dev"
|
|
45
|
+
Requires-Dist: sentencepiece; extra == "dev"
|
|
46
|
+
Dynamic: author
|
|
47
|
+
Dynamic: author-email
|
|
48
|
+
Dynamic: classifier
|
|
49
|
+
Dynamic: description
|
|
50
|
+
Dynamic: description-content-type
|
|
51
|
+
Dynamic: home-page
|
|
52
|
+
Dynamic: keywords
|
|
53
|
+
Dynamic: license
|
|
54
|
+
Dynamic: license-file
|
|
55
|
+
Dynamic: provides-extra
|
|
56
|
+
Dynamic: requires-dist
|
|
57
|
+
Dynamic: requires-python
|
|
58
|
+
Dynamic: summary
|
|
39
59
|
|
|
40
60
|
# KATLAS
|
|
41
61
|
|
|
42
62
|
|
|
43
63
|
<!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->
|
|
44
64
|
|
|
45
|
-
<img alt="Katlas logo" width="600" caption="Katlas logo" src="https://github.com/sky1ove/katlas/raw/main/
|
|
46
|
-
|
|
47
|
-
<p><a target="_blank" href="https://colab.research.google.com/github/sky1ove/katlas/blob/main/nbs/index.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>
|
|
48
|
-
<a href="https://pypi.org/project/python-katlas/"><img src="https://img.shields.io/pypi/v/python-katlas?link=https%3A%2F%2Fpypi.org%2Fproject%2Fpython-katlas%2F" alt="PyPI"></a></p>
|
|
65
|
+
<img alt="Katlas logo" width="600" caption="Katlas logo" src="https://github.com/sky1ove/katlas/raw/main/logo.png" id="logo"/>
|
|
49
66
|
|
|
50
67
|
KATLAS is a repository containing python tools to predict kinases given
|
|
51
68
|
a substrate sequence. It also contains datasets of kinase substrate
|
|
@@ -81,8 +98,6 @@ helpful to your research.
|
|
|
81
98
|
Follow the instructions in katlas_raw:
|
|
82
99
|
https://github.com/sky1ove/katlas_raw
|
|
83
100
|
|
|
84
|
-
Need to install the package via: `pip install 'python-katlas[dev]' -U`
|
|
85
|
-
|
|
86
101
|
## Web applications
|
|
87
102
|
|
|
88
103
|
Users can now run the analysis directly on the web without needing to
|
|
@@ -91,26 +106,27 @@ code.
|
|
|
91
106
|
Check out our latest web platform:
|
|
92
107
|
[kinase-atlas.com](https://kinase-atlas.com/)
|
|
93
108
|
|
|
94
|
-
##
|
|
109
|
+
## Install
|
|
95
110
|
|
|
96
|
-
|
|
97
|
-
sequence](https://colab.research.google.com/github/sky1ove/katlas/blob/main/nbs/tutorial_01_sinlge_input.ipynb)
|
|
98
|
-
- 2. [High throughput substrate scoring on phosphoproteomics
|
|
99
|
-
dataset](https://colab.research.google.com/github/sky1ove/katlas/blob/main/nbs/tutorial_02_high_throughput.ipynb)
|
|
100
|
-
- 3. [Kinase enrichment analysis for AKT
|
|
101
|
-
inhibitor](https://colab.research.google.com/github/sky1ove/katlas/blob/main/nbs/tutorial_03a_enrichment_AKTi.ipynb)
|
|
111
|
+
UV:
|
|
102
112
|
|
|
103
|
-
|
|
113
|
+
``` bash
|
|
114
|
+
uv add -U python-katlas
|
|
115
|
+
```
|
|
104
116
|
|
|
105
|
-
|
|
117
|
+
pip:
|
|
106
118
|
|
|
107
|
-
|
|
108
|
-
|
|
119
|
+
``` bash
|
|
120
|
+
pip install -U python-katlas
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
If you use the machine-learning related modules, you need to install the development
|
|
124
|
+
version: `pip install -U "python-katlas[dev]"`
|
|
109
125
|
|
|
110
126
|
## Import
|
|
111
127
|
|
|
112
128
|
``` python
|
|
113
|
-
from katlas.
|
|
129
|
+
from katlas.common import *
|
|
114
130
|
```
|
|
115
131
|
|
|
116
132
|
# Quick start
|
|
@@ -130,93 +146,101 @@ For input sequences, we also consider it in two conditions:
|
|
|
130
146
|
- all capital
|
|
131
147
|
- contains lower cases indicating phosphorylation status
|
|
132
148
|
|
|
133
|
-
##
|
|
149
|
+
## Quick start
|
|
150
|
+
|
|
151
|
+
### Site scoring
|
|
134
152
|
|
|
135
|
-
|
|
153
|
+
CDDM, all capital
|
|
136
154
|
|
|
137
155
|
``` python
|
|
138
|
-
predict_kinase('
|
|
156
|
+
predict_kinase('AAAAAAASGAGSDN',**Params("CDDM_upper"))
|
|
139
157
|
```
|
|
140
158
|
|
|
141
|
-
considering string: ['-7A', '-6A', '-5A', '-4A', '-3A', '-2A', '-1A', '0S', '1G', '
|
|
159
|
+
considering string: ['-7A', '-6A', '-5A', '-4A', '-3A', '-2A', '-1A', '0S', '1G', '2A', '3G', '4S', '5D', '6N']
|
|
142
160
|
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
Length: 289, dtype: float64
|
|
161
|
+
GCN2 4.556
|
|
162
|
+
MPSK1 4.425
|
|
163
|
+
MEKK2 4.253
|
|
164
|
+
WNK3 4.213
|
|
165
|
+
WNK1 4.064
|
|
166
|
+
...
|
|
167
|
+
PDK1 -25.077
|
|
168
|
+
PDHK3 -25.346
|
|
169
|
+
CLK2 -27.251
|
|
170
|
+
ROR2 -27.582
|
|
171
|
+
DDR1 -53.581
|
|
172
|
+
Length: 328, dtype: float64
|
|
156
173
|
|
|
157
|
-
|
|
174
|
+
CDDM, with lower case indicating phosphorylation status
|
|
158
175
|
|
|
159
176
|
``` python
|
|
160
|
-
predict_kinase('AAAAAAAsGGAGsDN',**
|
|
177
|
+
predict_kinase('AAAAAAAsGGAGsDN',**Params("CDDM"))
|
|
161
178
|
```
|
|
162
179
|
|
|
163
180
|
considering string: ['-7A', '-6A', '-5A', '-4A', '-3A', '-2A', '-1A', '0s', '1G', '2G', '3A', '4G', '5s', '6D', '7N']
|
|
164
181
|
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
### PSPA, with lower case indicating phosphorylation status
|
|
182
|
+
ROR1 8.355
|
|
183
|
+
WNK1 4.907
|
|
184
|
+
WNK2 4.782
|
|
185
|
+
ERK5 4.466
|
|
186
|
+
RIPK2 4.045
|
|
187
|
+
...
|
|
188
|
+
DDR1 -29.393
|
|
189
|
+
TNNI3K -29.884
|
|
190
|
+
CHAK1 -31.775
|
|
191
|
+
VRK1 -45.287
|
|
192
|
+
BRAF -49.403
|
|
193
|
+
Length: 328, dtype: float64
|
|
194
|
+
|
|
195
|
+
PSPA, with lower case indicating phosphorylation status
|
|
180
196
|
|
|
181
197
|
``` python
|
|
182
|
-
predict_kinase('AEEKEyHsEGG',**
|
|
198
|
+
predict_kinase('AEEKEyHsEGG',**Params("PSPA"))
|
|
183
199
|
```
|
|
184
200
|
|
|
185
201
|
considering string: ['-5A', '-4E', '-3E', '-2K', '-1E', '0y', '1H', '2s', '3E', '4G', '5G']
|
|
186
202
|
|
|
187
203
|
kinase
|
|
188
|
-
EGFR
|
|
189
|
-
FGFR4
|
|
190
|
-
ZAP70
|
|
191
|
-
CSK
|
|
192
|
-
SYK
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
204
|
+
EGFR 4.013
|
|
205
|
+
FGFR4 3.568
|
|
206
|
+
ZAP70 3.412
|
|
207
|
+
CSK 3.241
|
|
208
|
+
SYK 3.209
|
|
209
|
+
...
|
|
210
|
+
JAK1 -3.837
|
|
211
|
+
DDR2 -4.421
|
|
212
|
+
TNK2 -4.534
|
|
213
|
+
TNNI3K_TYR -4.651
|
|
214
|
+
TNK1 -5.320
|
|
215
|
+
Length: 93, dtype: float64
|
|
216
|
+
|
|
217
|
+
To replicate the results from The Kinase Library (PSPA)
|
|
196
218
|
|
|
197
219
|
Check this link: [The Kinase
|
|
198
|
-
Library](https://kinase-library.
|
|
220
|
+
Library](https://kinase-library.mit.edu/site?s=AEEKEy*HSEGG&pp=false&scp=true),
|
|
199
221
|
and use log2(score) to rank; it shows the same results as those below (with
|
|
200
222
|
slight differences due to rounding).
|
|
201
223
|
|
|
202
224
|
``` python
|
|
203
|
-
predict_kinase('AEEKEyHSEGG',**
|
|
225
|
+
out = predict_kinase('AEEKEyHSEGG',**Params("PSPA"))
|
|
226
|
+
out
|
|
204
227
|
```
|
|
205
228
|
|
|
206
229
|
considering string: ['-5A', '-4E', '-3E', '-2K', '-1E', '0y', '1H', '2S', '3E', '4G', '5G']
|
|
207
230
|
|
|
208
231
|
kinase
|
|
209
|
-
EGFR
|
|
210
|
-
FGFR4
|
|
211
|
-
CSK
|
|
212
|
-
ZAP70
|
|
213
|
-
SYK
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
232
|
+
EGFR 3.181
|
|
233
|
+
FGFR4 2.390
|
|
234
|
+
CSK 2.308
|
|
235
|
+
ZAP70 2.068
|
|
236
|
+
SYK 1.998
|
|
237
|
+
...
|
|
238
|
+
EPHA1 -3.501
|
|
239
|
+
FES -3.699
|
|
240
|
+
TNK1 -4.269
|
|
241
|
+
TNK2 -4.577
|
|
242
|
+
DDR2 -4.920
|
|
243
|
+
Length: 93, dtype: float64
|
|
220
244
|
|
|
221
245
|
- So far [The Kinase Library](https://kinase-library.phosphosite.org)
|
|
222
246
|
considers all ***tyr sequences*** in capital regardless of whether or
|
|
@@ -232,13 +256,26 @@ sheet.
|
|
|
232
256
|
``` python
|
|
233
257
|
# Percentile reference sheet
|
|
234
258
|
y_pct = Data.get_pspa_tyr_pct()
|
|
259
|
+
```
|
|
235
260
|
|
|
236
|
-
|
|
261
|
+
``` python
|
|
262
|
+
get_pct('AEEKEyHSEGG',pct_ref = y_pct,**Params("PSPA_y"))
|
|
237
263
|
```
|
|
238
264
|
|
|
239
265
|
considering string: ['-5A', '-4E', '-3E', '-2K', '-1E', '0Y', '1H', '2S', '3E', '4G', '5G']
|
|
240
266
|
|
|
241
|
-
|
|
267
|
+
<div>
|
|
268
|
+
<style scoped>
|
|
269
|
+
.dataframe tbody tr th:only-of-type {
|
|
270
|
+
vertical-align: middle;
|
|
271
|
+
}
|
|
272
|
+
.dataframe tbody tr th {
|
|
273
|
+
vertical-align: top;
|
|
274
|
+
}
|
|
275
|
+
.dataframe thead th {
|
|
276
|
+
text-align: right;
|
|
277
|
+
}
|
|
278
|
+
</style>
|
|
242
279
|
|
|
243
280
|
| | log2(score) | percentile |
|
|
244
281
|
|-------|-------------|------------|
|
|
@@ -255,17 +292,17 @@ get_pct('AEEKEyHSEGG',**param_PSPA_y, pct_ref = y_pct)
|
|
|
255
292
|
| DDR2 | -4.920 | 10.403281 |
|
|
256
293
|
|
|
257
294
|
<p>93 rows × 2 columns</p>
|
|
295
|
+
</div>
|
|
258
296
|
|
|
297
|
+
### Site scoring in a df
|
|
259
298
|
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
### Load your csv
|
|
299
|
+
Load your csv:
|
|
263
300
|
|
|
264
301
|
``` python
|
|
265
302
|
# df = pd.read_csv('your_file.csv')
|
|
266
303
|
```
|
|
267
304
|
|
|
268
|
-
|
|
305
|
+
Or load a demo df
|
|
269
306
|
|
|
270
307
|
``` python
|
|
271
308
|
# Load a demo df with phosphorylation sites
|
|
@@ -273,7 +310,18 @@ df = Data.get_ochoa_site().head()
|
|
|
273
310
|
df.iloc[:,-2:]
|
|
274
311
|
```
|
|
275
312
|
|
|
276
|
-
|
|
313
|
+
<div>
|
|
314
|
+
<style scoped>
|
|
315
|
+
.dataframe tbody tr th:only-of-type {
|
|
316
|
+
vertical-align: middle;
|
|
317
|
+
}
|
|
318
|
+
.dataframe tbody tr th {
|
|
319
|
+
vertical-align: top;
|
|
320
|
+
}
|
|
321
|
+
.dataframe thead th {
|
|
322
|
+
text-align: right;
|
|
323
|
+
}
|
|
324
|
+
</style>
|
|
277
325
|
|
|
278
326
|
| | site_seq | gene_site |
|
|
279
327
|
|-----|-----------------|----------------|
|
|
@@ -283,39 +331,66 @@ df.iloc[:,-2:]
|
|
|
283
331
|
| 3 | KSRFTEYSMTSSVMR | A0A075B6Q4_S68 |
|
|
284
332
|
| 4 | FTEYSMTSSVMRRNE | A0A075B6Q4_S71 |
|
|
285
333
|
|
|
334
|
+
</div>
|
|
286
335
|
|
|
287
|
-
|
|
288
|
-
### Set the column name and param to calculate
|
|
336
|
+
Set the column name and param to calculate
|
|
289
337
|
|
|
290
338
|
Here we choose param_CDDM_upper, as the sequences in the demo df are all
|
|
291
339
|
in capital. You can also choose other params.
|
|
292
340
|
|
|
293
341
|
``` python
|
|
294
|
-
results = predict_kinase_df(df,'site_seq',**
|
|
342
|
+
results = predict_kinase_df(df,'site_seq',**Params("CDDM_upper"))
|
|
295
343
|
results
|
|
296
344
|
```
|
|
297
345
|
|
|
298
346
|
input dataframe has a length 5
|
|
299
347
|
Preprocessing
|
|
300
348
|
Finish preprocessing
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
349
|
+
Merging reference
|
|
350
|
+
Finish merging
|
|
351
|
+
|
|
352
|
+
<div>
|
|
353
|
+
<style scoped>
|
|
354
|
+
.dataframe tbody tr th:only-of-type {
|
|
355
|
+
vertical-align: middle;
|
|
356
|
+
}
|
|
357
|
+
.dataframe tbody tr th {
|
|
358
|
+
vertical-align: top;
|
|
359
|
+
}
|
|
360
|
+
.dataframe thead th {
|
|
361
|
+
text-align: right;
|
|
362
|
+
}
|
|
363
|
+
</style>
|
|
364
|
+
|
|
365
|
+
| | SRC | EPHA3 | FES | NTRK3 | ALK | ABL1 | FLT3 | EPHA8 | EPHB2 | EPHB1 | ... | VRK1 | PKMYT1 | GRK3 | CAMK1B | CDC7 | SMMLCK | ROR1 | GAK | MAST2 | BRAF |
|
|
308
366
|
|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|
|
|
309
|
-
| 0 |
|
|
310
|
-
| 1 |
|
|
311
|
-
| 2 |
|
|
312
|
-
| 3 |
|
|
313
|
-
| 4 |
|
|
367
|
+
| 0 | -2.440640 | -0.818753 | -1.663990 | -0.738991 | -2.047628 | -3.602344 | -3.200998 | -0.935176 | -1.388444 | -1.859450 | ... | -17.103237 | -113.698143 | -16.848783 | -41.520172 | -41.646187 | 1.284159 | -26.566362 | -69.165062 | -17.706400 | -87.763214 |
|
|
368
|
+
| 1 | -3.838486 | -2.735969 | -2.533986 | -2.150399 | -3.792498 | -4.725527 | -5.711791 | -4.534240 | -3.148449 | -2.511518 | ... | -67.889053 | -68.652641 | -45.833855 | -64.171600 | -39.465572 | -65.061722 | -109.561707 | -85.911224 | -60.105064 | -63.889122 |
|
|
369
|
+
| 2 | -2.610423 | -2.370090 | -3.235637 | -1.508413 | -2.571347 | -3.740941 | -3.025596 | -3.373504 | -2.776297 | -3.060740 | ... | -15.798462 | -45.905319 | -61.440742 | -67.695694 | -55.047962 | -42.135216 | -38.501572 | -62.624382 | -56.119389 | -107.060989 |
|
|
370
|
+
| 3 | -5.180541 | -4.201880 | -5.766463 | -3.038421 | -3.836897 | -4.249900 | -5.029885 | -5.411311 | -4.713308 | -4.827825 | ... | -96.978317 | -83.419777 | -22.559393 | -110.611588 | -63.283070 | -37.240440 | -24.497492 | -112.878151 | -43.538158 | -60.348518 |
|
|
371
|
+
| 4 | -2.844254 | -3.322700 | -3.681745 | -1.766435 | -2.666579 | -3.748774 | -4.083619 | -3.912834 | -3.724181 | -3.948160 | ... | -35.824612 | -87.983566 | -83.312317 | -107.162407 | -61.478374 | -85.793571 | -43.738819 | -47.004211 | -42.281624 | -59.518513 |
|
|
314
372
|
|
|
315
|
-
<p>5 rows ×
|
|
373
|
+
<p>5 rows × 328 columns</p>
|
|
374
|
+
</div>
|
|
316
375
|
|
|
376
|
+
``` python
|
|
377
|
+
results.iloc[0].sort_values(ascending=False)
|
|
378
|
+
```
|
|
317
379
|
|
|
318
|
-
|
|
380
|
+
TLK2 8.264621
|
|
381
|
+
GCN2 8.101542
|
|
382
|
+
TLK1 7.693897
|
|
383
|
+
HRI 6.691402
|
|
384
|
+
PLK3 6.579368
|
|
385
|
+
...
|
|
386
|
+
NIK -64.605148
|
|
387
|
+
SRPK2 -67.300667
|
|
388
|
+
GAK -69.165062
|
|
389
|
+
BRAF -87.763214
|
|
390
|
+
PKMYT1 -113.698143
|
|
391
|
+
Name: 0, Length: 328, dtype: float32
|
|
392
|
+
|
|
393
|
+
## Dataset
|
|
319
394
|
|
|
320
395
|
Besides calculating sequence scores, we also provide multiple datasets
|
|
321
396
|
of phosphorylation sites.
|
|
@@ -327,7 +402,18 @@ df = Data.get_cptac_ensembl_site()
|
|
|
327
402
|
df.head(3)
|
|
328
403
|
```
|
|
329
404
|
|
|
330
|
-
|
|
405
|
+
<div>
|
|
406
|
+
<style scoped>
|
|
407
|
+
.dataframe tbody tr th:only-of-type {
|
|
408
|
+
vertical-align: middle;
|
|
409
|
+
}
|
|
410
|
+
.dataframe tbody tr th {
|
|
411
|
+
vertical-align: top;
|
|
412
|
+
}
|
|
413
|
+
.dataframe thead th {
|
|
414
|
+
text-align: right;
|
|
415
|
+
}
|
|
416
|
+
</style>
|
|
331
417
|
|
|
332
418
|
| | gene | site | site_seq | protein | gene_name | gene_site | protein_site |
|
|
333
419
|
|----|----|----|----|----|----|----|----|
|
|
@@ -335,7 +421,7 @@ df.head(3)
|
|
|
335
421
|
| 1 | ENSG00000003056.8 | S267 | DDQLGEESEERDDHL | ENSP00000440488.2 | M6PR | M6PR_S267 | ENSP00000440488_S267 |
|
|
336
422
|
| 2 | ENSG00000048028.11 | S1053 | PPTIRPNSPYDLCSR | ENSP00000003302.4 | USP28 | USP28_S1053 | ENSP00000003302_S1053 |
|
|
337
423
|
|
|
338
|
-
|
|
424
|
+
</div>
|
|
339
425
|
|
|
340
426
|
### [Ochoa et al. human phosphoproteome](https://www.nature.com/articles/s41587-019-0344-3)
|
|
341
427
|
|
|
@@ -344,15 +430,26 @@ df = Data.get_ochoa_site()
|
|
|
344
430
|
df.head(3)
|
|
345
431
|
```
|
|
346
432
|
|
|
347
|
-
|
|
433
|
+
<div>
|
|
434
|
+
<style scoped>
|
|
435
|
+
.dataframe tbody tr th:only-of-type {
|
|
436
|
+
vertical-align: middle;
|
|
437
|
+
}
|
|
438
|
+
.dataframe tbody tr th {
|
|
439
|
+
vertical-align: top;
|
|
440
|
+
}
|
|
441
|
+
.dataframe thead th {
|
|
442
|
+
text-align: right;
|
|
443
|
+
}
|
|
444
|
+
</style>
|
|
348
445
|
|
|
349
446
|
| | uniprot | position | residue | is_disopred | disopred_score | log10_hotspot_pval_min | isHotspot | uniprot_position | functional_score | current_uniprot | name | gene | Sequence | is_valid | site_seq | gene_site |
|
|
350
447
|
|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|
|
|
351
|
-
| 0 | A0A075B6Q4 | 24 | S |
|
|
352
|
-
| 1 | A0A075B6Q4 | 35 | S |
|
|
353
|
-
| 2 | A0A075B6Q4 | 57 | S |
|
|
354
|
-
|
|
448
|
+
| 0 | A0A075B6Q4 | 24 | S | 1.0 | 0.91 | 6.839384 | 1.0 | A0A075B6Q4_24 | 0.149257 | A0A075B6Q4 | A0A075B6Q4_HUMAN | None | MDIQKSENEDDSEWEDVDDEKGDSNDDYDSAGLLSDEDCMSVPGKT... | True | VDDEKGDSNDDYDSA | A0A075B6Q4_S24 |
|
|
449
|
+
| 1 | A0A075B6Q4 | 35 | S | 1.0 | 0.87 | 9.192622 | 0.0 | A0A075B6Q4_35 | 0.136966 | A0A075B6Q4 | A0A075B6Q4_HUMAN | None | MDIQKSENEDDSEWEDVDDEKGDSNDDYDSAGLLSDEDCMSVPGKT... | True | YDSAGLLSDEDCMSV | A0A075B6Q4_S35 |
|
|
450
|
+
| 2 | A0A075B6Q4 | 57 | S | 0.0 | 0.28 | 0.818834 | 0.0 | A0A075B6Q4_57 | 0.125364 | A0A075B6Q4 | A0A075B6Q4_HUMAN | None | MDIQKSENEDDSEWEDVDDEKGDSNDDYDSAGLLSDEDCMSVPGKT... | True | IADHLFWSEETKSRF | A0A075B6Q4_S57 |
|
|
355
451
|
|
|
452
|
+
</div>
|
|
356
453
|
|
|
357
454
|
### PhosphoSitePlus human phosphorylation site
|
|
358
455
|
|
|
@@ -361,7 +458,18 @@ df = Data.get_psp_human_site()
|
|
|
361
458
|
df.head(3)
|
|
362
459
|
```
|
|
363
460
|
|
|
364
|
-
|
|
461
|
+
<div>
|
|
462
|
+
<style scoped>
|
|
463
|
+
.dataframe tbody tr th:only-of-type {
|
|
464
|
+
vertical-align: middle;
|
|
465
|
+
}
|
|
466
|
+
.dataframe tbody tr th {
|
|
467
|
+
vertical-align: top;
|
|
468
|
+
}
|
|
469
|
+
.dataframe thead th {
|
|
470
|
+
text-align: right;
|
|
471
|
+
}
|
|
472
|
+
</style>
|
|
365
473
|
|
|
366
474
|
| | gene | protein | uniprot | site | gene_site | SITE_GRP_ID | species | site_seq | LT_LIT | MS_LIT | MS_CST | CST_CAT# | Ambiguous_Site |
|
|
367
475
|
|----|----|----|----|----|----|----|----|----|----|----|----|----|----|
|
|
@@ -369,7 +477,7 @@ df.head(3)
|
|
|
369
477
|
| 1 | YWHAB | 14-3-3 beta | P31946 | S6 | YWHAB_S6 | 15718709 | human | \_\_MtMDksELVQkAk | NaN | 8.0 | NaN | None | 0 |
|
|
370
478
|
| 2 | YWHAB | 14-3-3 beta | P31946 | Y21 | YWHAB_Y21 | 3426383 | human | LAEQAERyDDMAAAM | NaN | NaN | 4.0 | None | 0 |
|
|
371
479
|
|
|
372
|
-
|
|
480
|
+
</div>
|
|
373
481
|
|
|
374
482
|
### Unique sites of combined Ochoa & PhosphoSitePlus
|
|
375
483
|
|
|
@@ -378,16 +486,26 @@ df = Data.get_combine_site_psp_ochoa()
|
|
|
378
486
|
df.head(3)
|
|
379
487
|
```
|
|
380
488
|
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
489
|
+
<div>
|
|
490
|
+
<style scoped>
|
|
491
|
+
.dataframe tbody tr th:only-of-type {
|
|
492
|
+
vertical-align: middle;
|
|
493
|
+
}
|
|
494
|
+
.dataframe tbody tr th {
|
|
495
|
+
vertical-align: top;
|
|
496
|
+
}
|
|
497
|
+
.dataframe thead th {
|
|
498
|
+
text-align: right;
|
|
499
|
+
}
|
|
500
|
+
</style>
|
|
501
|
+
|
|
502
|
+
| | uniprot | gene | site | site_seq | source | AM_pathogenicity | CDDM_upper | CDDM_max_score |
|
|
503
|
+
|----|----|----|----|----|----|----|----|----|
|
|
504
|
+
| 0 | A0A024R4G9 | C19orf48 | S20 | ITGSRLLSMVPGPAR | psp | NaN | PRKX,AKT1,PKG1,P90RSK,HIPK4,AKT3,HIPK1,PKACB,H... | 2.407041 |
|
|
505
|
+
| 1 | A0A075B6Q4 | None | S24 | VDDEKGDSNDDYDSA | ochoa | NaN | CK2A2,CK2A1,GRK7,GRK5,CK1G1,CK1A,IKKA,CK1G2,CA... | 2.295654 |
|
|
506
|
+
| 2 | A0A075B6Q4 | None | S35 | YDSAGLLSDEDCMSV | ochoa | NaN | CK2A2,CK2A1,IKKA,ATM,IKKB,CAMK1D,MARK2,GRK7,IK... | 2.488683 |
|
|
507
|
+
|
|
508
|
+
</div>
|
|
391
509
|
|
|
392
510
|
## Phosphorylation site sequence example
|
|
393
511
|
|