python-katlas 0.1.2__tar.gz → 0.1.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {python-katlas-0.1.2 → python_katlas-0.1.4}/LICENSE +0 -0
- {python-katlas-0.1.2 → python_katlas-0.1.4}/MANIFEST.in +0 -0
- {python-katlas-0.1.2/python_katlas.egg-info → python_katlas-0.1.4}/PKG-INFO +74 -46
- {python-katlas-0.1.2 → python_katlas-0.1.4}/README.md +54 -43
- python_katlas-0.1.4/katlas/__init__.py +1 -0
- {python-katlas-0.1.2 → python_katlas-0.1.4}/katlas/_modidx.py +2 -2
- {python-katlas-0.1.2 → python_katlas-0.1.4}/katlas/core.py +0 -0
- {python-katlas-0.1.2 → python_katlas-0.1.4}/katlas/dl.py +14 -14
- {python-katlas-0.1.2 → python_katlas-0.1.4}/katlas/feature.py +8 -5
- {python-katlas-0.1.2 → python_katlas-0.1.4}/katlas/imports.py +0 -0
- {python-katlas-0.1.2 → python_katlas-0.1.4}/katlas/plot.py +30 -25
- {python-katlas-0.1.2 → python_katlas-0.1.4}/katlas/train.py +7 -7
- {python-katlas-0.1.2 → python_katlas-0.1.4/python_katlas.egg-info}/PKG-INFO +74 -46
- {python-katlas-0.1.2 → python_katlas-0.1.4}/python_katlas.egg-info/SOURCES.txt +0 -0
- {python-katlas-0.1.2 → python_katlas-0.1.4}/python_katlas.egg-info/dependency_links.txt +0 -0
- {python-katlas-0.1.2 → python_katlas-0.1.4}/python_katlas.egg-info/entry_points.txt +0 -0
- {python-katlas-0.1.2 → python_katlas-0.1.4}/python_katlas.egg-info/not-zip-safe +0 -0
- {python-katlas-0.1.2 → python_katlas-0.1.4}/python_katlas.egg-info/requires.txt +0 -0
- {python-katlas-0.1.2 → python_katlas-0.1.4}/python_katlas.egg-info/top_level.txt +0 -0
- {python-katlas-0.1.2 → python_katlas-0.1.4}/settings.ini +4 -3
- {python-katlas-0.1.2 → python_katlas-0.1.4}/setup.py +0 -0
- python-katlas-0.1.2/katlas/__init__.py +0 -1
- {python-katlas-0.1.2 → python_katlas-0.1.4}/setup.cfg +0 -0
|
File without changes
|
|
File without changes
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: python-katlas
|
|
3
|
-
Version: 0.1.2
|
|
3
|
+
Version: 0.1.4
|
|
4
4
|
Summary: tools for predicting kinome specificities
|
|
5
|
-
Home-page: https://github.com/sky1ove/
|
|
5
|
+
Home-page: https://github.com/sky1ove/katlas
|
|
6
6
|
Author: lily
|
|
7
7
|
Author-email: lcai888666@gmail.com
|
|
8
8
|
License: Apache Software License 2.0
|
|
@@ -17,19 +17,35 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
17
17
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
18
18
|
Requires-Python: >=3.7
|
|
19
19
|
Description-Content-Type: text/markdown
|
|
20
|
-
Provides-Extra: dev
|
|
21
20
|
License-File: LICENSE
|
|
21
|
+
Requires-Dist: statsmodels
|
|
22
|
+
Requires-Dist: fastparquet
|
|
23
|
+
Requires-Dist: tqdm
|
|
24
|
+
Provides-Extra: dev
|
|
25
|
+
Requires-Dist: nbdev; extra == "dev"
|
|
26
|
+
Requires-Dist: pyngrok; extra == "dev"
|
|
27
|
+
Requires-Dist: fastai>=2.7.12; extra == "dev"
|
|
28
|
+
Requires-Dist: fastbook; extra == "dev"
|
|
29
|
+
Requires-Dist: fairscale; extra == "dev"
|
|
30
|
+
Requires-Dist: fair-esm; extra == "dev"
|
|
31
|
+
Requires-Dist: logomaker; extra == "dev"
|
|
32
|
+
Requires-Dist: seaborn; extra == "dev"
|
|
33
|
+
Requires-Dist: rdkit; extra == "dev"
|
|
34
|
+
Requires-Dist: umap-learn; extra == "dev"
|
|
35
|
+
Requires-Dist: adjustText; extra == "dev"
|
|
36
|
+
Requires-Dist: bokeh; extra == "dev"
|
|
37
|
+
Requires-Dist: scikit-learn>=1.3.0; extra == "dev"
|
|
38
|
+
Requires-Dist: openpyxl; extra == "dev"
|
|
22
39
|
|
|
23
40
|
# KATLAS
|
|
24
41
|
|
|
25
42
|
|
|
26
43
|
<!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->
|
|
27
44
|
|
|
28
|
-
<
|
|
29
|
-
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
|
|
30
|
-
</a>
|
|
45
|
+
<img alt="Katlas logo" width="600" caption="Katlas logo" src="https://github.com/sky1ove/katlas/raw/main/dataset/images/logo.png" id="logo"/>
|
|
31
46
|
|
|
32
|
-
<
|
|
47
|
+
<p><a target="_blank" href="https://colab.research.google.com/github/sky1ove/katlas/blob/main/nbs/index.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>
|
|
48
|
+
<a href="https://pypi.org/project/python-katlas/"><img src="https://img.shields.io/pypi/v/python-katlas?link=https%3A%2F%2Fpypi.org%2Fproject%2Fpython-katlas%2F" alt="PyPI"></a></p>
|
|
33
49
|
|
|
34
50
|
KATLAS is a repository containing python tools to predict kinases given
|
|
35
51
|
a substrate sequence. It also contains datasets of kinase substrate
|
|
@@ -38,9 +54,8 @@ specificities and human phosphoproteomics.
|
|
|
38
54
|
***References***: Please cite the appropriate papers if KATLAS is
|
|
39
55
|
helpful to your research.
|
|
40
56
|
|
|
41
|
-
- KATLAS was described in the paper \[Decoding Human
|
|
42
|
-
|
|
43
|
-
(manuscript)\]
|
|
57
|
+
- KATLAS was described in the paper \[Computational Decoding of Human
|
|
58
|
+
Kinome Substrate Specificities and Functions\]
|
|
44
59
|
|
|
45
60
|
- The positional scanning peptide array (PSPA) data is from paper [An
|
|
46
61
|
atlas of substrate specificities for the human serine/threonine
|
|
@@ -60,13 +75,21 @@ helpful to your research.
|
|
|
60
75
|
phosphoproteome](https://www.nature.com/articles/s41587-019-0344-3),
|
|
61
76
|
and [CPTAC](https://pdc.cancer.gov/pdc/cptac-pancancer) /
|
|
62
77
|
[LinkedOmics](https://academic.oup.com/nar/article/46/D1/D956/4607804)
|
|
63
|
-
|
|
64
|
-
|
|
78
|
+
|
|
79
|
+
## Reproduce datasets & figures
|
|
80
|
+
|
|
81
|
+
Follow the instructions in katlas_raw:
|
|
82
|
+
https://github.com/sky1ove/katlas_raw
|
|
83
|
+
|
|
84
|
+
Need to install the package via: `pip install 'python-katlas[dev]' -U`
|
|
85
|
+
|
|
65
86
|
## Web applications
|
|
66
87
|
|
|
67
|
-
Users can now run the analysis directly on the web without needing to
|
|
88
|
+
Users can now run the analysis directly on the web without needing to
|
|
89
|
+
code.
|
|
68
90
|
|
|
69
|
-
Check out our latest web:
|
|
91
|
+
Check out our latest web platform:
|
|
92
|
+
[kinase-atlas.com](https://kinase-atlas.com/)
|
|
70
93
|
|
|
71
94
|
## Tutorials on Colab
|
|
72
95
|
|
|
@@ -77,14 +100,12 @@ Check out our latest web: [kinase-atlas.com](https://kinase-atlas.com/)
|
|
|
77
100
|
- 3. [Kinase enrichment analysis for AKT
|
|
78
101
|
inhibitor](https://colab.research.google.com/github/sky1ove/katlas/blob/main/nbs/tutorial_03a_enrichment_AKTi.ipynb)
|
|
79
102
|
|
|
80
|
-
|
|
81
103
|
## Install
|
|
82
104
|
|
|
83
|
-
|
|
105
|
+
pip install python-katlas -U
|
|
84
106
|
|
|
85
|
-
|
|
86
|
-
pip install python-katlas -
|
|
87
|
-
```
|
|
107
|
+
To use other modules besides the core, do
|
|
108
|
+
`pip install 'python-katlas[dev]' -U`
|
|
88
109
|
|
|
89
110
|
## Import
|
|
90
111
|
|
|
@@ -233,6 +254,7 @@ get_pct('AEEKEyHSEGG',**param_PSPA_y, pct_ref = y_pct)
|
|
|
233
254
|
| TNK2 | -4.577 | 2.050581 |
|
|
234
255
|
| DDR2 | -4.920 | 10.403281 |
|
|
235
256
|
|
|
257
|
+
<p>93 rows × 2 columns</p>
|
|
236
258
|
|
|
237
259
|
|
|
238
260
|
## High-throughput substrate scoring on a dataframe
|
|
@@ -252,6 +274,7 @@ df.iloc[:,-2:]
|
|
|
252
274
|
```
|
|
253
275
|
|
|
254
276
|
|
|
277
|
+
|
|
255
278
|
| | site_seq | gene_site |
|
|
256
279
|
|-----|-----------------|----------------|
|
|
257
280
|
| 0 | VDDEKGDSNDDYDSA | A0A075B6Q4_S24 |
|
|
@@ -281,14 +304,15 @@ results
|
|
|
281
304
|
|
|
282
305
|
|
|
283
306
|
|
|
284
|
-
| kinase | SRC
|
|
285
|
-
|
|
286
|
-
| 0
|
|
287
|
-
| 1
|
|
288
|
-
| 2
|
|
289
|
-
| 3
|
|
290
|
-
| 4
|
|
307
|
+
| kinase | SRC | EPHA3 | FES | NTRK3 | ALK | EPHA8 | ABL1 | FLT3 | EPHB2 | FYN | ... | MEK5 | PKN2 | MAP2K7 | MRCKB | HIPK3 | CDK8 | BUB1 | MEKK3 | MAP2K3 | GRK1 |
|
|
308
|
+
|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|
|
|
309
|
+
| 0 | 0.991760 | 1.093712 | 1.051750 | 1.067134 | 1.013682 | 1.097519 | 0.966379 | 0.982464 | 1.054986 | 1.055910 | ... | 1.314859 | 1.635470 | 1.652251 | 1.622672 | 1.362973 | 1.797155 | 1.305198 | 1.423618 | 1.504941 | 1.872020 |
|
|
310
|
+
| 1 | 0.910262 | 0.953743 | 0.942327 | 0.950601 | 0.872694 | 0.932586 | 0.846899 | 0.826662 | 0.915020 | 0.942713 | ... | 1.175454 | 1.402006 | 1.430392 | 1.215826 | 1.569373 | 1.716455 | 1.270999 | 1.195081 | 1.223082 | 1.793290 |
|
|
311
|
+
| 2 | 0.849866 | 0.899910 | 0.848895 | 0.879652 | 0.874959 | 0.899414 | 0.839200 | 0.836523 | 0.858040 | 0.867269 | ... | 1.408003 | 1.813739 | 1.454786 | 1.084522 | 1.352556 | 1.524663 | 1.377839 | 1.173830 | 1.305691 | 1.811849 |
|
|
312
|
+
| 3 | 0.803826 | 0.836527 | 0.800759 | 0.894570 | 0.839905 | 0.781001 | 0.847847 | 0.807040 | 0.805877 | 0.801402 | ... | 1.110307 | 1.703637 | 1.795092 | 1.469653 | 1.549936 | 1.491344 | 1.446922 | 1.055452 | 1.534895 | 1.741090 |
|
|
313
|
+
| 4 | 0.822793 | 0.796532 | 0.792343 | 0.839882 | 0.810122 | 0.781420 | 0.805251 | 0.795022 | 0.790380 | 0.864538 | ... | 1.062617 | 1.357689 | 1.485945 | 1.249266 | 1.456078 | 1.422782 | 1.376471 | 1.089629 | 1.121309 | 1.697524 |
|
|
291
314
|
|
|
315
|
+
<p>5 rows × 289 columns</p>
|
|
292
316
|
|
|
293
317
|
|
|
294
318
|
## Phosphorylation sites
|
|
@@ -305,11 +329,11 @@ df.head(3)
|
|
|
305
329
|
|
|
306
330
|
|
|
307
331
|
|
|
308
|
-
|
|
|
309
|
-
|
|
310
|
-
| 0
|
|
311
|
-
| 1
|
|
312
|
-
| 2
|
|
332
|
+
| | gene | site | site_seq | protein | gene_name | gene_site | protein_site |
|
|
333
|
+
|----|----|----|----|----|----|----|----|
|
|
334
|
+
| 0 | ENSG00000003056.8 | S267 | DDQLGEESEERDDHL | ENSP00000000412.3 | M6PR | M6PR_S267 | ENSP00000000412_S267 |
|
|
335
|
+
| 1 | ENSG00000003056.8 | S267 | DDQLGEESEERDDHL | ENSP00000440488.2 | M6PR | M6PR_S267 | ENSP00000440488_S267 |
|
|
336
|
+
| 2 | ENSG00000048028.11 | S1053 | PPTIRPNSPYDLCSR | ENSP00000003302.4 | USP28 | USP28_S1053 | ENSP00000003302_S1053 |
|
|
313
337
|
|
|
314
338
|
|
|
315
339
|
|
|
@@ -321,11 +345,12 @@ df.head(3)
|
|
|
321
345
|
```
|
|
322
346
|
|
|
323
347
|
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
|
328
|
-
|
|
|
348
|
+
|
|
349
|
+
| | uniprot | position | residue | is_disopred | disopred_score | log10_hotspot_pval_min | isHotspot | uniprot_position | functional_score | current_uniprot | name | gene | Sequence | is_valid | site_seq | gene_site |
|
|
350
|
+
|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|
|
|
351
|
+
| 0 | A0A075B6Q4 | 24 | S | True | 0.91 | 6.839384 | True | A0A075B6Q4_24 | 0.149257 | A0A075B6Q4 | A0A075B6Q4_HUMAN | None | MDIQKSENEDDSEWEDVDDEKGDSNDDYDSAGLLSDEDCMSVPGKT... | True | VDDEKGDSNDDYDSA | A0A075B6Q4_S24 |
|
|
352
|
+
| 1 | A0A075B6Q4 | 35 | S | True | 0.87 | 9.192622 | False | A0A075B6Q4_35 | 0.136966 | A0A075B6Q4 | A0A075B6Q4_HUMAN | None | MDIQKSENEDDSEWEDVDDEKGDSNDDYDSAGLLSDEDCMSVPGKT... | True | YDSAGLLSDEDCMSV | A0A075B6Q4_S35 |
|
|
353
|
+
| 2 | A0A075B6Q4 | 57 | S | False | 0.28 | 0.818834 | False | A0A075B6Q4_57 | 0.125364 | A0A075B6Q4 | A0A075B6Q4_HUMAN | None | MDIQKSENEDDSEWEDVDDEKGDSNDDYDSAGLLSDEDCMSVPGKT... | True | IADHLFWSEETKSRF | A0A075B6Q4_S57 |
|
|
329
354
|
|
|
330
355
|
|
|
331
356
|
|
|
@@ -337,11 +362,12 @@ df.head(3)
|
|
|
337
362
|
```
|
|
338
363
|
|
|
339
364
|
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
|
344
|
-
|
|
|
365
|
+
|
|
366
|
+
| | gene | protein | uniprot | site | gene_site | SITE_GRP_ID | species | site_seq | LT_LIT | MS_LIT | MS_CST | CST_CAT# | Ambiguous_Site |
|
|
367
|
+
|----|----|----|----|----|----|----|----|----|----|----|----|----|----|
|
|
368
|
+
| 0 | YWHAB | 14-3-3 beta | P31946 | T2 | YWHAB_T2 | 15718712 | human | \_\_\_\_\_\_MtMDksELV | NaN | 3.0 | 1.0 | None | 0 |
|
|
369
|
+
| 1 | YWHAB | 14-3-3 beta | P31946 | S6 | YWHAB_S6 | 15718709 | human | \_\_MtMDksELVQkAk | NaN | 8.0 | NaN | None | 0 |
|
|
370
|
+
| 2 | YWHAB | 14-3-3 beta | P31946 | Y21 | YWHAB_Y21 | 3426383 | human | LAEQAERyDDMAAAM | NaN | NaN | 4.0 | None | 0 |
|
|
345
371
|
|
|
346
372
|
|
|
347
373
|
|
|
@@ -353,12 +379,14 @@ df.head(3)
|
|
|
353
379
|
```
|
|
354
380
|
|
|
355
381
|
|
|
356
|
-
| | site_seq | gene_site | gene | source | num_site | acceptor | -7 | -6 | -5 | -4 | ... | -2 | -1 | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
|
|
357
|
-
|-----|-----------------|------------|-------|--------|----------|----------|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|
|
|
358
|
-
| 0 | AAAAAAASGGAGSDN | PBX1_S136 | PBX1 | ochoa | 1 | S | A | A | A | A | ... | A | A | S | G | G | A | G | S | D | N |
|
|
359
|
-
| 1 | AAAAAAASGGGVSPD | PBX2_S146 | PBX2 | ochoa | 1 | S | A | A | A | A | ... | A | A | S | G | G | G | V | S | P | D |
|
|
360
|
-
| 2 | AAAAAAASGVTTGKP | CLASR_S349 | CLASR | ochoa | 1 | S | A | A | A | A | ... | A | A | S | G | V | T | T | G | K | P |
|
|
361
382
|
|
|
383
|
+
| | site_seq | gene_site | gene | source | num_site | acceptor | -7 | -6 | -5 | -4 | ... | -2 | -1 | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
|
|
384
|
+
|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|
|
|
385
|
+
| 0 | AAAAAAASGGAGSDN | PBX1_S136 | PBX1 | ochoa | 1 | S | A | A | A | A | ... | A | A | S | G | G | A | G | S | D | N |
|
|
386
|
+
| 1 | AAAAAAASGGGVSPD | PBX2_S146 | PBX2 | ochoa | 1 | S | A | A | A | A | ... | A | A | S | G | G | G | V | S | P | D |
|
|
387
|
+
| 2 | AAAAAAASGVTTGKP | CLASR_S349 | CLASR | ochoa | 1 | S | A | A | A | A | ... | A | A | S | G | V | T | T | G | K | P |
|
|
388
|
+
|
|
389
|
+
<p>3 rows × 21 columns</p>
|
|
362
390
|
|
|
363
391
|
|
|
364
392
|
## Phosphorylation site sequence example
|
|
@@ -3,11 +3,10 @@
|
|
|
3
3
|
|
|
4
4
|
<!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->
|
|
5
5
|
|
|
6
|
-
<
|
|
7
|
-
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
|
|
8
|
-
</a>
|
|
6
|
+
<img alt="Katlas logo" width="600" caption="Katlas logo" src="https://github.com/sky1ove/katlas/raw/main/dataset/images/logo.png" id="logo"/>
|
|
9
7
|
|
|
10
|
-
<
|
|
8
|
+
<p><a target="_blank" href="https://colab.research.google.com/github/sky1ove/katlas/blob/main/nbs/index.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>
|
|
9
|
+
<a href="https://pypi.org/project/python-katlas/"><img src="https://img.shields.io/pypi/v/python-katlas?link=https%3A%2F%2Fpypi.org%2Fproject%2Fpython-katlas%2F" alt="PyPI"></a></p>
|
|
11
10
|
|
|
12
11
|
KATLAS is a repository containing python tools to predict kinases given
|
|
13
12
|
a substrate sequence. It also contains datasets of kinase substrate
|
|
@@ -16,9 +15,8 @@ specificities and human phosphoproteomics.
|
|
|
16
15
|
***References***: Please cite the appropriate papers if KATLAS is
|
|
17
16
|
helpful to your research.
|
|
18
17
|
|
|
19
|
-
- KATLAS was described in the paper \[Decoding Human
|
|
20
|
-
|
|
21
|
-
(manuscript)\]
|
|
18
|
+
- KATLAS was described in the paper \[Computational Decoding of Human
|
|
19
|
+
Kinome Substrate Specificities and Functions\]
|
|
22
20
|
|
|
23
21
|
- The positional scanning peptide array (PSPA) data is from paper [An
|
|
24
22
|
atlas of substrate specificities for the human serine/threonine
|
|
@@ -38,13 +36,21 @@ helpful to your research.
|
|
|
38
36
|
phosphoproteome](https://www.nature.com/articles/s41587-019-0344-3),
|
|
39
37
|
and [CPTAC](https://pdc.cancer.gov/pdc/cptac-pancancer) /
|
|
40
38
|
[LinkedOmics](https://academic.oup.com/nar/article/46/D1/D956/4607804)
|
|
41
|
-
|
|
42
|
-
|
|
39
|
+
|
|
40
|
+
## Reproduce datasets & figures
|
|
41
|
+
|
|
42
|
+
Follow the instructions in katlas_raw:
|
|
43
|
+
https://github.com/sky1ove/katlas_raw
|
|
44
|
+
|
|
45
|
+
Need to install the package via: `pip install 'python-katlas[dev]' -U`
|
|
46
|
+
|
|
43
47
|
## Web applications
|
|
44
48
|
|
|
45
|
-
Users can now run the analysis directly on the web without needing to
|
|
49
|
+
Users can now run the analysis directly on the web without needing to
|
|
50
|
+
code.
|
|
46
51
|
|
|
47
|
-
Check out our latest web:
|
|
52
|
+
Check out our latest web platform:
|
|
53
|
+
[kinase-atlas.com](https://kinase-atlas.com/)
|
|
48
54
|
|
|
49
55
|
## Tutorials on Colab
|
|
50
56
|
|
|
@@ -55,14 +61,12 @@ Check out our latest web: [kinase-atlas.com](https://kinase-atlas.com/)
|
|
|
55
61
|
- 3. [Kinase enrichment analysis for AKT
|
|
56
62
|
inhibitor](https://colab.research.google.com/github/sky1ove/katlas/blob/main/nbs/tutorial_03a_enrichment_AKTi.ipynb)
|
|
57
63
|
|
|
58
|
-
|
|
59
64
|
## Install
|
|
60
65
|
|
|
61
|
-
|
|
66
|
+
pip install python-katlas -U
|
|
62
67
|
|
|
63
|
-
|
|
64
|
-
pip install python-katlas -
|
|
65
|
-
```
|
|
68
|
+
To use other modules besides the core, do
|
|
69
|
+
`pip install 'python-katlas[dev]' -U`
|
|
66
70
|
|
|
67
71
|
## Import
|
|
68
72
|
|
|
@@ -211,6 +215,7 @@ get_pct('AEEKEyHSEGG',**param_PSPA_y, pct_ref = y_pct)
|
|
|
211
215
|
| TNK2 | -4.577 | 2.050581 |
|
|
212
216
|
| DDR2 | -4.920 | 10.403281 |
|
|
213
217
|
|
|
218
|
+
<p>93 rows × 2 columns</p>
|
|
214
219
|
|
|
215
220
|
|
|
216
221
|
## High-throughput substrate scoring on a dataframe
|
|
@@ -230,6 +235,7 @@ df.iloc[:,-2:]
|
|
|
230
235
|
```
|
|
231
236
|
|
|
232
237
|
|
|
238
|
+
|
|
233
239
|
| | site_seq | gene_site |
|
|
234
240
|
|-----|-----------------|----------------|
|
|
235
241
|
| 0 | VDDEKGDSNDDYDSA | A0A075B6Q4_S24 |
|
|
@@ -259,14 +265,15 @@ results
|
|
|
259
265
|
|
|
260
266
|
|
|
261
267
|
|
|
262
|
-
| kinase | SRC
|
|
263
|
-
|
|
264
|
-
| 0
|
|
265
|
-
| 1
|
|
266
|
-
| 2
|
|
267
|
-
| 3
|
|
268
|
-
| 4
|
|
268
|
+
| kinase | SRC | EPHA3 | FES | NTRK3 | ALK | EPHA8 | ABL1 | FLT3 | EPHB2 | FYN | ... | MEK5 | PKN2 | MAP2K7 | MRCKB | HIPK3 | CDK8 | BUB1 | MEKK3 | MAP2K3 | GRK1 |
|
|
269
|
+
|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|
|
|
270
|
+
| 0 | 0.991760 | 1.093712 | 1.051750 | 1.067134 | 1.013682 | 1.097519 | 0.966379 | 0.982464 | 1.054986 | 1.055910 | ... | 1.314859 | 1.635470 | 1.652251 | 1.622672 | 1.362973 | 1.797155 | 1.305198 | 1.423618 | 1.504941 | 1.872020 |
|
|
271
|
+
| 1 | 0.910262 | 0.953743 | 0.942327 | 0.950601 | 0.872694 | 0.932586 | 0.846899 | 0.826662 | 0.915020 | 0.942713 | ... | 1.175454 | 1.402006 | 1.430392 | 1.215826 | 1.569373 | 1.716455 | 1.270999 | 1.195081 | 1.223082 | 1.793290 |
|
|
272
|
+
| 2 | 0.849866 | 0.899910 | 0.848895 | 0.879652 | 0.874959 | 0.899414 | 0.839200 | 0.836523 | 0.858040 | 0.867269 | ... | 1.408003 | 1.813739 | 1.454786 | 1.084522 | 1.352556 | 1.524663 | 1.377839 | 1.173830 | 1.305691 | 1.811849 |
|
|
273
|
+
| 3 | 0.803826 | 0.836527 | 0.800759 | 0.894570 | 0.839905 | 0.781001 | 0.847847 | 0.807040 | 0.805877 | 0.801402 | ... | 1.110307 | 1.703637 | 1.795092 | 1.469653 | 1.549936 | 1.491344 | 1.446922 | 1.055452 | 1.534895 | 1.741090 |
|
|
274
|
+
| 4 | 0.822793 | 0.796532 | 0.792343 | 0.839882 | 0.810122 | 0.781420 | 0.805251 | 0.795022 | 0.790380 | 0.864538 | ... | 1.062617 | 1.357689 | 1.485945 | 1.249266 | 1.456078 | 1.422782 | 1.376471 | 1.089629 | 1.121309 | 1.697524 |
|
|
269
275
|
|
|
276
|
+
<p>5 rows × 289 columns</p>
|
|
270
277
|
|
|
271
278
|
|
|
272
279
|
## Phosphorylation sites
|
|
@@ -283,11 +290,11 @@ df.head(3)
|
|
|
283
290
|
|
|
284
291
|
|
|
285
292
|
|
|
286
|
-
|
|
|
287
|
-
|
|
288
|
-
| 0
|
|
289
|
-
| 1
|
|
290
|
-
| 2
|
|
293
|
+
| | gene | site | site_seq | protein | gene_name | gene_site | protein_site |
|
|
294
|
+
|----|----|----|----|----|----|----|----|
|
|
295
|
+
| 0 | ENSG00000003056.8 | S267 | DDQLGEESEERDDHL | ENSP00000000412.3 | M6PR | M6PR_S267 | ENSP00000000412_S267 |
|
|
296
|
+
| 1 | ENSG00000003056.8 | S267 | DDQLGEESEERDDHL | ENSP00000440488.2 | M6PR | M6PR_S267 | ENSP00000440488_S267 |
|
|
297
|
+
| 2 | ENSG00000048028.11 | S1053 | PPTIRPNSPYDLCSR | ENSP00000003302.4 | USP28 | USP28_S1053 | ENSP00000003302_S1053 |
|
|
291
298
|
|
|
292
299
|
|
|
293
300
|
|
|
@@ -299,11 +306,12 @@ df.head(3)
|
|
|
299
306
|
```
|
|
300
307
|
|
|
301
308
|
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
|
306
|
-
|
|
|
309
|
+
|
|
310
|
+
| | uniprot | position | residue | is_disopred | disopred_score | log10_hotspot_pval_min | isHotspot | uniprot_position | functional_score | current_uniprot | name | gene | Sequence | is_valid | site_seq | gene_site |
|
|
311
|
+
|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|
|
|
312
|
+
| 0 | A0A075B6Q4 | 24 | S | True | 0.91 | 6.839384 | True | A0A075B6Q4_24 | 0.149257 | A0A075B6Q4 | A0A075B6Q4_HUMAN | None | MDIQKSENEDDSEWEDVDDEKGDSNDDYDSAGLLSDEDCMSVPGKT... | True | VDDEKGDSNDDYDSA | A0A075B6Q4_S24 |
|
|
313
|
+
| 1 | A0A075B6Q4 | 35 | S | True | 0.87 | 9.192622 | False | A0A075B6Q4_35 | 0.136966 | A0A075B6Q4 | A0A075B6Q4_HUMAN | None | MDIQKSENEDDSEWEDVDDEKGDSNDDYDSAGLLSDEDCMSVPGKT... | True | YDSAGLLSDEDCMSV | A0A075B6Q4_S35 |
|
|
314
|
+
| 2 | A0A075B6Q4 | 57 | S | False | 0.28 | 0.818834 | False | A0A075B6Q4_57 | 0.125364 | A0A075B6Q4 | A0A075B6Q4_HUMAN | None | MDIQKSENEDDSEWEDVDDEKGDSNDDYDSAGLLSDEDCMSVPGKT... | True | IADHLFWSEETKSRF | A0A075B6Q4_S57 |
|
|
307
315
|
|
|
308
316
|
|
|
309
317
|
|
|
@@ -315,11 +323,12 @@ df.head(3)
|
|
|
315
323
|
```
|
|
316
324
|
|
|
317
325
|
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
|
322
|
-
|
|
|
326
|
+
|
|
327
|
+
| | gene | protein | uniprot | site | gene_site | SITE_GRP_ID | species | site_seq | LT_LIT | MS_LIT | MS_CST | CST_CAT# | Ambiguous_Site |
|
|
328
|
+
|----|----|----|----|----|----|----|----|----|----|----|----|----|----|
|
|
329
|
+
| 0 | YWHAB | 14-3-3 beta | P31946 | T2 | YWHAB_T2 | 15718712 | human | \_\_\_\_\_\_MtMDksELV | NaN | 3.0 | 1.0 | None | 0 |
|
|
330
|
+
| 1 | YWHAB | 14-3-3 beta | P31946 | S6 | YWHAB_S6 | 15718709 | human | \_\_MtMDksELVQkAk | NaN | 8.0 | NaN | None | 0 |
|
|
331
|
+
| 2 | YWHAB | 14-3-3 beta | P31946 | Y21 | YWHAB_Y21 | 3426383 | human | LAEQAERyDDMAAAM | NaN | NaN | 4.0 | None | 0 |
|
|
323
332
|
|
|
324
333
|
|
|
325
334
|
|
|
@@ -331,12 +340,14 @@ df.head(3)
|
|
|
331
340
|
```
|
|
332
341
|
|
|
333
342
|
|
|
334
|
-
| | site_seq | gene_site | gene | source | num_site | acceptor | -7 | -6 | -5 | -4 | ... | -2 | -1 | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
|
|
335
|
-
|-----|-----------------|------------|-------|--------|----------|----------|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|
|
|
336
|
-
| 0 | AAAAAAASGGAGSDN | PBX1_S136 | PBX1 | ochoa | 1 | S | A | A | A | A | ... | A | A | S | G | G | A | G | S | D | N |
|
|
337
|
-
| 1 | AAAAAAASGGGVSPD | PBX2_S146 | PBX2 | ochoa | 1 | S | A | A | A | A | ... | A | A | S | G | G | G | V | S | P | D |
|
|
338
|
-
| 2 | AAAAAAASGVTTGKP | CLASR_S349 | CLASR | ochoa | 1 | S | A | A | A | A | ... | A | A | S | G | V | T | T | G | K | P |
|
|
339
343
|
|
|
344
|
+
| | site_seq | gene_site | gene | source | num_site | acceptor | -7 | -6 | -5 | -4 | ... | -2 | -1 | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
|
|
345
|
+
|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|
|
|
346
|
+
| 0 | AAAAAAASGGAGSDN | PBX1_S136 | PBX1 | ochoa | 1 | S | A | A | A | A | ... | A | A | S | G | G | A | G | S | D | N |
|
|
347
|
+
| 1 | AAAAAAASGGGVSPD | PBX2_S146 | PBX2 | ochoa | 1 | S | A | A | A | A | ... | A | A | S | G | G | G | V | S | P | D |
|
|
348
|
+
| 2 | AAAAAAASGVTTGKP | CLASR_S349 | CLASR | ochoa | 1 | S | A | A | A | A | ... | A | A | S | G | V | T | T | G | K | P |
|
|
349
|
+
|
|
350
|
+
<p>3 rows × 21 columns</p>
|
|
340
351
|
|
|
341
352
|
|
|
342
353
|
## Phosphorylation site sequence example
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.1.4"
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
# Autogenerated by nbdev
|
|
2
2
|
|
|
3
3
|
d = { 'settings': { 'branch': 'main',
|
|
4
|
-
'doc_baseurl': '/
|
|
4
|
+
'doc_baseurl': '/katlas',
|
|
5
5
|
'doc_host': 'https://sky1ove.github.io',
|
|
6
|
-
'git_url': 'https://github.com/sky1ove/
|
|
6
|
+
'git_url': 'https://github.com/sky1ove/katlas',
|
|
7
7
|
'lib_path': 'katlas'},
|
|
8
8
|
'syms': { 'katlas.core': { 'katlas.core.CPTAC': ('core.html#cptac', 'katlas/core.py'),
|
|
9
9
|
'katlas.core.CPTAC._fetch_data': ('core.html#cptac._fetch_data', 'katlas/core.py'),
|
|
File without changes
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
__all__ = ['def_device', 'seed_everything', 'GeneralDataset', 'get_sampler', 'MLP_1', 'CNN1D_1', 'init_weights', 'lin_wn',
|
|
7
7
|
'conv_wn', 'CNN1D_2', 'train_dl', 'train_dl_cv', 'predict_dl']
|
|
8
8
|
|
|
9
|
-
# %% ../nbs/04_DL.ipynb
|
|
9
|
+
# %% ../nbs/04_DL.ipynb 4
|
|
10
10
|
from fastbook import *
|
|
11
11
|
import fastcore.all as fc,torch.nn.init as init
|
|
12
12
|
from fastai.callback.training import GradientClip
|
|
@@ -22,7 +22,7 @@ from sklearn.model_selection import *
|
|
|
22
22
|
from sklearn.metrics import mean_squared_error
|
|
23
23
|
from scipy.stats import spearmanr,pearsonr
|
|
24
24
|
|
|
25
|
-
# %% ../nbs/04_DL.ipynb
|
|
25
|
+
# %% ../nbs/04_DL.ipynb 6
|
|
26
26
|
def seed_everything(seed=123):
|
|
27
27
|
random.seed(seed)
|
|
28
28
|
os.environ['PYTHONHASHSEED'] = str(seed)
|
|
@@ -32,10 +32,10 @@ def seed_everything(seed=123):
|
|
|
32
32
|
torch.backends.cudnn.deterministic = True
|
|
33
33
|
torch.backends.cudnn.benchmark = False
|
|
34
34
|
|
|
35
|
-
# %% ../nbs/04_DL.ipynb
|
|
35
|
+
# %% ../nbs/04_DL.ipynb 8
|
|
36
36
|
def_device = 'mps' if torch.backends.mps.is_available() else 'cuda' if torch.cuda.is_available() else 'cpu'
|
|
37
37
|
|
|
38
|
-
# %% ../nbs/04_DL.ipynb
|
|
38
|
+
# %% ../nbs/04_DL.ipynb 13
|
|
39
39
|
class GeneralDataset:
|
|
40
40
|
def __init__(self,
|
|
41
41
|
df, # a dataframe of values
|
|
@@ -62,7 +62,7 @@ class GeneralDataset:
|
|
|
62
62
|
y = torch.Tensor(self.y[index])
|
|
63
63
|
return X, y
|
|
64
64
|
|
|
65
|
-
# %% ../nbs/04_DL.ipynb
|
|
65
|
+
# %% ../nbs/04_DL.ipynb 17
|
|
66
66
|
def get_sampler(info,col):
|
|
67
67
|
|
|
68
68
|
"For imbalanced data, get higher weights for less-represented samples"
|
|
@@ -82,7 +82,7 @@ def get_sampler(info,col):
|
|
|
82
82
|
|
|
83
83
|
return sampler
|
|
84
84
|
|
|
85
|
-
# %% ../nbs/04_DL.ipynb
|
|
85
|
+
# %% ../nbs/04_DL.ipynb 23
|
|
86
86
|
def MLP_1(num_features,
|
|
87
87
|
num_targets,
|
|
88
88
|
hidden_units = [512, 218],
|
|
@@ -112,7 +112,7 @@ def MLP_1(num_features,
|
|
|
112
112
|
|
|
113
113
|
return model
|
|
114
114
|
|
|
115
|
-
# %% ../nbs/04_DL.ipynb
|
|
115
|
+
# %% ../nbs/04_DL.ipynb 29
|
|
116
116
|
class CNN1D_1(Module):
|
|
117
117
|
|
|
118
118
|
def __init__(self,
|
|
@@ -137,12 +137,12 @@ class CNN1D_1(Module):
|
|
|
137
137
|
x = self.fc2(x)
|
|
138
138
|
return x
|
|
139
139
|
|
|
140
|
-
# %% ../nbs/04_DL.ipynb
|
|
140
|
+
# %% ../nbs/04_DL.ipynb 33
|
|
141
141
|
def init_weights(m, leaky=0.):
|
|
142
142
|
"Initiate any Conv layer with Kaiming norm."
|
|
143
143
|
if isinstance(m, (nn.Conv1d,nn.Conv2d,nn.Conv3d)): init.kaiming_normal_(m.weight, a=leaky)
|
|
144
144
|
|
|
145
|
-
# %% ../nbs/04_DL.ipynb
|
|
145
|
+
# %% ../nbs/04_DL.ipynb 34
|
|
146
146
|
def lin_wn(ni,nf,dp=0.1,act=nn.SiLU):
|
|
147
147
|
"Weight norm of linear."
|
|
148
148
|
layers = nn.Sequential(
|
|
@@ -152,7 +152,7 @@ def lin_wn(ni,nf,dp=0.1,act=nn.SiLU):
|
|
|
152
152
|
if act: layers.append(act())
|
|
153
153
|
return layers
|
|
154
154
|
|
|
155
|
-
# %% ../nbs/04_DL.ipynb
|
|
155
|
+
# %% ../nbs/04_DL.ipynb 35
|
|
156
156
|
def conv_wn(ni, nf, ks=3, stride=1, padding=1, dp=0.1,act=nn.ReLU):
|
|
157
157
|
"Weight norm of conv."
|
|
158
158
|
layers = nn.Sequential(
|
|
@@ -162,7 +162,7 @@ def conv_wn(ni, nf, ks=3, stride=1, padding=1, dp=0.1,act=nn.ReLU):
|
|
|
162
162
|
if act: layers.append(act())
|
|
163
163
|
return layers
|
|
164
164
|
|
|
165
|
-
# %% ../nbs/04_DL.ipynb
|
|
165
|
+
# %% ../nbs/04_DL.ipynb 36
|
|
166
166
|
class CNN1D_2(nn.Module):
|
|
167
167
|
|
|
168
168
|
def __init__(self, ni, nf, amp_scale = 16):
|
|
@@ -212,7 +212,7 @@ class CNN1D_2(nn.Module):
|
|
|
212
212
|
|
|
213
213
|
return x
|
|
214
214
|
|
|
215
|
-
# %% ../nbs/04_DL.ipynb
|
|
215
|
+
# %% ../nbs/04_DL.ipynb 40
|
|
216
216
|
def train_dl(df,
|
|
217
217
|
feat_col,
|
|
218
218
|
target_col,
|
|
@@ -275,7 +275,7 @@ def train_dl(df,
|
|
|
275
275
|
|
|
276
276
|
return target, pred
|
|
277
277
|
|
|
278
|
-
# %% ../nbs/04_DL.ipynb
|
|
278
|
+
# %% ../nbs/04_DL.ipynb 45
|
|
279
279
|
@fc.delegates(train_dl)
|
|
280
280
|
def train_dl_cv(df,
|
|
281
281
|
feat_col,
|
|
@@ -325,7 +325,7 @@ def train_dl_cv(df,
|
|
|
325
325
|
|
|
326
326
|
return oof, metrics
|
|
327
327
|
|
|
328
|
-
# %% ../nbs/04_DL.ipynb
|
|
328
|
+
# %% ../nbs/04_DL.ipynb 53
|
|
329
329
|
def predict_dl(df,
|
|
330
330
|
feat_col,
|
|
331
331
|
target_col,
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
# %% auto 0
|
|
6
6
|
__all__ = ['get_rdkit', 'get_morgan', 'get_esm', 'get_t5', 'get_t5_bfd', 'reduce_feature', 'remove_hi_corr', 'preprocess']
|
|
7
7
|
|
|
8
|
-
# %% ../nbs/01_feature.ipynb
|
|
8
|
+
# %% ../nbs/01_feature.ipynb 4
|
|
9
9
|
from fastbook import *
|
|
10
10
|
import torch,re,joblib,gc,esm
|
|
11
11
|
from tqdm.notebook import tqdm; tqdm.pandas()
|
|
@@ -14,7 +14,7 @@ from .core import Data
|
|
|
14
14
|
# Rdkit
|
|
15
15
|
from rdkit import Chem
|
|
16
16
|
from rdkit.ML.Descriptors import MoleculeDescriptors
|
|
17
|
-
from rdkit.Chem import Draw,Descriptors,AllChem
|
|
17
|
+
from rdkit.Chem import Draw,Descriptors,AllChem,rdFingerprintGenerator
|
|
18
18
|
|
|
19
19
|
# Models
|
|
20
20
|
from fairscale.nn.data_parallel import FullyShardedDataParallel as FSDP
|
|
@@ -30,7 +30,7 @@ from umap.umap_ import UMAP
|
|
|
30
30
|
|
|
31
31
|
set_config(transform_output="pandas")
|
|
32
32
|
|
|
33
|
-
# %% ../nbs/01_feature.ipynb
|
|
33
|
+
# %% ../nbs/01_feature.ipynb 7
|
|
34
34
|
def get_rdkit(df: pd.DataFrame, # a dataframe that contains smiles
|
|
35
35
|
col:str = "SMILES", # colname of smile
|
|
36
36
|
normalize: bool = True, # normalize features using StandardScaler()
|
|
@@ -49,14 +49,17 @@ def get_rdkit(df: pd.DataFrame, # a dataframe that contains smiles
|
|
|
49
49
|
# feature_df = feature_df.reset_index()
|
|
50
50
|
return feature_df
|
|
51
51
|
|
|
52
|
-
# %% ../nbs/01_feature.ipynb
|
|
52
|
+
# %% ../nbs/01_feature.ipynb 11
|
|
53
53
|
def get_morgan(df: pd.DataFrame, # a dataframe that contains smiles
|
|
54
54
|
col: str = "SMILES", # colname of smile
|
|
55
55
|
radius=3
|
|
56
56
|
):
|
|
57
57
|
"Get 2048 morgan fingerprint (binary feature) from smiles in a dataframe"
|
|
58
58
|
mols = [Chem.MolFromSmiles(smi) for smi in df[col]]
|
|
59
|
-
|
|
59
|
+
|
|
60
|
+
mfpgen = rdFingerprintGenerator.GetMorganGenerator(radius=radius,fpSize=2048)
|
|
61
|
+
morgan_fps = [mfpgen.GetFingerprint(mol) for mol in mols]
|
|
62
|
+
|
|
60
63
|
fp_df = pd.DataFrame(np.array(morgan_fps), index=df.index)
|
|
61
64
|
fp_df.columns = "morgan_" + fp_df.columns.astype(str)
|
|
62
65
|
return fp_df
|
|
File without changes
|
|
@@ -7,7 +7,7 @@ __all__ = ['set_sns', 'get_color_dict', 'logo_func', 'get_logo', 'get_logo2', 'p
|
|
|
7
7
|
'plot_cluster', 'plot_bokeh', 'plot_count', 'plot_bar', 'plot_group_bar', 'plot_box', 'plot_corr',
|
|
8
8
|
'draw_corr', 'get_AUCDF', 'plot_confusion_matrix']
|
|
9
9
|
|
|
10
|
-
# %% ../nbs/02_plot.ipynb
|
|
10
|
+
# %% ../nbs/02_plot.ipynb 4
|
|
11
11
|
import joblib,logomaker
|
|
12
12
|
import fastcore.all as fc, pandas as pd, numpy as np, seaborn as sns
|
|
13
13
|
from adjustText import adjust_text
|
|
@@ -32,14 +32,14 @@ from bokeh.layouts import column
|
|
|
32
32
|
from bokeh.palettes import Category20_20
|
|
33
33
|
from itertools import cycle
|
|
34
34
|
|
|
35
|
-
# %% ../nbs/02_plot.ipynb
|
|
35
|
+
# %% ../nbs/02_plot.ipynb 6
|
|
36
36
|
def set_sns():
|
|
37
37
|
"Set seaborn resolution for notebook display"
|
|
38
38
|
sns.set(rc={"figure.dpi":300, 'savefig.dpi':300})
|
|
39
39
|
sns.set_context('notebook')
|
|
40
40
|
sns.set_style("ticks")
|
|
41
41
|
|
|
42
|
-
# %% ../nbs/02_plot.ipynb
|
|
42
|
+
# %% ../nbs/02_plot.ipynb 7
|
|
43
43
|
def get_color_dict(categories, # list of names to assign color
|
|
44
44
|
palette: str='tab20', # choose from sns.color_palette
|
|
45
45
|
):
|
|
@@ -49,7 +49,7 @@ def get_color_dict(categories, # list of names to assign color
|
|
|
49
49
|
color_map = {category: next(color_cycle) for category in categories}
|
|
50
50
|
return color_map
|
|
51
51
|
|
|
52
|
-
# %% ../nbs/02_plot.ipynb
|
|
52
|
+
# %% ../nbs/02_plot.ipynb 11
|
|
53
53
|
def logo_func(df:pd.DataFrame, # a dataframe that contains ratios for each amino acid at each position
|
|
54
54
|
title: str='logo', # title of the motif logo
|
|
55
55
|
):
|
|
@@ -81,7 +81,7 @@ def logo_func(df:pd.DataFrame, # a dataframe that contains ratios for each amino
|
|
|
81
81
|
logo.ax.set_yticks([])
|
|
82
82
|
logo.ax.set_title(title)
|
|
83
83
|
|
|
84
|
-
# %% ../nbs/02_plot.ipynb
|
|
84
|
+
# %% ../nbs/02_plot.ipynb 12
|
|
85
85
|
def get_logo(df: pd.DataFrame, # stacked Dataframe with kinase as index, substrates as columns
|
|
86
86
|
kinase: str, # a specific kinase name in index
|
|
87
87
|
):
|
|
@@ -120,7 +120,7 @@ def get_logo(df: pd.DataFrame, # stacked Dataframe with kinase as index, substra
|
|
|
120
120
|
# plot logo
|
|
121
121
|
logo_func(ratio2, kinase)
|
|
122
122
|
|
|
123
|
-
# %% ../nbs/02_plot.ipynb
|
|
123
|
+
# %% ../nbs/02_plot.ipynb 16
|
|
124
124
|
def get_logo2(full: pd.DataFrame, # a dataframe that contains the full matrix of a kinase, with index as amino acid, and columns as positions
|
|
125
125
|
title: str = 'logo', # title of the graph
|
|
126
126
|
):
|
|
@@ -159,7 +159,7 @@ def get_logo2(full: pd.DataFrame, # a dataframe that contains the full matrix of
|
|
|
159
159
|
|
|
160
160
|
logo_func(ratio2,title)
|
|
161
161
|
|
|
162
|
-
# %% ../nbs/02_plot.ipynb
|
|
162
|
+
# %% ../nbs/02_plot.ipynb 19
|
|
163
163
|
@fc.delegates(sns.scatterplot)
|
|
164
164
|
def plot_rank(sorted_df: pd.DataFrame, # a sorted dataframe
|
|
165
165
|
x: str, # column name for x axis
|
|
@@ -203,7 +203,7 @@ def plot_rank(sorted_df: pd.DataFrame, # a sorted dataframe
|
|
|
203
203
|
|
|
204
204
|
plt.tight_layout()
|
|
205
205
|
|
|
206
|
-
# %% ../nbs/02_plot.ipynb
|
|
206
|
+
# %% ../nbs/02_plot.ipynb 23
|
|
207
207
|
@fc.delegates(sns.histplot)
|
|
208
208
|
def plot_hist(df: pd.DataFrame, # a dataframe that contain values for plot
|
|
209
209
|
x: str, # column name of values
|
|
@@ -220,7 +220,7 @@ def plot_hist(df: pd.DataFrame, # a dataframe that contain values for plot
|
|
|
220
220
|
plt.figure(figsize=figsize)
|
|
221
221
|
sns.histplot(data=df,x=x,**hist_params,**kwargs)
|
|
222
222
|
|
|
223
|
-
# %% ../nbs/02_plot.ipynb
|
|
223
|
+
# %% ../nbs/02_plot.ipynb 27
|
|
224
224
|
@fc.delegates(sns.heatmap)
|
|
225
225
|
def plot_heatmap(matrix, # a matrix of values
|
|
226
226
|
title: str='heatmap', # title of the heatmap
|
|
@@ -235,7 +235,7 @@ def plot_heatmap(matrix, # a matrix of values
|
|
|
235
235
|
sns.heatmap(matrix, cmap=cmap, annot=False,**kwargs)
|
|
236
236
|
plt.title(title)
|
|
237
237
|
|
|
238
|
-
# %% ../nbs/02_plot.ipynb
|
|
238
|
+
# %% ../nbs/02_plot.ipynb 31
|
|
239
239
|
@fc.delegates(sns.scatterplot)
|
|
240
240
|
def plot_2d(X: pd.DataFrame, # a dataframe that has first column to be x, and second column to be y
|
|
241
241
|
**kwargs, # arguments for sns.scatterplot
|
|
@@ -244,7 +244,7 @@ def plot_2d(X: pd.DataFrame, # a dataframe that has first column to be x, and se
|
|
|
244
244
|
plt.figure(figsize=(7,7))
|
|
245
245
|
sns.scatterplot(data = X,x=X.columns[0],y=X.columns[1],alpha=0.7,**kwargs)
|
|
246
246
|
|
|
247
|
-
# %% ../nbs/02_plot.ipynb
|
|
247
|
+
# %% ../nbs/02_plot.ipynb 33
|
|
248
248
|
def plot_cluster(df: pd.DataFrame, # a dataframe of values that is waited for dimensionality reduction
|
|
249
249
|
method: str='pca', # dimensionality reduction method, choose from pca, umap, and tsne
|
|
250
250
|
hue: str=None, # colname of color
|
|
@@ -266,10 +266,10 @@ def plot_cluster(df: pd.DataFrame, # a dataframe of values that is waited for di
|
|
|
266
266
|
plt.xticks([])
|
|
267
267
|
plt.yticks([])
|
|
268
268
|
if name_list is not None:
|
|
269
|
-
texts = [plt.text(embedding_df[x_col][i], embedding_df[y_col][i], name_list[i],fontsize=8) for i in range(len(embedding_df))]
|
|
269
|
+
texts = [plt.text(embedding_df[x_col].iloc[i], embedding_df[y_col].iloc[i], name_list[i],fontsize=8) for i in range(len(embedding_df))]
|
|
270
270
|
adjust_text(texts, arrowprops=dict(arrowstyle='-', color='black'))
|
|
271
271
|
|
|
272
|
-
# %% ../nbs/02_plot.ipynb
|
|
272
|
+
# %% ../nbs/02_plot.ipynb 37
|
|
273
273
|
def plot_bokeh(X:pd.DataFrame, # a dataframe of two columns from dimensionality reduction
|
|
274
274
|
idx, # pd.Series or list that indicates identities for searching box
|
|
275
275
|
hue:None, # pd.Series or list that indicates category for each sample
|
|
@@ -367,7 +367,7 @@ def plot_bokeh(X:pd.DataFrame, # a dataframe of two columns from dimensionality
|
|
|
367
367
|
layout = column(autocomplete, p)
|
|
368
368
|
show(layout)
|
|
369
369
|
|
|
370
|
-
# %% ../nbs/02_plot.ipynb
|
|
370
|
+
# %% ../nbs/02_plot.ipynb 40
|
|
371
371
|
def plot_count(cnt, # from df['x'].value_counts()
|
|
372
372
|
tick_spacing: float= None, # tick spacing for x axis
|
|
373
373
|
palette: str='tab20'):
|
|
@@ -383,7 +383,7 @@ def plot_count(cnt, # from df['x'].value_counts()
|
|
|
383
383
|
if tick_spacing is not None:
|
|
384
384
|
ax.xaxis.set_major_locator(MultipleLocator(tick_spacing))
|
|
385
385
|
|
|
386
|
-
# %% ../nbs/02_plot.ipynb
|
|
386
|
+
# %% ../nbs/02_plot.ipynb 42
|
|
387
387
|
@fc.delegates(sns.barplot)
|
|
388
388
|
def plot_bar(df,
|
|
389
389
|
value, # colname of value
|
|
@@ -403,7 +403,7 @@ def plot_bar(df,
|
|
|
403
403
|
|
|
404
404
|
idx = df.groupby(group)[value].mean().sort_values(ascending=ascending).index
|
|
405
405
|
|
|
406
|
-
sns.barplot(data=df, x=group, y=value, order=idx, **kwargs)
|
|
406
|
+
sns.barplot(data=df, x=group, y=value, order=idx,hue=group, legend=False, **kwargs)
|
|
407
407
|
|
|
408
408
|
if dots:
|
|
409
409
|
marker = {'marker': 'o',
|
|
@@ -438,7 +438,7 @@ def plot_bar(df,
|
|
|
438
438
|
|
|
439
439
|
plt.gca().spines[['right', 'top']].set_visible(False)
|
|
440
440
|
|
|
441
|
-
# %% ../nbs/02_plot.ipynb
|
|
441
|
+
# %% ../nbs/02_plot.ipynb 45
|
|
442
442
|
@fc.delegates(sns.barplot)
|
|
443
443
|
def plot_group_bar(df,
|
|
444
444
|
value_cols, # list of column names for values, the order depends on the first item
|
|
@@ -459,8 +459,13 @@ def plot_group_bar(df,
|
|
|
459
459
|
plt.figure(figsize=figsize)
|
|
460
460
|
|
|
461
461
|
# Create the bar plot
|
|
462
|
-
sns.barplot(data=df_melted,
|
|
463
|
-
|
|
462
|
+
sns.barplot(data=df_melted,
|
|
463
|
+
x=group,
|
|
464
|
+
y='Value',
|
|
465
|
+
hue='Ranking',
|
|
466
|
+
order=order,
|
|
467
|
+
capsize=0.1,
|
|
468
|
+
err_kws={'linewidth': 1.5,'color': 'gray'},
|
|
464
469
|
**kwargs)
|
|
465
470
|
|
|
466
471
|
# Increase font size for the x-axis and y-axis tick labels
|
|
@@ -481,7 +486,7 @@ def plot_group_bar(df,
|
|
|
481
486
|
plt.gca().spines[['right', 'top']].set_visible(False)
|
|
482
487
|
plt.legend(fontsize=fontsize) # if change legend location, use loc='upper right'
|
|
483
488
|
|
|
484
|
-
# %% ../nbs/02_plot.ipynb
|
|
489
|
+
# %% ../nbs/02_plot.ipynb 48
|
|
485
490
|
@fc.delegates(sns.boxplot)
|
|
486
491
|
def plot_box(df,
|
|
487
492
|
value, # colname of value
|
|
@@ -501,7 +506,7 @@ def plot_box(df,
|
|
|
501
506
|
idx = df[[group,value]].groupby(group).median().sort_values(value,ascending=False).index
|
|
502
507
|
|
|
503
508
|
|
|
504
|
-
sns.boxplot(data=df, x=group, y=value, order=idx, **kwargs)
|
|
509
|
+
sns.boxplot(data=df, x=group, y=value, order=idx,hue=group, legend=False, **kwargs)
|
|
505
510
|
|
|
506
511
|
if dots:
|
|
507
512
|
sns.stripplot(x=group, y=value, data=df, order=idx, jitter=True, color='black', size=3)
|
|
@@ -523,7 +528,7 @@ def plot_box(df,
|
|
|
523
528
|
# plt.gca().spines[['right', 'top']].set_visible(False)
|
|
524
529
|
|
|
525
530
|
|
|
526
|
-
# %% ../nbs/02_plot.ipynb
|
|
531
|
+
# %% ../nbs/02_plot.ipynb 51
|
|
527
532
|
@fc.delegates(sns.regplot)
|
|
528
533
|
def plot_corr(x, # x axis values, or colname of x axis
|
|
529
534
|
y, # y axis values, or colname of y axis
|
|
@@ -560,7 +565,7 @@ def plot_corr(x, # x axis values, or colname of x axis
|
|
|
560
565
|
transform=plt.gca().transAxes,
|
|
561
566
|
ha='center', va='center')
|
|
562
567
|
|
|
563
|
-
# %% ../nbs/02_plot.ipynb
|
|
568
|
+
# %% ../nbs/02_plot.ipynb 55
|
|
564
569
|
def draw_corr(corr):
|
|
565
570
|
|
|
566
571
|
"plot heatmap from df.corr()"
|
|
@@ -572,7 +577,7 @@ def draw_corr(corr):
|
|
|
572
577
|
plt.figure(figsize=(20, 16)) # Set the figure size
|
|
573
578
|
sns.heatmap(corr, annot=True, cmap='coolwarm', vmin=-1, vmax=1, mask=mask, fmt='.2f')
|
|
574
579
|
|
|
575
|
-
# %% ../nbs/02_plot.ipynb
|
|
580
|
+
# %% ../nbs/02_plot.ipynb 59
|
|
576
581
|
def get_AUCDF(df,col, reverse=False,plot=True,xlabel='Rank of reported kinase'):
|
|
577
582
|
|
|
578
583
|
"Plot CDF curve and get relative area under the curve"
|
|
@@ -637,7 +642,7 @@ def get_AUCDF(df,col, reverse=False,plot=True,xlabel='Rank of reported kinase'):
|
|
|
637
642
|
|
|
638
643
|
return AUCDF
|
|
639
644
|
|
|
640
|
-
# %% ../nbs/02_plot.ipynb
|
|
645
|
+
# %% ../nbs/02_plot.ipynb 62
|
|
641
646
|
def plot_confusion_matrix(target, # pd.Series
|
|
642
647
|
pred, # pd.Series
|
|
643
648
|
class_names:list=['0','1'],
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
# %% auto 0
|
|
6
6
|
__all__ = ['get_splits', 'split_data', 'score_each', 'train_ml', 'train_ml_cv', 'predict_ml']
|
|
7
7
|
|
|
8
|
-
# %% ../nbs/03_ML.ipynb
|
|
8
|
+
# %% ../nbs/03_ML.ipynb 4
|
|
9
9
|
# katlas
|
|
10
10
|
from .core import Data
|
|
11
11
|
from .feature import *
|
|
@@ -29,7 +29,7 @@ from sklearn.ensemble import *
|
|
|
29
29
|
from sklearn import set_config
|
|
30
30
|
set_config(transform_output="pandas")
|
|
31
31
|
|
|
32
|
-
# %% ../nbs/03_ML.ipynb
|
|
32
|
+
# %% ../nbs/03_ML.ipynb 7
|
|
33
33
|
def get_splits(df: pd.DataFrame, # df contains info for split
|
|
34
34
|
stratified: str=None, # colname to make stratified kfold; sampling from different groups
|
|
35
35
|
group: str=None, # colname to make group kfold; test and train are from different groups
|
|
@@ -79,7 +79,7 @@ def get_splits(df: pd.DataFrame, # df contains info for split
|
|
|
79
79
|
|
|
80
80
|
return splits
|
|
81
81
|
|
|
82
|
-
# %% ../nbs/03_ML.ipynb
|
|
82
|
+
# %% ../nbs/03_ML.ipynb 12
|
|
83
83
|
def split_data(df: pd.DataFrame, # dataframe of values
|
|
84
84
|
feat_col: list, # feature columns
|
|
85
85
|
target_col: list, # target columns
|
|
@@ -95,7 +95,7 @@ def split_data(df: pd.DataFrame, # dataframe of values
|
|
|
95
95
|
|
|
96
96
|
return X_train, y_train, X_test, y_test
|
|
97
97
|
|
|
98
|
-
# %% ../nbs/03_ML.ipynb
|
|
98
|
+
# %% ../nbs/03_ML.ipynb 16
|
|
99
99
|
def score_each(target: pd.DataFrame, # target dataframe
|
|
100
100
|
pred: pd.DataFrame, # predicted dataframe
|
|
101
101
|
absolute = True, # if absolute, take average with absolute values for pearson/spearman
|
|
@@ -134,7 +134,7 @@ def score_each(target: pd.DataFrame, # target dataframe
|
|
|
134
134
|
|
|
135
135
|
return mse,pearson_mean, metrics_df
|
|
136
136
|
|
|
137
|
-
# %% ../nbs/03_ML.ipynb
|
|
137
|
+
# %% ../nbs/03_ML.ipynb 21
|
|
138
138
|
def train_ml(df, # dataframe of values
|
|
139
139
|
feat_col, # feature columns
|
|
140
140
|
target_col, # target columns
|
|
@@ -169,7 +169,7 @@ def train_ml(df, # dataframe of values
|
|
|
169
169
|
|
|
170
170
|
return y_test, y_pred
|
|
171
171
|
|
|
172
|
-
# %% ../nbs/03_ML.ipynb
|
|
172
|
+
# %% ../nbs/03_ML.ipynb 24
|
|
173
173
|
def train_ml_cv( df, # dataframe of values
|
|
174
174
|
feat_col, # feature columns
|
|
175
175
|
target_col, # target columns
|
|
@@ -213,7 +213,7 @@ def train_ml_cv( df, # dataframe of values
|
|
|
213
213
|
|
|
214
214
|
return oof, metrics
|
|
215
215
|
|
|
216
|
-
# %% ../nbs/03_ML.ipynb
|
|
216
|
+
# %% ../nbs/03_ML.ipynb 31
|
|
217
217
|
def predict_ml(df, # Dataframe that contains features
|
|
218
218
|
feat_col, # feature columns
|
|
219
219
|
target_col=None,
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: python-katlas
|
|
3
|
-
Version: 0.1.2
|
|
3
|
+
Version: 0.1.4
|
|
4
4
|
Summary: tools for predicting kinome specificities
|
|
5
|
-
Home-page: https://github.com/sky1ove/python-katlas
|
|
5
|
+
Home-page: https://github.com/sky1ove/katlas
|
|
6
6
|
Author: lily
|
|
7
7
|
Author-email: lcai888666@gmail.com
|
|
8
8
|
License: Apache Software License 2.0
|
|
@@ -17,19 +17,35 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
17
17
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
18
18
|
Requires-Python: >=3.7
|
|
19
19
|
Description-Content-Type: text/markdown
|
|
20
|
-
Provides-Extra: dev
|
|
21
20
|
License-File: LICENSE
|
|
21
|
+
Requires-Dist: statsmodels
|
|
22
|
+
Requires-Dist: fastparquet
|
|
23
|
+
Requires-Dist: tqdm
|
|
24
|
+
Provides-Extra: dev
|
|
25
|
+
Requires-Dist: nbdev; extra == "dev"
|
|
26
|
+
Requires-Dist: pyngrok; extra == "dev"
|
|
27
|
+
Requires-Dist: fastai>=2.7.12; extra == "dev"
|
|
28
|
+
Requires-Dist: fastbook; extra == "dev"
|
|
29
|
+
Requires-Dist: fairscale; extra == "dev"
|
|
30
|
+
Requires-Dist: fair-esm; extra == "dev"
|
|
31
|
+
Requires-Dist: logomaker; extra == "dev"
|
|
32
|
+
Requires-Dist: seaborn; extra == "dev"
|
|
33
|
+
Requires-Dist: rdkit; extra == "dev"
|
|
34
|
+
Requires-Dist: umap-learn; extra == "dev"
|
|
35
|
+
Requires-Dist: adjustText; extra == "dev"
|
|
36
|
+
Requires-Dist: bokeh; extra == "dev"
|
|
37
|
+
Requires-Dist: scikit-learn>=1.3.0; extra == "dev"
|
|
38
|
+
Requires-Dist: openpyxl; extra == "dev"
|
|
22
39
|
|
|
23
40
|
# KATLAS
|
|
24
41
|
|
|
25
42
|
|
|
26
43
|
<!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->
|
|
27
44
|
|
|
28
|
-
<
|
|
29
|
-
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
|
|
30
|
-
</a>
|
|
45
|
+
<img alt="Katlas logo" width="600" caption="Katlas logo" src="https://github.com/sky1ove/katlas/raw/main/dataset/images/logo.png" id="logo"/>
|
|
31
46
|
|
|
32
|
-
<
|
|
47
|
+
<p><a target="_blank" href="https://colab.research.google.com/github/sky1ove/katlas/blob/main/nbs/index.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>
|
|
48
|
+
<a href="https://pypi.org/project/python-katlas/"><img src="https://img.shields.io/pypi/v/python-katlas?link=https%3A%2F%2Fpypi.org%2Fproject%2Fpython-katlas%2F" alt="PyPI"></a></p>
|
|
33
49
|
|
|
34
50
|
KATLAS is a repository containing python tools to predict kinases given
|
|
35
51
|
a substrate sequence. It also contains datasets of kinase substrate
|
|
@@ -38,9 +54,8 @@ specificities and human phosphoproteomics.
|
|
|
38
54
|
***References***: Please cite the appropriate papers if KATLAS is
|
|
39
55
|
helpful to your research.
|
|
40
56
|
|
|
41
|
-
- KATLAS was described in the paper \[Decoding Human
|
|
42
|
-
|
|
43
|
-
(manuscript)\]
|
|
57
|
+
- KATLAS was described in the paper \[Computational Decoding of Human
|
|
58
|
+
Kinome Substrate Specificities and Functions\]
|
|
44
59
|
|
|
45
60
|
- The positional scanning peptide array (PSPA) data is from paper [An
|
|
46
61
|
atlas of substrate specificities for the human serine/threonine
|
|
@@ -60,13 +75,21 @@ helpful to your research.
|
|
|
60
75
|
phosphoproteome](https://www.nature.com/articles/s41587-019-0344-3),
|
|
61
76
|
and [CPTAC](https://pdc.cancer.gov/pdc/cptac-pancancer) /
|
|
62
77
|
[LinkedOmics](https://academic.oup.com/nar/article/46/D1/D956/4607804)
|
|
63
|
-
|
|
64
|
-
|
|
78
|
+
|
|
79
|
+
## Reproduce datasets & figures
|
|
80
|
+
|
|
81
|
+
Follow the instructions in katlas_raw:
|
|
82
|
+
https://github.com/sky1ove/katlas_raw
|
|
83
|
+
|
|
84
|
+
Need to install the package via: `pip install 'python-katlas[dev]' -U`
|
|
85
|
+
|
|
65
86
|
## Web applications
|
|
66
87
|
|
|
67
|
-
Users can now run the analysis directly on the web without needing to code.
|
|
88
|
+
Users can now run the analysis directly on the web without needing to
|
|
89
|
+
code.
|
|
68
90
|
|
|
69
|
-
Check out our latest web: [kinase-atlas.com](https://kinase-atlas.com/)
|
|
91
|
+
Check out our latest web platform:
|
|
92
|
+
[kinase-atlas.com](https://kinase-atlas.com/)
|
|
70
93
|
|
|
71
94
|
## Tutorials on Colab
|
|
72
95
|
|
|
@@ -77,14 +100,12 @@ Check out our latest web: [kinase-atlas.com](https://kinase-atlas.com/)
|
|
|
77
100
|
- 3. [Kinase enrichment analysis for AKT
|
|
78
101
|
inhibitor](https://colab.research.google.com/github/sky1ove/katlas/blob/main/nbs/tutorial_03a_enrichment_AKTi.ipynb)
|
|
79
102
|
|
|
80
|
-
|
|
81
103
|
## Install
|
|
82
104
|
|
|
83
|
-
|
|
105
|
+
pip install python-katlas -U
|
|
84
106
|
|
|
85
|
-
|
|
86
|
-
pip install python-katlas -U
|
|
87
|
-
```
|
|
107
|
+
To use other modules besides the core, do
|
|
108
|
+
`pip install 'python-katlas[dev]' -U`
|
|
88
109
|
|
|
89
110
|
## Import
|
|
90
111
|
|
|
@@ -233,6 +254,7 @@ get_pct('AEEKEyHSEGG',**param_PSPA_y, pct_ref = y_pct)
|
|
|
233
254
|
| TNK2 | -4.577 | 2.050581 |
|
|
234
255
|
| DDR2 | -4.920 | 10.403281 |
|
|
235
256
|
|
|
257
|
+
<p>93 rows × 2 columns</p>
|
|
236
258
|
|
|
237
259
|
|
|
238
260
|
## High-throughput substrate scoring on a dataframe
|
|
@@ -252,6 +274,7 @@ df.iloc[:,-2:]
|
|
|
252
274
|
```
|
|
253
275
|
|
|
254
276
|
|
|
277
|
+
|
|
255
278
|
| | site_seq | gene_site |
|
|
256
279
|
|-----|-----------------|----------------|
|
|
257
280
|
| 0 | VDDEKGDSNDDYDSA | A0A075B6Q4_S24 |
|
|
@@ -281,14 +304,15 @@ results
|
|
|
281
304
|
|
|
282
305
|
|
|
283
306
|
|
|
284
|
-
| kinase | SRC
|
|
285
|
-
|
|
286
|
-
| 0
|
|
287
|
-
| 1
|
|
288
|
-
| 2
|
|
289
|
-
| 3
|
|
290
|
-
| 4
|
|
307
|
+
| kinase | SRC | EPHA3 | FES | NTRK3 | ALK | EPHA8 | ABL1 | FLT3 | EPHB2 | FYN | ... | MEK5 | PKN2 | MAP2K7 | MRCKB | HIPK3 | CDK8 | BUB1 | MEKK3 | MAP2K3 | GRK1 |
|
|
308
|
+
|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|
|
|
309
|
+
| 0 | 0.991760 | 1.093712 | 1.051750 | 1.067134 | 1.013682 | 1.097519 | 0.966379 | 0.982464 | 1.054986 | 1.055910 | ... | 1.314859 | 1.635470 | 1.652251 | 1.622672 | 1.362973 | 1.797155 | 1.305198 | 1.423618 | 1.504941 | 1.872020 |
|
|
310
|
+
| 1 | 0.910262 | 0.953743 | 0.942327 | 0.950601 | 0.872694 | 0.932586 | 0.846899 | 0.826662 | 0.915020 | 0.942713 | ... | 1.175454 | 1.402006 | 1.430392 | 1.215826 | 1.569373 | 1.716455 | 1.270999 | 1.195081 | 1.223082 | 1.793290 |
|
|
311
|
+
| 2 | 0.849866 | 0.899910 | 0.848895 | 0.879652 | 0.874959 | 0.899414 | 0.839200 | 0.836523 | 0.858040 | 0.867269 | ... | 1.408003 | 1.813739 | 1.454786 | 1.084522 | 1.352556 | 1.524663 | 1.377839 | 1.173830 | 1.305691 | 1.811849 |
|
|
312
|
+
| 3 | 0.803826 | 0.836527 | 0.800759 | 0.894570 | 0.839905 | 0.781001 | 0.847847 | 0.807040 | 0.805877 | 0.801402 | ... | 1.110307 | 1.703637 | 1.795092 | 1.469653 | 1.549936 | 1.491344 | 1.446922 | 1.055452 | 1.534895 | 1.741090 |
|
|
313
|
+
| 4 | 0.822793 | 0.796532 | 0.792343 | 0.839882 | 0.810122 | 0.781420 | 0.805251 | 0.795022 | 0.790380 | 0.864538 | ... | 1.062617 | 1.357689 | 1.485945 | 1.249266 | 1.456078 | 1.422782 | 1.376471 | 1.089629 | 1.121309 | 1.697524 |
|
|
291
314
|
|
|
315
|
+
<p>5 rows × 289 columns</p>
|
|
292
316
|
|
|
293
317
|
|
|
294
318
|
## Phosphorylation sites
|
|
@@ -305,11 +329,11 @@ df.head(3)
|
|
|
305
329
|
|
|
306
330
|
|
|
307
331
|
|
|
308
|
-
|
|
|
309
|
-
|
|
310
|
-
| 0
|
|
311
|
-
| 1
|
|
312
|
-
| 2
|
|
332
|
+
| | gene | site | site_seq | protein | gene_name | gene_site | protein_site |
|
|
333
|
+
|----|----|----|----|----|----|----|----|
|
|
334
|
+
| 0 | ENSG00000003056.8 | S267 | DDQLGEESEERDDHL | ENSP00000000412.3 | M6PR | M6PR_S267 | ENSP00000000412_S267 |
|
|
335
|
+
| 1 | ENSG00000003056.8 | S267 | DDQLGEESEERDDHL | ENSP00000440488.2 | M6PR | M6PR_S267 | ENSP00000440488_S267 |
|
|
336
|
+
| 2 | ENSG00000048028.11 | S1053 | PPTIRPNSPYDLCSR | ENSP00000003302.4 | USP28 | USP28_S1053 | ENSP00000003302_S1053 |
|
|
313
337
|
|
|
314
338
|
|
|
315
339
|
|
|
@@ -321,11 +345,12 @@ df.head(3)
|
|
|
321
345
|
```
|
|
322
346
|
|
|
323
347
|
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
|
328
|
-
|
|
|
348
|
+
|
|
349
|
+
| | uniprot | position | residue | is_disopred | disopred_score | log10_hotspot_pval_min | isHotspot | uniprot_position | functional_score | current_uniprot | name | gene | Sequence | is_valid | site_seq | gene_site |
|
|
350
|
+
|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|
|
|
351
|
+
| 0 | A0A075B6Q4 | 24 | S | True | 0.91 | 6.839384 | True | A0A075B6Q4_24 | 0.149257 | A0A075B6Q4 | A0A075B6Q4_HUMAN | None | MDIQKSENEDDSEWEDVDDEKGDSNDDYDSAGLLSDEDCMSVPGKT... | True | VDDEKGDSNDDYDSA | A0A075B6Q4_S24 |
|
|
352
|
+
| 1 | A0A075B6Q4 | 35 | S | True | 0.87 | 9.192622 | False | A0A075B6Q4_35 | 0.136966 | A0A075B6Q4 | A0A075B6Q4_HUMAN | None | MDIQKSENEDDSEWEDVDDEKGDSNDDYDSAGLLSDEDCMSVPGKT... | True | YDSAGLLSDEDCMSV | A0A075B6Q4_S35 |
|
|
353
|
+
| 2 | A0A075B6Q4 | 57 | S | False | 0.28 | 0.818834 | False | A0A075B6Q4_57 | 0.125364 | A0A075B6Q4 | A0A075B6Q4_HUMAN | None | MDIQKSENEDDSEWEDVDDEKGDSNDDYDSAGLLSDEDCMSVPGKT... | True | IADHLFWSEETKSRF | A0A075B6Q4_S57 |
|
|
329
354
|
|
|
330
355
|
|
|
331
356
|
|
|
@@ -337,11 +362,12 @@ df.head(3)
|
|
|
337
362
|
```
|
|
338
363
|
|
|
339
364
|
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
|
344
|
-
|
|
|
365
|
+
|
|
366
|
+
| | gene | protein | uniprot | site | gene_site | SITE_GRP_ID | species | site_seq | LT_LIT | MS_LIT | MS_CST | CST_CAT# | Ambiguous_Site |
|
|
367
|
+
|----|----|----|----|----|----|----|----|----|----|----|----|----|----|
|
|
368
|
+
| 0 | YWHAB | 14-3-3 beta | P31946 | T2 | YWHAB_T2 | 15718712 | human | \_\_\_\_\_\_MtMDksELV | NaN | 3.0 | 1.0 | None | 0 |
|
|
369
|
+
| 1 | YWHAB | 14-3-3 beta | P31946 | S6 | YWHAB_S6 | 15718709 | human | \_\_MtMDksELVQkAk | NaN | 8.0 | NaN | None | 0 |
|
|
370
|
+
| 2 | YWHAB | 14-3-3 beta | P31946 | Y21 | YWHAB_Y21 | 3426383 | human | LAEQAERyDDMAAAM | NaN | NaN | 4.0 | None | 0 |
|
|
345
371
|
|
|
346
372
|
|
|
347
373
|
|
|
@@ -353,12 +379,14 @@ df.head(3)
|
|
|
353
379
|
```
|
|
354
380
|
|
|
355
381
|
|
|
356
|
-
| | site_seq | gene_site | gene | source | num_site | acceptor | -7 | -6 | -5 | -4 | ... | -2 | -1 | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
|
|
357
|
-
|-----|-----------------|------------|-------|--------|----------|----------|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|
|
|
358
|
-
| 0 | AAAAAAASGGAGSDN | PBX1_S136 | PBX1 | ochoa | 1 | S | A | A | A | A | ... | A | A | S | G | G | A | G | S | D | N |
|
|
359
|
-
| 1 | AAAAAAASGGGVSPD | PBX2_S146 | PBX2 | ochoa | 1 | S | A | A | A | A | ... | A | A | S | G | G | G | V | S | P | D |
|
|
360
|
-
| 2 | AAAAAAASGVTTGKP | CLASR_S349 | CLASR | ochoa | 1 | S | A | A | A | A | ... | A | A | S | G | V | T | T | G | K | P |
|
|
361
382
|
|
|
383
|
+
| | site_seq | gene_site | gene | source | num_site | acceptor | -7 | -6 | -5 | -4 | ... | -2 | -1 | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
|
|
384
|
+
|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|
|
|
385
|
+
| 0 | AAAAAAASGGAGSDN | PBX1_S136 | PBX1 | ochoa | 1 | S | A | A | A | A | ... | A | A | S | G | G | A | G | S | D | N |
|
|
386
|
+
| 1 | AAAAAAASGGGVSPD | PBX2_S146 | PBX2 | ochoa | 1 | S | A | A | A | A | ... | A | A | S | G | G | G | V | S | P | D |
|
|
387
|
+
| 2 | AAAAAAASGVTTGKP | CLASR_S349 | CLASR | ochoa | 1 | S | A | A | A | A | ... | A | A | S | G | V | T | T | G | K | P |
|
|
388
|
+
|
|
389
|
+
<p>3 rows × 21 columns</p>
|
|
362
390
|
|
|
363
391
|
|
|
364
392
|
## Phosphorylation site sequence example
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
@@ -3,9 +3,10 @@
|
|
|
3
3
|
# See https://github.com/fastai/nbdev/blob/master/settings.ini for examples.
|
|
4
4
|
|
|
5
5
|
### Python library ###
|
|
6
|
-
repo = python-katlas
|
|
7
|
-
lib_name = %(repo)s
|
|
8
|
-
|
|
6
|
+
repo = katlas
|
|
7
|
+
#lib_name = %(repo)s
|
|
8
|
+
lib_name = python-katlas
|
|
9
|
+
version = 0.1.4
|
|
9
10
|
min_python = 3.7
|
|
10
11
|
license = apache2
|
|
11
12
|
black_formatting = False
|
|
File without changes
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.1.1"
|
|
File without changes
|