python-katlas 0.1.3__tar.gz → 0.1.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {python_katlas-0.1.3/python_katlas.egg-info → python_katlas-0.1.4}/PKG-INFO +17 -96
- {python_katlas-0.1.3 → python_katlas-0.1.4}/README.md +16 -95
- python_katlas-0.1.4/katlas/__init__.py +1 -0
- {python_katlas-0.1.3 → python_katlas-0.1.4}/katlas/_modidx.py +0 -0
- {python_katlas-0.1.3 → python_katlas-0.1.4}/katlas/plot.py +10 -5
- {python_katlas-0.1.3 → python_katlas-0.1.4/python_katlas.egg-info}/PKG-INFO +17 -96
- {python_katlas-0.1.3 → python_katlas-0.1.4}/python_katlas.egg-info/SOURCES.txt +0 -0
- {python_katlas-0.1.3 → python_katlas-0.1.4}/python_katlas.egg-info/dependency_links.txt +0 -0
- {python_katlas-0.1.3 → python_katlas-0.1.4}/python_katlas.egg-info/entry_points.txt +0 -0
- {python_katlas-0.1.3 → python_katlas-0.1.4}/python_katlas.egg-info/not-zip-safe +0 -0
- {python_katlas-0.1.3 → python_katlas-0.1.4}/python_katlas.egg-info/requires.txt +0 -0
- {python_katlas-0.1.3 → python_katlas-0.1.4}/python_katlas.egg-info/top_level.txt +0 -0
- {python_katlas-0.1.3 → python_katlas-0.1.4}/settings.ini +1 -1
- python_katlas-0.1.3/katlas/__init__.py +0 -1
- {python_katlas-0.1.3 → python_katlas-0.1.4}/LICENSE +0 -0
- {python_katlas-0.1.3 → python_katlas-0.1.4}/MANIFEST.in +0 -0
- {python_katlas-0.1.3 → python_katlas-0.1.4}/katlas/core.py +0 -0
- {python_katlas-0.1.3 → python_katlas-0.1.4}/katlas/dl.py +0 -0
- {python_katlas-0.1.3 → python_katlas-0.1.4}/katlas/feature.py +0 -0
- {python_katlas-0.1.3 → python_katlas-0.1.4}/katlas/imports.py +0 -0
- {python_katlas-0.1.3 → python_katlas-0.1.4}/katlas/train.py +0 -0
- {python_katlas-0.1.3 → python_katlas-0.1.4}/setup.cfg +0 -0
- {python_katlas-0.1.3 → python_katlas-0.1.4}/setup.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: python-katlas
|
|
3
|
-
Version: 0.1.3
|
|
3
|
+
Version: 0.1.4
|
|
4
4
|
Summary: tools for predicting kinome specificities
|
|
5
5
|
Home-page: https://github.com/sky1ove/katlas
|
|
6
6
|
Author: lily
|
|
@@ -44,10 +44,8 @@ Requires-Dist: openpyxl; extra == "dev"
|
|
|
44
44
|
|
|
45
45
|
<img alt="Katlas logo" width="600" caption="Katlas logo" src="https://github.com/sky1ove/katlas/raw/main/dataset/images/logo.png" id="logo"/>
|
|
46
46
|
|
|
47
|
-
<a target="_blank" href="https://colab.research.google.com/github/sky1ove/katlas/blob/main/nbs/index.ipynb">
|
|
48
|
-
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
|
|
49
|
-
</a> <a href="https://pypi.org/project/python-katlas/">
|
|
50
|
-
<img src="https://img.shields.io/pypi/v/python-katlas?link=https%3A%2F%2Fpypi.org%2Fproject%2Fpython-katlas%2F" alt="PyPI"></a>
|
|
47
|
+
<p><a target="_blank" href="https://colab.research.google.com/github/sky1ove/katlas/blob/main/nbs/index.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>
|
|
48
|
+
<a href="https://pypi.org/project/python-katlas/"><img src="https://img.shields.io/pypi/v/python-katlas?link=https%3A%2F%2Fpypi.org%2Fproject%2Fpython-katlas%2F" alt="PyPI"></a></p>
|
|
51
49
|
|
|
52
50
|
KATLAS is a repository containing python tools to predict kinases given
|
|
53
51
|
a substrate sequence. It also contains datasets of kinase substrate
|
|
@@ -240,18 +238,7 @@ get_pct('AEEKEyHSEGG',**param_PSPA_y, pct_ref = y_pct)
|
|
|
240
238
|
|
|
241
239
|
considering string: ['-5A', '-4E', '-3E', '-2K', '-1E', '0Y', '1H', '2S', '3E', '4G', '5G']
|
|
242
240
|
|
|
243
|
-
|
|
244
|
-
<style scoped>
|
|
245
|
-
.dataframe tbody tr th:only-of-type {
|
|
246
|
-
vertical-align: middle;
|
|
247
|
-
}
|
|
248
|
-
.dataframe tbody tr th {
|
|
249
|
-
vertical-align: top;
|
|
250
|
-
}
|
|
251
|
-
.dataframe thead th {
|
|
252
|
-
text-align: right;
|
|
253
|
-
}
|
|
254
|
-
</style>
|
|
241
|
+
|
|
255
242
|
|
|
256
243
|
| | log2(score) | percentile |
|
|
257
244
|
|-------|-------------|------------|
|
|
@@ -268,7 +255,7 @@ get_pct('AEEKEyHSEGG',**param_PSPA_y, pct_ref = y_pct)
|
|
|
268
255
|
| DDR2 | -4.920 | 10.403281 |
|
|
269
256
|
|
|
270
257
|
<p>93 rows × 2 columns</p>
|
|
271
|
-
|
|
258
|
+
|
|
272
259
|
|
|
273
260
|
## High-throughput substrate scoring on a dataframe
|
|
274
261
|
|
|
@@ -286,18 +273,7 @@ df = Data.get_ochoa_site().head()
|
|
|
286
273
|
df.iloc[:,-2:]
|
|
287
274
|
```
|
|
288
275
|
|
|
289
|
-
|
|
290
|
-
<style scoped>
|
|
291
|
-
.dataframe tbody tr th:only-of-type {
|
|
292
|
-
vertical-align: middle;
|
|
293
|
-
}
|
|
294
|
-
.dataframe tbody tr th {
|
|
295
|
-
vertical-align: top;
|
|
296
|
-
}
|
|
297
|
-
.dataframe thead th {
|
|
298
|
-
text-align: right;
|
|
299
|
-
}
|
|
300
|
-
</style>
|
|
276
|
+
|
|
301
277
|
|
|
302
278
|
| | site_seq | gene_site |
|
|
303
279
|
|-----|-----------------|----------------|
|
|
@@ -307,7 +283,7 @@ df.iloc[:,-2:]
|
|
|
307
283
|
| 3 | KSRFTEYSMTSSVMR | A0A075B6Q4_S68 |
|
|
308
284
|
| 4 | FTEYSMTSSVMRRNE | A0A075B6Q4_S71 |
|
|
309
285
|
|
|
310
|
-
|
|
286
|
+
|
|
311
287
|
|
|
312
288
|
### Set the column name and param to calculate
|
|
313
289
|
|
|
@@ -326,18 +302,7 @@ results
|
|
|
326
302
|
|
|
327
303
|
100%|██████████| 289/289 [00:05<00:00, 56.64it/s]
|
|
328
304
|
|
|
329
|
-
|
|
330
|
-
<style scoped>
|
|
331
|
-
.dataframe tbody tr th:only-of-type {
|
|
332
|
-
vertical-align: middle;
|
|
333
|
-
}
|
|
334
|
-
.dataframe tbody tr th {
|
|
335
|
-
vertical-align: top;
|
|
336
|
-
}
|
|
337
|
-
.dataframe thead th {
|
|
338
|
-
text-align: right;
|
|
339
|
-
}
|
|
340
|
-
</style>
|
|
305
|
+
|
|
341
306
|
|
|
342
307
|
| kinase | SRC | EPHA3 | FES | NTRK3 | ALK | EPHA8 | ABL1 | FLT3 | EPHB2 | FYN | ... | MEK5 | PKN2 | MAP2K7 | MRCKB | HIPK3 | CDK8 | BUB1 | MEKK3 | MAP2K3 | GRK1 |
|
|
343
308
|
|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|
|
|
@@ -348,7 +313,7 @@ results
|
|
|
348
313
|
| 4 | 0.822793 | 0.796532 | 0.792343 | 0.839882 | 0.810122 | 0.781420 | 0.805251 | 0.795022 | 0.790380 | 0.864538 | ... | 1.062617 | 1.357689 | 1.485945 | 1.249266 | 1.456078 | 1.422782 | 1.376471 | 1.089629 | 1.121309 | 1.697524 |
|
|
349
314
|
|
|
350
315
|
<p>5 rows × 289 columns</p>
|
|
351
|
-
|
|
316
|
+
|
|
352
317
|
|
|
353
318
|
## Phosphorylation sites
|
|
354
319
|
|
|
@@ -362,18 +327,7 @@ df = Data.get_cptac_ensembl_site()
|
|
|
362
327
|
df.head(3)
|
|
363
328
|
```
|
|
364
329
|
|
|
365
|
-
|
|
366
|
-
<style scoped>
|
|
367
|
-
.dataframe tbody tr th:only-of-type {
|
|
368
|
-
vertical-align: middle;
|
|
369
|
-
}
|
|
370
|
-
.dataframe tbody tr th {
|
|
371
|
-
vertical-align: top;
|
|
372
|
-
}
|
|
373
|
-
.dataframe thead th {
|
|
374
|
-
text-align: right;
|
|
375
|
-
}
|
|
376
|
-
</style>
|
|
330
|
+
|
|
377
331
|
|
|
378
332
|
| | gene | site | site_seq | protein | gene_name | gene_site | protein_site |
|
|
379
333
|
|----|----|----|----|----|----|----|----|
|
|
@@ -381,7 +335,7 @@ df.head(3)
|
|
|
381
335
|
| 1 | ENSG00000003056.8 | S267 | DDQLGEESEERDDHL | ENSP00000440488.2 | M6PR | M6PR_S267 | ENSP00000440488_S267 |
|
|
382
336
|
| 2 | ENSG00000048028.11 | S1053 | PPTIRPNSPYDLCSR | ENSP00000003302.4 | USP28 | USP28_S1053 | ENSP00000003302_S1053 |
|
|
383
337
|
|
|
384
|
-
|
|
338
|
+
|
|
385
339
|
|
|
386
340
|
### [Ochoa et al. human phosphoproteome](https://www.nature.com/articles/s41587-019-0344-3)
|
|
387
341
|
|
|
@@ -390,18 +344,7 @@ df = Data.get_ochoa_site()
|
|
|
390
344
|
df.head(3)
|
|
391
345
|
```
|
|
392
346
|
|
|
393
|
-
|
|
394
|
-
<style scoped>
|
|
395
|
-
.dataframe tbody tr th:only-of-type {
|
|
396
|
-
vertical-align: middle;
|
|
397
|
-
}
|
|
398
|
-
.dataframe tbody tr th {
|
|
399
|
-
vertical-align: top;
|
|
400
|
-
}
|
|
401
|
-
.dataframe thead th {
|
|
402
|
-
text-align: right;
|
|
403
|
-
}
|
|
404
|
-
</style>
|
|
347
|
+
|
|
405
348
|
|
|
406
349
|
| | uniprot | position | residue | is_disopred | disopred_score | log10_hotspot_pval_min | isHotspot | uniprot_position | functional_score | current_uniprot | name | gene | Sequence | is_valid | site_seq | gene_site |
|
|
407
350
|
|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|
|
|
@@ -409,7 +352,7 @@ df.head(3)
|
|
|
409
352
|
| 1 | A0A075B6Q4 | 35 | S | True | 0.87 | 9.192622 | False | A0A075B6Q4_35 | 0.136966 | A0A075B6Q4 | A0A075B6Q4_HUMAN | None | MDIQKSENEDDSEWEDVDDEKGDSNDDYDSAGLLSDEDCMSVPGKT... | True | YDSAGLLSDEDCMSV | A0A075B6Q4_S35 |
|
|
410
353
|
| 2 | A0A075B6Q4 | 57 | S | False | 0.28 | 0.818834 | False | A0A075B6Q4_57 | 0.125364 | A0A075B6Q4 | A0A075B6Q4_HUMAN | None | MDIQKSENEDDSEWEDVDDEKGDSNDDYDSAGLLSDEDCMSVPGKT... | True | IADHLFWSEETKSRF | A0A075B6Q4_S57 |
|
|
411
354
|
|
|
412
|
-
|
|
355
|
+
|
|
413
356
|
|
|
414
357
|
### PhosphoSitePlus human phosphorylation site
|
|
415
358
|
|
|
@@ -418,18 +361,7 @@ df = Data.get_psp_human_site()
|
|
|
418
361
|
df.head(3)
|
|
419
362
|
```
|
|
420
363
|
|
|
421
|
-
|
|
422
|
-
<style scoped>
|
|
423
|
-
.dataframe tbody tr th:only-of-type {
|
|
424
|
-
vertical-align: middle;
|
|
425
|
-
}
|
|
426
|
-
.dataframe tbody tr th {
|
|
427
|
-
vertical-align: top;
|
|
428
|
-
}
|
|
429
|
-
.dataframe thead th {
|
|
430
|
-
text-align: right;
|
|
431
|
-
}
|
|
432
|
-
</style>
|
|
364
|
+
|
|
433
365
|
|
|
434
366
|
| | gene | protein | uniprot | site | gene_site | SITE_GRP_ID | species | site_seq | LT_LIT | MS_LIT | MS_CST | CST_CAT# | Ambiguous_Site |
|
|
435
367
|
|----|----|----|----|----|----|----|----|----|----|----|----|----|----|
|
|
@@ -437,7 +369,7 @@ df.head(3)
|
|
|
437
369
|
| 1 | YWHAB | 14-3-3 beta | P31946 | S6 | YWHAB_S6 | 15718709 | human | \_\_MtMDksELVQkAk | NaN | 8.0 | NaN | None | 0 |
|
|
438
370
|
| 2 | YWHAB | 14-3-3 beta | P31946 | Y21 | YWHAB_Y21 | 3426383 | human | LAEQAERyDDMAAAM | NaN | NaN | 4.0 | None | 0 |
|
|
439
371
|
|
|
440
|
-
|
|
372
|
+
|
|
441
373
|
|
|
442
374
|
### Unique sites of combined Ochoa & PhosphoSitePlus
|
|
443
375
|
|
|
@@ -446,18 +378,7 @@ df = Data.get_combine_site_psp_ochoa()
|
|
|
446
378
|
df.head(3)
|
|
447
379
|
```
|
|
448
380
|
|
|
449
|
-
|
|
450
|
-
<style scoped>
|
|
451
|
-
.dataframe tbody tr th:only-of-type {
|
|
452
|
-
vertical-align: middle;
|
|
453
|
-
}
|
|
454
|
-
.dataframe tbody tr th {
|
|
455
|
-
vertical-align: top;
|
|
456
|
-
}
|
|
457
|
-
.dataframe thead th {
|
|
458
|
-
text-align: right;
|
|
459
|
-
}
|
|
460
|
-
</style>
|
|
381
|
+
|
|
461
382
|
|
|
462
383
|
| | site_seq | gene_site | gene | source | num_site | acceptor | -7 | -6 | -5 | -4 | ... | -2 | -1 | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
|
|
463
384
|
|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|
|
|
@@ -466,7 +387,7 @@ df.head(3)
|
|
|
466
387
|
| 2 | AAAAAAASGVTTGKP | CLASR_S349 | CLASR | ochoa | 1 | S | A | A | A | A | ... | A | A | S | G | V | T | T | G | K | P |
|
|
467
388
|
|
|
468
389
|
<p>3 rows × 21 columns</p>
|
|
469
|
-
|
|
390
|
+
|
|
470
391
|
|
|
471
392
|
## Phosphorylation site sequence example
|
|
472
393
|
|
|
@@ -5,10 +5,8 @@
|
|
|
5
5
|
|
|
6
6
|
<img alt="Katlas logo" width="600" caption="Katlas logo" src="https://github.com/sky1ove/katlas/raw/main/dataset/images/logo.png" id="logo"/>
|
|
7
7
|
|
|
8
|
-
<a target="_blank" href="https://colab.research.google.com/github/sky1ove/katlas/blob/main/nbs/index.ipynb">
|
|
9
|
-
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
|
|
10
|
-
</a> <a href="https://pypi.org/project/python-katlas/">
|
|
11
|
-
<img src="https://img.shields.io/pypi/v/python-katlas?link=https%3A%2F%2Fpypi.org%2Fproject%2Fpython-katlas%2F" alt="PyPI"></a>
|
|
8
|
+
<p><a target="_blank" href="https://colab.research.google.com/github/sky1ove/katlas/blob/main/nbs/index.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>
|
|
9
|
+
<a href="https://pypi.org/project/python-katlas/"><img src="https://img.shields.io/pypi/v/python-katlas?link=https%3A%2F%2Fpypi.org%2Fproject%2Fpython-katlas%2F" alt="PyPI"></a></p>
|
|
12
10
|
|
|
13
11
|
KATLAS is a repository containing python tools to predict kinases given
|
|
14
12
|
a substrate sequence. It also contains datasets of kinase substrate
|
|
@@ -201,18 +199,7 @@ get_pct('AEEKEyHSEGG',**param_PSPA_y, pct_ref = y_pct)
|
|
|
201
199
|
|
|
202
200
|
considering string: ['-5A', '-4E', '-3E', '-2K', '-1E', '0Y', '1H', '2S', '3E', '4G', '5G']
|
|
203
201
|
|
|
204
|
-
|
|
205
|
-
<style scoped>
|
|
206
|
-
.dataframe tbody tr th:only-of-type {
|
|
207
|
-
vertical-align: middle;
|
|
208
|
-
}
|
|
209
|
-
.dataframe tbody tr th {
|
|
210
|
-
vertical-align: top;
|
|
211
|
-
}
|
|
212
|
-
.dataframe thead th {
|
|
213
|
-
text-align: right;
|
|
214
|
-
}
|
|
215
|
-
</style>
|
|
202
|
+
|
|
216
203
|
|
|
217
204
|
| | log2(score) | percentile |
|
|
218
205
|
|-------|-------------|------------|
|
|
@@ -229,7 +216,7 @@ get_pct('AEEKEyHSEGG',**param_PSPA_y, pct_ref = y_pct)
|
|
|
229
216
|
| DDR2 | -4.920 | 10.403281 |
|
|
230
217
|
|
|
231
218
|
<p>93 rows × 2 columns</p>
|
|
232
|
-
|
|
219
|
+
|
|
233
220
|
|
|
234
221
|
## High-throughput substrate scoring on a dataframe
|
|
235
222
|
|
|
@@ -247,18 +234,7 @@ df = Data.get_ochoa_site().head()
|
|
|
247
234
|
df.iloc[:,-2:]
|
|
248
235
|
```
|
|
249
236
|
|
|
250
|
-
|
|
251
|
-
<style scoped>
|
|
252
|
-
.dataframe tbody tr th:only-of-type {
|
|
253
|
-
vertical-align: middle;
|
|
254
|
-
}
|
|
255
|
-
.dataframe tbody tr th {
|
|
256
|
-
vertical-align: top;
|
|
257
|
-
}
|
|
258
|
-
.dataframe thead th {
|
|
259
|
-
text-align: right;
|
|
260
|
-
}
|
|
261
|
-
</style>
|
|
237
|
+
|
|
262
238
|
|
|
263
239
|
| | site_seq | gene_site |
|
|
264
240
|
|-----|-----------------|----------------|
|
|
@@ -268,7 +244,7 @@ df.iloc[:,-2:]
|
|
|
268
244
|
| 3 | KSRFTEYSMTSSVMR | A0A075B6Q4_S68 |
|
|
269
245
|
| 4 | FTEYSMTSSVMRRNE | A0A075B6Q4_S71 |
|
|
270
246
|
|
|
271
|
-
|
|
247
|
+
|
|
272
248
|
|
|
273
249
|
### Set the column name and param to calculate
|
|
274
250
|
|
|
@@ -287,18 +263,7 @@ results
|
|
|
287
263
|
|
|
288
264
|
100%|██████████| 289/289 [00:05<00:00, 56.64it/s]
|
|
289
265
|
|
|
290
|
-
|
|
291
|
-
<style scoped>
|
|
292
|
-
.dataframe tbody tr th:only-of-type {
|
|
293
|
-
vertical-align: middle;
|
|
294
|
-
}
|
|
295
|
-
.dataframe tbody tr th {
|
|
296
|
-
vertical-align: top;
|
|
297
|
-
}
|
|
298
|
-
.dataframe thead th {
|
|
299
|
-
text-align: right;
|
|
300
|
-
}
|
|
301
|
-
</style>
|
|
266
|
+
|
|
302
267
|
|
|
303
268
|
| kinase | SRC | EPHA3 | FES | NTRK3 | ALK | EPHA8 | ABL1 | FLT3 | EPHB2 | FYN | ... | MEK5 | PKN2 | MAP2K7 | MRCKB | HIPK3 | CDK8 | BUB1 | MEKK3 | MAP2K3 | GRK1 |
|
|
304
269
|
|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|
|
|
@@ -309,7 +274,7 @@ results
|
|
|
309
274
|
| 4 | 0.822793 | 0.796532 | 0.792343 | 0.839882 | 0.810122 | 0.781420 | 0.805251 | 0.795022 | 0.790380 | 0.864538 | ... | 1.062617 | 1.357689 | 1.485945 | 1.249266 | 1.456078 | 1.422782 | 1.376471 | 1.089629 | 1.121309 | 1.697524 |
|
|
310
275
|
|
|
311
276
|
<p>5 rows × 289 columns</p>
|
|
312
|
-
|
|
277
|
+
|
|
313
278
|
|
|
314
279
|
## Phosphorylation sites
|
|
315
280
|
|
|
@@ -323,18 +288,7 @@ df = Data.get_cptac_ensembl_site()
|
|
|
323
288
|
df.head(3)
|
|
324
289
|
```
|
|
325
290
|
|
|
326
|
-
|
|
327
|
-
<style scoped>
|
|
328
|
-
.dataframe tbody tr th:only-of-type {
|
|
329
|
-
vertical-align: middle;
|
|
330
|
-
}
|
|
331
|
-
.dataframe tbody tr th {
|
|
332
|
-
vertical-align: top;
|
|
333
|
-
}
|
|
334
|
-
.dataframe thead th {
|
|
335
|
-
text-align: right;
|
|
336
|
-
}
|
|
337
|
-
</style>
|
|
291
|
+
|
|
338
292
|
|
|
339
293
|
| | gene | site | site_seq | protein | gene_name | gene_site | protein_site |
|
|
340
294
|
|----|----|----|----|----|----|----|----|
|
|
@@ -342,7 +296,7 @@ df.head(3)
|
|
|
342
296
|
| 1 | ENSG00000003056.8 | S267 | DDQLGEESEERDDHL | ENSP00000440488.2 | M6PR | M6PR_S267 | ENSP00000440488_S267 |
|
|
343
297
|
| 2 | ENSG00000048028.11 | S1053 | PPTIRPNSPYDLCSR | ENSP00000003302.4 | USP28 | USP28_S1053 | ENSP00000003302_S1053 |
|
|
344
298
|
|
|
345
|
-
|
|
299
|
+
|
|
346
300
|
|
|
347
301
|
### [Ochoa et al. human phosphoproteome](https://www.nature.com/articles/s41587-019-0344-3)
|
|
348
302
|
|
|
@@ -351,18 +305,7 @@ df = Data.get_ochoa_site()
|
|
|
351
305
|
df.head(3)
|
|
352
306
|
```
|
|
353
307
|
|
|
354
|
-
|
|
355
|
-
<style scoped>
|
|
356
|
-
.dataframe tbody tr th:only-of-type {
|
|
357
|
-
vertical-align: middle;
|
|
358
|
-
}
|
|
359
|
-
.dataframe tbody tr th {
|
|
360
|
-
vertical-align: top;
|
|
361
|
-
}
|
|
362
|
-
.dataframe thead th {
|
|
363
|
-
text-align: right;
|
|
364
|
-
}
|
|
365
|
-
</style>
|
|
308
|
+
|
|
366
309
|
|
|
367
310
|
| | uniprot | position | residue | is_disopred | disopred_score | log10_hotspot_pval_min | isHotspot | uniprot_position | functional_score | current_uniprot | name | gene | Sequence | is_valid | site_seq | gene_site |
|
|
368
311
|
|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|
|
|
@@ -370,7 +313,7 @@ df.head(3)
|
|
|
370
313
|
| 1 | A0A075B6Q4 | 35 | S | True | 0.87 | 9.192622 | False | A0A075B6Q4_35 | 0.136966 | A0A075B6Q4 | A0A075B6Q4_HUMAN | None | MDIQKSENEDDSEWEDVDDEKGDSNDDYDSAGLLSDEDCMSVPGKT... | True | YDSAGLLSDEDCMSV | A0A075B6Q4_S35 |
|
|
371
314
|
| 2 | A0A075B6Q4 | 57 | S | False | 0.28 | 0.818834 | False | A0A075B6Q4_57 | 0.125364 | A0A075B6Q4 | A0A075B6Q4_HUMAN | None | MDIQKSENEDDSEWEDVDDEKGDSNDDYDSAGLLSDEDCMSVPGKT... | True | IADHLFWSEETKSRF | A0A075B6Q4_S57 |
|
|
372
315
|
|
|
373
|
-
|
|
316
|
+
|
|
374
317
|
|
|
375
318
|
### PhosphoSitePlus human phosphorylation site
|
|
376
319
|
|
|
@@ -379,18 +322,7 @@ df = Data.get_psp_human_site()
|
|
|
379
322
|
df.head(3)
|
|
380
323
|
```
|
|
381
324
|
|
|
382
|
-
|
|
383
|
-
<style scoped>
|
|
384
|
-
.dataframe tbody tr th:only-of-type {
|
|
385
|
-
vertical-align: middle;
|
|
386
|
-
}
|
|
387
|
-
.dataframe tbody tr th {
|
|
388
|
-
vertical-align: top;
|
|
389
|
-
}
|
|
390
|
-
.dataframe thead th {
|
|
391
|
-
text-align: right;
|
|
392
|
-
}
|
|
393
|
-
</style>
|
|
325
|
+
|
|
394
326
|
|
|
395
327
|
| | gene | protein | uniprot | site | gene_site | SITE_GRP_ID | species | site_seq | LT_LIT | MS_LIT | MS_CST | CST_CAT# | Ambiguous_Site |
|
|
396
328
|
|----|----|----|----|----|----|----|----|----|----|----|----|----|----|
|
|
@@ -398,7 +330,7 @@ df.head(3)
|
|
|
398
330
|
| 1 | YWHAB | 14-3-3 beta | P31946 | S6 | YWHAB_S6 | 15718709 | human | \_\_MtMDksELVQkAk | NaN | 8.0 | NaN | None | 0 |
|
|
399
331
|
| 2 | YWHAB | 14-3-3 beta | P31946 | Y21 | YWHAB_Y21 | 3426383 | human | LAEQAERyDDMAAAM | NaN | NaN | 4.0 | None | 0 |
|
|
400
332
|
|
|
401
|
-
|
|
333
|
+
|
|
402
334
|
|
|
403
335
|
### Unique sites of combined Ochoa & PhosphoSitePlus
|
|
404
336
|
|
|
@@ -407,18 +339,7 @@ df = Data.get_combine_site_psp_ochoa()
|
|
|
407
339
|
df.head(3)
|
|
408
340
|
```
|
|
409
341
|
|
|
410
|
-
|
|
411
|
-
<style scoped>
|
|
412
|
-
.dataframe tbody tr th:only-of-type {
|
|
413
|
-
vertical-align: middle;
|
|
414
|
-
}
|
|
415
|
-
.dataframe tbody tr th {
|
|
416
|
-
vertical-align: top;
|
|
417
|
-
}
|
|
418
|
-
.dataframe thead th {
|
|
419
|
-
text-align: right;
|
|
420
|
-
}
|
|
421
|
-
</style>
|
|
342
|
+
|
|
422
343
|
|
|
423
344
|
| | site_seq | gene_site | gene | source | num_site | acceptor | -7 | -6 | -5 | -4 | ... | -2 | -1 | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
|
|
424
345
|
|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|
|
|
@@ -427,7 +348,7 @@ df.head(3)
|
|
|
427
348
|
| 2 | AAAAAAASGVTTGKP | CLASR_S349 | CLASR | ochoa | 1 | S | A | A | A | A | ... | A | A | S | G | V | T | T | G | K | P |
|
|
428
349
|
|
|
429
350
|
<p>3 rows × 21 columns</p>
|
|
430
|
-
|
|
351
|
+
|
|
431
352
|
|
|
432
353
|
## Phosphorylation site sequence example
|
|
433
354
|
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.1.4"
|
|
File without changes
|
|
@@ -266,7 +266,7 @@ def plot_cluster(df: pd.DataFrame, # a dataframe of values that is waited for di
|
|
|
266
266
|
plt.xticks([])
|
|
267
267
|
plt.yticks([])
|
|
268
268
|
if name_list is not None:
|
|
269
|
-
texts = [plt.text(embedding_df[x_col][i], embedding_df[y_col][i], name_list[i],fontsize=8) for i in range(len(embedding_df))]
|
|
269
|
+
texts = [plt.text(embedding_df[x_col].iloc[i], embedding_df[y_col].iloc[i], name_list[i],fontsize=8) for i in range(len(embedding_df))]
|
|
270
270
|
adjust_text(texts, arrowprops=dict(arrowstyle='-', color='black'))
|
|
271
271
|
|
|
272
272
|
# %% ../nbs/02_plot.ipynb 37
|
|
@@ -403,7 +403,7 @@ def plot_bar(df,
|
|
|
403
403
|
|
|
404
404
|
idx = df.groupby(group)[value].mean().sort_values(ascending=ascending).index
|
|
405
405
|
|
|
406
|
-
sns.barplot(data=df, x=group, y=value, order=idx, **kwargs)
|
|
406
|
+
sns.barplot(data=df, x=group, y=value, order=idx,hue=group, legend=False, **kwargs)
|
|
407
407
|
|
|
408
408
|
if dots:
|
|
409
409
|
marker = {'marker': 'o',
|
|
@@ -459,8 +459,13 @@ def plot_group_bar(df,
|
|
|
459
459
|
plt.figure(figsize=figsize)
|
|
460
460
|
|
|
461
461
|
# Create the bar plot
|
|
462
|
-
sns.barplot(data=df_melted,
|
|
463
|
-
|
|
462
|
+
sns.barplot(data=df_melted,
|
|
463
|
+
x=group,
|
|
464
|
+
y='Value',
|
|
465
|
+
hue='Ranking',
|
|
466
|
+
order=order,
|
|
467
|
+
capsize=0.1,
|
|
468
|
+
err_kws={'linewidth': 1.5,'color': 'gray'},
|
|
464
469
|
**kwargs)
|
|
465
470
|
|
|
466
471
|
# Increase font size for the x-axis and y-axis tick labels
|
|
@@ -501,7 +506,7 @@ def plot_box(df,
|
|
|
501
506
|
idx = df[[group,value]].groupby(group).median().sort_values(value,ascending=False).index
|
|
502
507
|
|
|
503
508
|
|
|
504
|
-
sns.boxplot(data=df, x=group, y=value, order=idx, **kwargs)
|
|
509
|
+
sns.boxplot(data=df, x=group, y=value, order=idx,hue=group, legend=False, **kwargs)
|
|
505
510
|
|
|
506
511
|
if dots:
|
|
507
512
|
sns.stripplot(x=group, y=value, data=df, order=idx, jitter=True, color='black', size=3)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: python-katlas
|
|
3
|
-
Version: 0.1.3
|
|
3
|
+
Version: 0.1.4
|
|
4
4
|
Summary: tools for predicting kinome specificities
|
|
5
5
|
Home-page: https://github.com/sky1ove/katlas
|
|
6
6
|
Author: lily
|
|
@@ -44,10 +44,8 @@ Requires-Dist: openpyxl; extra == "dev"
|
|
|
44
44
|
|
|
45
45
|
<img alt="Katlas logo" width="600" caption="Katlas logo" src="https://github.com/sky1ove/katlas/raw/main/dataset/images/logo.png" id="logo"/>
|
|
46
46
|
|
|
47
|
-
<a target="_blank" href="https://colab.research.google.com/github/sky1ove/katlas/blob/main/nbs/index.ipynb">
|
|
48
|
-
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
|
|
49
|
-
</a> <a href="https://pypi.org/project/python-katlas/">
|
|
50
|
-
<img src="https://img.shields.io/pypi/v/python-katlas?link=https%3A%2F%2Fpypi.org%2Fproject%2Fpython-katlas%2F" alt="PyPI"></a>
|
|
47
|
+
<p><a target="_blank" href="https://colab.research.google.com/github/sky1ove/katlas/blob/main/nbs/index.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>
|
|
48
|
+
<a href="https://pypi.org/project/python-katlas/"><img src="https://img.shields.io/pypi/v/python-katlas?link=https%3A%2F%2Fpypi.org%2Fproject%2Fpython-katlas%2F" alt="PyPI"></a></p>
|
|
51
49
|
|
|
52
50
|
KATLAS is a repository containing python tools to predict kinases given
|
|
53
51
|
a substrate sequence. It also contains datasets of kinase substrate
|
|
@@ -240,18 +238,7 @@ get_pct('AEEKEyHSEGG',**param_PSPA_y, pct_ref = y_pct)
|
|
|
240
238
|
|
|
241
239
|
considering string: ['-5A', '-4E', '-3E', '-2K', '-1E', '0Y', '1H', '2S', '3E', '4G', '5G']
|
|
242
240
|
|
|
243
|
-
|
|
244
|
-
<style scoped>
|
|
245
|
-
.dataframe tbody tr th:only-of-type {
|
|
246
|
-
vertical-align: middle;
|
|
247
|
-
}
|
|
248
|
-
.dataframe tbody tr th {
|
|
249
|
-
vertical-align: top;
|
|
250
|
-
}
|
|
251
|
-
.dataframe thead th {
|
|
252
|
-
text-align: right;
|
|
253
|
-
}
|
|
254
|
-
</style>
|
|
241
|
+
|
|
255
242
|
|
|
256
243
|
| | log2(score) | percentile |
|
|
257
244
|
|-------|-------------|------------|
|
|
@@ -268,7 +255,7 @@ get_pct('AEEKEyHSEGG',**param_PSPA_y, pct_ref = y_pct)
|
|
|
268
255
|
| DDR2 | -4.920 | 10.403281 |
|
|
269
256
|
|
|
270
257
|
<p>93 rows × 2 columns</p>
|
|
271
|
-
|
|
258
|
+
|
|
272
259
|
|
|
273
260
|
## High-throughput substrate scoring on a dataframe
|
|
274
261
|
|
|
@@ -286,18 +273,7 @@ df = Data.get_ochoa_site().head()
|
|
|
286
273
|
df.iloc[:,-2:]
|
|
287
274
|
```
|
|
288
275
|
|
|
289
|
-
|
|
290
|
-
<style scoped>
|
|
291
|
-
.dataframe tbody tr th:only-of-type {
|
|
292
|
-
vertical-align: middle;
|
|
293
|
-
}
|
|
294
|
-
.dataframe tbody tr th {
|
|
295
|
-
vertical-align: top;
|
|
296
|
-
}
|
|
297
|
-
.dataframe thead th {
|
|
298
|
-
text-align: right;
|
|
299
|
-
}
|
|
300
|
-
</style>
|
|
276
|
+
|
|
301
277
|
|
|
302
278
|
| | site_seq | gene_site |
|
|
303
279
|
|-----|-----------------|----------------|
|
|
@@ -307,7 +283,7 @@ df.iloc[:,-2:]
|
|
|
307
283
|
| 3 | KSRFTEYSMTSSVMR | A0A075B6Q4_S68 |
|
|
308
284
|
| 4 | FTEYSMTSSVMRRNE | A0A075B6Q4_S71 |
|
|
309
285
|
|
|
310
|
-
|
|
286
|
+
|
|
311
287
|
|
|
312
288
|
### Set the column name and param to calculate
|
|
313
289
|
|
|
@@ -326,18 +302,7 @@ results
|
|
|
326
302
|
|
|
327
303
|
100%|██████████| 289/289 [00:05<00:00, 56.64it/s]
|
|
328
304
|
|
|
329
|
-
|
|
330
|
-
<style scoped>
|
|
331
|
-
.dataframe tbody tr th:only-of-type {
|
|
332
|
-
vertical-align: middle;
|
|
333
|
-
}
|
|
334
|
-
.dataframe tbody tr th {
|
|
335
|
-
vertical-align: top;
|
|
336
|
-
}
|
|
337
|
-
.dataframe thead th {
|
|
338
|
-
text-align: right;
|
|
339
|
-
}
|
|
340
|
-
</style>
|
|
305
|
+
|
|
341
306
|
|
|
342
307
|
| kinase | SRC | EPHA3 | FES | NTRK3 | ALK | EPHA8 | ABL1 | FLT3 | EPHB2 | FYN | ... | MEK5 | PKN2 | MAP2K7 | MRCKB | HIPK3 | CDK8 | BUB1 | MEKK3 | MAP2K3 | GRK1 |
|
|
343
308
|
|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|
|
|
@@ -348,7 +313,7 @@ results
|
|
|
348
313
|
| 4 | 0.822793 | 0.796532 | 0.792343 | 0.839882 | 0.810122 | 0.781420 | 0.805251 | 0.795022 | 0.790380 | 0.864538 | ... | 1.062617 | 1.357689 | 1.485945 | 1.249266 | 1.456078 | 1.422782 | 1.376471 | 1.089629 | 1.121309 | 1.697524 |
|
|
349
314
|
|
|
350
315
|
<p>5 rows × 289 columns</p>
|
|
351
|
-
|
|
316
|
+
|
|
352
317
|
|
|
353
318
|
## Phosphorylation sites
|
|
354
319
|
|
|
@@ -362,18 +327,7 @@ df = Data.get_cptac_ensembl_site()
|
|
|
362
327
|
df.head(3)
|
|
363
328
|
```
|
|
364
329
|
|
|
365
|
-
|
|
366
|
-
<style scoped>
|
|
367
|
-
.dataframe tbody tr th:only-of-type {
|
|
368
|
-
vertical-align: middle;
|
|
369
|
-
}
|
|
370
|
-
.dataframe tbody tr th {
|
|
371
|
-
vertical-align: top;
|
|
372
|
-
}
|
|
373
|
-
.dataframe thead th {
|
|
374
|
-
text-align: right;
|
|
375
|
-
}
|
|
376
|
-
</style>
|
|
330
|
+
|
|
377
331
|
|
|
378
332
|
| | gene | site | site_seq | protein | gene_name | gene_site | protein_site |
|
|
379
333
|
|----|----|----|----|----|----|----|----|
|
|
@@ -381,7 +335,7 @@ df.head(3)
|
|
|
381
335
|
| 1 | ENSG00000003056.8 | S267 | DDQLGEESEERDDHL | ENSP00000440488.2 | M6PR | M6PR_S267 | ENSP00000440488_S267 |
|
|
382
336
|
| 2 | ENSG00000048028.11 | S1053 | PPTIRPNSPYDLCSR | ENSP00000003302.4 | USP28 | USP28_S1053 | ENSP00000003302_S1053 |
|
|
383
337
|
|
|
384
|
-
|
|
338
|
+
|
|
385
339
|
|
|
386
340
|
### [Ochoa et al. human phosphoproteome](https://www.nature.com/articles/s41587-019-0344-3)
|
|
387
341
|
|
|
@@ -390,18 +344,7 @@ df = Data.get_ochoa_site()
|
|
|
390
344
|
df.head(3)
|
|
391
345
|
```
|
|
392
346
|
|
|
393
|
-
|
|
394
|
-
<style scoped>
|
|
395
|
-
.dataframe tbody tr th:only-of-type {
|
|
396
|
-
vertical-align: middle;
|
|
397
|
-
}
|
|
398
|
-
.dataframe tbody tr th {
|
|
399
|
-
vertical-align: top;
|
|
400
|
-
}
|
|
401
|
-
.dataframe thead th {
|
|
402
|
-
text-align: right;
|
|
403
|
-
}
|
|
404
|
-
</style>
|
|
347
|
+
|
|
405
348
|
|
|
406
349
|
| | uniprot | position | residue | is_disopred | disopred_score | log10_hotspot_pval_min | isHotspot | uniprot_position | functional_score | current_uniprot | name | gene | Sequence | is_valid | site_seq | gene_site |
|
|
407
350
|
|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|
|
|
@@ -409,7 +352,7 @@ df.head(3)
|
|
|
409
352
|
| 1 | A0A075B6Q4 | 35 | S | True | 0.87 | 9.192622 | False | A0A075B6Q4_35 | 0.136966 | A0A075B6Q4 | A0A075B6Q4_HUMAN | None | MDIQKSENEDDSEWEDVDDEKGDSNDDYDSAGLLSDEDCMSVPGKT... | True | YDSAGLLSDEDCMSV | A0A075B6Q4_S35 |
|
|
410
353
|
| 2 | A0A075B6Q4 | 57 | S | False | 0.28 | 0.818834 | False | A0A075B6Q4_57 | 0.125364 | A0A075B6Q4 | A0A075B6Q4_HUMAN | None | MDIQKSENEDDSEWEDVDDEKGDSNDDYDSAGLLSDEDCMSVPGKT... | True | IADHLFWSEETKSRF | A0A075B6Q4_S57 |
|
|
411
354
|
|
|
412
|
-
|
|
355
|
+
|
|
413
356
|
|
|
414
357
|
### PhosphoSitePlus human phosphorylation site
|
|
415
358
|
|
|
@@ -418,18 +361,7 @@ df = Data.get_psp_human_site()
|
|
|
418
361
|
df.head(3)
|
|
419
362
|
```
|
|
420
363
|
|
|
421
|
-
|
|
422
|
-
<style scoped>
|
|
423
|
-
.dataframe tbody tr th:only-of-type {
|
|
424
|
-
vertical-align: middle;
|
|
425
|
-
}
|
|
426
|
-
.dataframe tbody tr th {
|
|
427
|
-
vertical-align: top;
|
|
428
|
-
}
|
|
429
|
-
.dataframe thead th {
|
|
430
|
-
text-align: right;
|
|
431
|
-
}
|
|
432
|
-
</style>
|
|
364
|
+
|
|
433
365
|
|
|
434
366
|
| | gene | protein | uniprot | site | gene_site | SITE_GRP_ID | species | site_seq | LT_LIT | MS_LIT | MS_CST | CST_CAT# | Ambiguous_Site |
|
|
435
367
|
|----|----|----|----|----|----|----|----|----|----|----|----|----|----|
|
|
@@ -437,7 +369,7 @@ df.head(3)
|
|
|
437
369
|
| 1 | YWHAB | 14-3-3 beta | P31946 | S6 | YWHAB_S6 | 15718709 | human | \_\_MtMDksELVQkAk | NaN | 8.0 | NaN | None | 0 |
|
|
438
370
|
| 2 | YWHAB | 14-3-3 beta | P31946 | Y21 | YWHAB_Y21 | 3426383 | human | LAEQAERyDDMAAAM | NaN | NaN | 4.0 | None | 0 |
|
|
439
371
|
|
|
440
|
-
|
|
372
|
+
|
|
441
373
|
|
|
442
374
|
### Unique sites of combined Ochoa & PhosphoSitePlus
|
|
443
375
|
|
|
@@ -446,18 +378,7 @@ df = Data.get_combine_site_psp_ochoa()
|
|
|
446
378
|
df.head(3)
|
|
447
379
|
```
|
|
448
380
|
|
|
449
|
-
|
|
450
|
-
<style scoped>
|
|
451
|
-
.dataframe tbody tr th:only-of-type {
|
|
452
|
-
vertical-align: middle;
|
|
453
|
-
}
|
|
454
|
-
.dataframe tbody tr th {
|
|
455
|
-
vertical-align: top;
|
|
456
|
-
}
|
|
457
|
-
.dataframe thead th {
|
|
458
|
-
text-align: right;
|
|
459
|
-
}
|
|
460
|
-
</style>
|
|
381
|
+
|
|
461
382
|
|
|
462
383
|
| | site_seq | gene_site | gene | source | num_site | acceptor | -7 | -6 | -5 | -4 | ... | -2 | -1 | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
|
|
463
384
|
|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|
|
|
@@ -466,7 +387,7 @@ df.head(3)
|
|
|
466
387
|
| 2 | AAAAAAASGVTTGKP | CLASR_S349 | CLASR | ochoa | 1 | S | A | A | A | A | ... | A | A | S | G | V | T | T | G | K | P |
|
|
467
388
|
|
|
468
389
|
<p>3 rows × 21 columns</p>
|
|
469
|
-
|
|
390
|
+
|
|
470
391
|
|
|
471
392
|
## Phosphorylation site sequence example
|
|
472
393
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.1.2"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|