python-katlas 0.2.0__tar.gz → 0.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {python_katlas-0.2.0/python_katlas.egg-info → python_katlas-0.2.2}/PKG-INFO +15 -58
- {python_katlas-0.2.0 → python_katlas-0.2.2}/README.md +1 -17
- python_katlas-0.2.2/katlas/__init__.py +1 -0
- python_katlas-0.2.2/katlas/_modidx.py +141 -0
- python_katlas-0.2.2/katlas/common.py +10 -0
- python_katlas-0.2.2/katlas/compare.py +118 -0
- python_katlas-0.2.2/katlas/data.py +544 -0
- python_katlas-0.2.2/katlas/hierarchical.py +20 -0
- python_katlas-0.2.2/katlas/lo.py +69 -0
- {python_katlas-0.2.0 → python_katlas-0.2.2}/katlas/pathway.py +13 -13
- python_katlas-0.2.2/katlas/plot.py +356 -0
- python_katlas-0.2.2/katlas/pspa.py +138 -0
- python_katlas-0.2.2/katlas/pssm.py +375 -0
- python_katlas-0.2.0/katlas/score.py → python_katlas-0.2.2/katlas/scoring.py +30 -27
- {python_katlas-0.2.0 → python_katlas-0.2.2}/katlas/utils.py +92 -45
- python_katlas-0.2.2/pyproject.toml +59 -0
- {python_katlas-0.2.0 → python_katlas-0.2.2/python_katlas.egg-info}/PKG-INFO +15 -58
- {python_katlas-0.2.0 → python_katlas-0.2.2}/python_katlas.egg-info/SOURCES.txt +5 -10
- python_katlas-0.2.2/python_katlas.egg-info/requires.txt +15 -0
- python_katlas-0.2.0/katlas/__init__.py +0 -1
- python_katlas-0.2.0/katlas/_modidx.py +0 -216
- python_katlas-0.2.0/katlas/clustering.py +0 -142
- python_katlas-0.2.0/katlas/common.py +0 -4
- python_katlas-0.2.0/katlas/core.py +0 -6
- python_katlas-0.2.0/katlas/data.py +0 -455
- python_katlas-0.2.0/katlas/dnn.py +0 -384
- python_katlas-0.2.0/katlas/feature.py +0 -320
- python_katlas-0.2.0/katlas/plot.py +0 -924
- python_katlas-0.2.0/katlas/pssm.py +0 -844
- python_katlas-0.2.0/katlas/statistics.py +0 -102
- python_katlas-0.2.0/katlas/train.py +0 -207
- python_katlas-0.2.0/pyproject.toml +0 -11
- python_katlas-0.2.0/python_katlas.egg-info/not-zip-safe +0 -1
- python_katlas-0.2.0/python_katlas.egg-info/requires.txt +0 -27
- python_katlas-0.2.0/settings.ini +0 -40
- python_katlas-0.2.0/setup.py +0 -57
- {python_katlas-0.2.0 → python_katlas-0.2.2}/LICENSE +0 -0
- {python_katlas-0.2.0 → python_katlas-0.2.2}/MANIFEST.in +0 -0
- {python_katlas-0.2.0 → python_katlas-0.2.2}/python_katlas.egg-info/dependency_links.txt +0 -0
- {python_katlas-0.2.0 → python_katlas-0.2.2}/python_katlas.egg-info/entry_points.txt +0 -0
- {python_katlas-0.2.0 → python_katlas-0.2.2}/python_katlas.egg-info/top_level.txt +0 -0
- {python_katlas-0.2.0 → python_katlas-0.2.2}/setup.cfg +0 -0
|
@@ -1,62 +1,35 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: python-katlas
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.2
|
|
4
4
|
Summary: tools for predicting kinome specificities
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
Keywords: nbdev
|
|
10
|
-
Classifier: Development Status :: 4 - Beta
|
|
11
|
-
Classifier: Intended Audience :: Developers
|
|
5
|
+
Author-email: lily <lcai888666@gmail.com>
|
|
6
|
+
License: Apache-2.0
|
|
7
|
+
Project-URL: Repository, https://github.com/sky1ove/katlas
|
|
8
|
+
Project-URL: Documentation, https://sky1ove.github.io/katlas
|
|
9
|
+
Keywords: nbdev,jupyter,notebook,python
|
|
12
10
|
Classifier: Natural Language :: English
|
|
13
|
-
Classifier:
|
|
14
|
-
Classifier:
|
|
15
|
-
Classifier: Programming Language :: Python :: 3
|
|
16
|
-
Classifier: Programming Language :: Python :: 3
|
|
17
|
-
|
|
18
|
-
Requires-Python: >=3.7
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
15
|
+
Requires-Python: >=3.10
|
|
19
16
|
Description-Content-Type: text/markdown
|
|
20
17
|
License-File: LICENSE
|
|
21
18
|
Requires-Dist: pandas
|
|
22
19
|
Requires-Dist: gdown
|
|
23
|
-
Requires-Dist: statsmodels
|
|
24
|
-
Requires-Dist: statannotations
|
|
25
|
-
Requires-Dist: fastparquet
|
|
26
20
|
Requires-Dist: pyarrow
|
|
27
21
|
Requires-Dist: tqdm
|
|
28
22
|
Requires-Dist: logomaker-kinase
|
|
29
23
|
Requires-Dist: seaborn
|
|
30
|
-
Requires-Dist: bokeh
|
|
31
24
|
Requires-Dist: reactome2py
|
|
32
|
-
Requires-Dist: adjustText
|
|
33
25
|
Requires-Dist: scikit-learn
|
|
34
|
-
Requires-Dist: umap-learn
|
|
35
|
-
Requires-Dist: ipywidgets
|
|
36
26
|
Requires-Dist: biopython
|
|
27
|
+
Requires-Dist: filelock>=3.25.2
|
|
28
|
+
Requires-Dist: python-kplot>=0.0.3
|
|
37
29
|
Provides-Extra: dev
|
|
38
30
|
Requires-Dist: nbdev; extra == "dev"
|
|
39
|
-
Requires-Dist:
|
|
40
|
-
Requires-Dist: fastai; extra == "dev"
|
|
41
|
-
Requires-Dist: fairscale; extra == "dev"
|
|
42
|
-
Requires-Dist: fair-esm; extra == "dev"
|
|
43
|
-
Requires-Dist: rdkit; extra == "dev"
|
|
44
|
-
Requires-Dist: openpyxl; extra == "dev"
|
|
45
|
-
Requires-Dist: transformers; extra == "dev"
|
|
46
|
-
Requires-Dist: sentencepiece; extra == "dev"
|
|
47
|
-
Dynamic: author
|
|
48
|
-
Dynamic: author-email
|
|
49
|
-
Dynamic: classifier
|
|
50
|
-
Dynamic: description
|
|
51
|
-
Dynamic: description-content-type
|
|
52
|
-
Dynamic: home-page
|
|
53
|
-
Dynamic: keywords
|
|
54
|
-
Dynamic: license
|
|
31
|
+
Requires-Dist: jupyterlab>=3.6.8; extra == "dev"
|
|
55
32
|
Dynamic: license-file
|
|
56
|
-
Dynamic: provides-extra
|
|
57
|
-
Dynamic: requires-dist
|
|
58
|
-
Dynamic: requires-python
|
|
59
|
-
Dynamic: summary
|
|
60
33
|
|
|
61
34
|
# KATLAS
|
|
62
35
|
|
|
@@ -94,11 +67,6 @@ helpful to your research.
|
|
|
94
67
|
and [CPTAC](https://pdc.cancer.gov/pdc/cptac-pancancer) /
|
|
95
68
|
[LinkedOmics](https://academic.oup.com/nar/article/46/D1/D956/4607804)
|
|
96
69
|
|
|
97
|
-
## Reproduce datasets & figures
|
|
98
|
-
|
|
99
|
-
Follow the instructions in katlas_raw:
|
|
100
|
-
https://github.com/sky1ove/katlas_raw
|
|
101
|
-
|
|
102
70
|
## Web applications
|
|
103
71
|
|
|
104
72
|
Users can now run the analysis directly on the web without needing to
|
|
@@ -109,21 +77,10 @@ Check out our latest web platform:
|
|
|
109
77
|
|
|
110
78
|
## Install
|
|
111
79
|
|
|
112
|
-
UV:
|
|
113
|
-
|
|
114
80
|
``` bash
|
|
115
|
-
|
|
81
|
+
pip install python-katlas
|
|
116
82
|
```
|
|
117
83
|
|
|
118
|
-
pip:
|
|
119
|
-
|
|
120
|
-
``` bash
|
|
121
|
-
pip install -U python-katlas
|
|
122
|
-
```
|
|
123
|
-
|
|
124
|
-
If using machine-learning related modules, need to install development
|
|
125
|
-
verison: `pip install -U "python-katlas[dev]"`
|
|
126
|
-
|
|
127
84
|
## Import
|
|
128
85
|
|
|
129
86
|
``` python
|
|
@@ -34,11 +34,6 @@ helpful to your research.
|
|
|
34
34
|
and [CPTAC](https://pdc.cancer.gov/pdc/cptac-pancancer) /
|
|
35
35
|
[LinkedOmics](https://academic.oup.com/nar/article/46/D1/D956/4607804)
|
|
36
36
|
|
|
37
|
-
## Reproduce datasets & figures
|
|
38
|
-
|
|
39
|
-
Follow the instructions in katlas_raw:
|
|
40
|
-
https://github.com/sky1ove/katlas_raw
|
|
41
|
-
|
|
42
37
|
## Web applications
|
|
43
38
|
|
|
44
39
|
Users can now run the analysis directly on the web without needing to
|
|
@@ -49,21 +44,10 @@ Check out our latest web platform:
|
|
|
49
44
|
|
|
50
45
|
## Install
|
|
51
46
|
|
|
52
|
-
UV:
|
|
53
|
-
|
|
54
|
-
``` bash
|
|
55
|
-
uv add -U python-katlas
|
|
56
|
-
```
|
|
57
|
-
|
|
58
|
-
pip:
|
|
59
|
-
|
|
60
47
|
``` bash
|
|
61
|
-
pip install
|
|
48
|
+
pip install python-katlas
|
|
62
49
|
```
|
|
63
50
|
|
|
64
|
-
If using machine-learning related modules, need to install development
|
|
65
|
-
verison: `pip install -U "python-katlas[dev]"`
|
|
66
|
-
|
|
67
51
|
## Import
|
|
68
52
|
|
|
69
53
|
``` python
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Version string of the katlas package.
__version__ = "0.2.2"
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
# Autogenerated by nbdev
|
|
2
|
+
|
|
3
|
+
d = { 'settings': { 'branch': 'main',
|
|
4
|
+
'doc_baseurl': '/katlas',
|
|
5
|
+
'doc_host': 'https://sky1ove.github.io',
|
|
6
|
+
'git_url': 'https://github.com/sky1ove/katlas',
|
|
7
|
+
'lib_path': 'katlas'},
|
|
8
|
+
'syms': { 'katlas.common': {},
|
|
9
|
+
'katlas.compare': { 'katlas.compare.cosine_overall_flat': ('compare.html#cosine_overall_flat', 'katlas/compare.py'),
|
|
10
|
+
'katlas.compare.cosine_similarity': ('compare.html#cosine_similarity', 'katlas/compare.py'),
|
|
11
|
+
'katlas.compare.js_divergence': ('compare.html#js_divergence', 'katlas/compare.py'),
|
|
12
|
+
'katlas.compare.js_divergence_flat': ('compare.html#js_divergence_flat', 'katlas/compare.py'),
|
|
13
|
+
'katlas.compare.js_similarity': ('compare.html#js_similarity', 'katlas/compare.py'),
|
|
14
|
+
'katlas.compare.js_similarity_flat': ('compare.html#js_similarity_flat', 'katlas/compare.py'),
|
|
15
|
+
'katlas.compare.kl_divergence': ('compare.html#kl_divergence', 'katlas/compare.py'),
|
|
16
|
+
'katlas.compare.kl_divergence_flat': ('compare.html#kl_divergence_flat', 'katlas/compare.py')},
|
|
17
|
+
'katlas.data': { 'katlas.data.CPTAC': ('data.html#cptac', 'katlas/data.py'),
|
|
18
|
+
'katlas.data.CPTAC._read_file': ('data.html#cptac._read_file', 'katlas/data.py'),
|
|
19
|
+
'katlas.data.CPTAC.get_id': ('data.html#cptac.get_id', 'katlas/data.py'),
|
|
20
|
+
'katlas.data.CPTAC.list_cancer': ('data.html#cptac.list_cancer', 'katlas/data.py'),
|
|
21
|
+
'katlas.data.Data': ('data.html#data', 'katlas/data.py'),
|
|
22
|
+
'katlas.data.Data.clear_cache': ('data.html#data.clear_cache', 'katlas/data.py'),
|
|
23
|
+
'katlas.data.Data.download': ('data.html#data.download', 'katlas/data.py'),
|
|
24
|
+
'katlas.data.Data.get_aa_info': ('data.html#data.get_aa_info', 'katlas/data.py'),
|
|
25
|
+
'katlas.data.Data.get_aa_morgan': ('data.html#data.get_aa_morgan', 'katlas/data.py'),
|
|
26
|
+
'katlas.data.Data.get_aa_rdkit': ('data.html#data.get_aa_rdkit', 'katlas/data.py'),
|
|
27
|
+
'katlas.data.Data.get_cddm': ('data.html#data.get_cddm', 'katlas/data.py'),
|
|
28
|
+
'katlas.data.Data.get_cddm_LO': ('data.html#data.get_cddm_lo', 'katlas/data.py'),
|
|
29
|
+
'katlas.data.Data.get_cddm_LO_upper': ('data.html#data.get_cddm_lo_upper', 'katlas/data.py'),
|
|
30
|
+
'katlas.data.Data.get_cddm_upper': ('data.html#data.get_cddm_upper', 'katlas/data.py'),
|
|
31
|
+
'katlas.data.Data.get_combine_site_phosphorylated': ( 'data.html#data.get_combine_site_phosphorylated',
|
|
32
|
+
'katlas/data.py'),
|
|
33
|
+
'katlas.data.Data.get_combine_site_psp_ochoa': ('data.html#data.get_combine_site_psp_ochoa', 'katlas/data.py'),
|
|
34
|
+
'katlas.data.Data.get_cptac_ensembl_site': ('data.html#data.get_cptac_ensembl_site', 'katlas/data.py'),
|
|
35
|
+
'katlas.data.Data.get_cptac_gene_site': ('data.html#data.get_cptac_gene_site', 'katlas/data.py'),
|
|
36
|
+
'katlas.data.Data.get_cptac_unique_site': ('data.html#data.get_cptac_unique_site', 'katlas/data.py'),
|
|
37
|
+
'katlas.data.Data.get_human_site': ('data.html#data.get_human_site', 'katlas/data.py'),
|
|
38
|
+
'katlas.data.Data.get_kd_uniprot': ('data.html#data.get_kd_uniprot', 'katlas/data.py'),
|
|
39
|
+
'katlas.data.Data.get_kinase_info': ('data.html#data.get_kinase_info', 'katlas/data.py'),
|
|
40
|
+
'katlas.data.Data.get_kinase_uniprot': ('data.html#data.get_kinase_uniprot', 'katlas/data.py'),
|
|
41
|
+
'katlas.data.Data.get_ks_background': ('data.html#data.get_ks_background', 'katlas/data.py'),
|
|
42
|
+
'katlas.data.Data.get_ks_dataset': ('data.html#data.get_ks_dataset', 'katlas/data.py'),
|
|
43
|
+
'katlas.data.Data.get_ks_unique': ('data.html#data.get_ks_unique', 'katlas/data.py'),
|
|
44
|
+
'katlas.data.Data.get_num_dict': ('data.html#data.get_num_dict', 'katlas/data.py'),
|
|
45
|
+
'katlas.data.Data.get_ochoa_site': ('data.html#data.get_ochoa_site', 'katlas/data.py'),
|
|
46
|
+
'katlas.data.Data.get_psp_human_site': ('data.html#data.get_psp_human_site', 'katlas/data.py'),
|
|
47
|
+
'katlas.data.Data.get_pspa': ('data.html#data.get_pspa', 'katlas/data.py'),
|
|
48
|
+
'katlas.data.Data.get_pspa_raw': ('data.html#data.get_pspa_raw', 'katlas/data.py'),
|
|
49
|
+
'katlas.data.Data.get_pspa_scale': ('data.html#data.get_pspa_scale', 'katlas/data.py'),
|
|
50
|
+
'katlas.data.Data.get_pspa_st': ('data.html#data.get_pspa_st', 'katlas/data.py'),
|
|
51
|
+
'katlas.data.Data.get_pspa_st_pct': ('data.html#data.get_pspa_st_pct', 'katlas/data.py'),
|
|
52
|
+
'katlas.data.Data.get_pspa_tyr': ('data.html#data.get_pspa_tyr', 'katlas/data.py'),
|
|
53
|
+
'katlas.data.Data.get_pspa_tyr_pct': ('data.html#data.get_pspa_tyr_pct', 'katlas/data.py'),
|
|
54
|
+
'katlas.data.Data.get_reactome_pathway': ('data.html#data.get_reactome_pathway', 'katlas/data.py'),
|
|
55
|
+
'katlas.data.Data.get_reactome_pathway_lo': ('data.html#data.get_reactome_pathway_lo', 'katlas/data.py'),
|
|
56
|
+
'katlas.data.Data.read_file': ('data.html#data.read_file', 'katlas/data.py'),
|
|
57
|
+
'katlas.data._default_dataset_dir': ('data.html#_default_dataset_dir', 'katlas/data.py'),
|
|
58
|
+
'katlas.data._normalize_dataset_dir': ('data.html#_normalize_dataset_dir', 'katlas/data.py'),
|
|
59
|
+
'katlas.data._normalize_required_files': ('data.html#_normalize_required_files', 'katlas/data.py'),
|
|
60
|
+
'katlas.data._read_dataset_file_cached': ('data.html#_read_dataset_file_cached', 'katlas/data.py')},
|
|
61
|
+
'katlas.hierarchical': { 'katlas.hierarchical.get_1d_js': ('hierarchical.html#get_1d_js', 'katlas/hierarchical.py'),
|
|
62
|
+
'katlas.hierarchical.get_1d_js_parallel': ( 'hierarchical.html#get_1d_js_parallel',
|
|
63
|
+
'katlas/hierarchical.py')},
|
|
64
|
+
'katlas.lo': { 'katlas.lo.get_pssm_LO': ('lo.html#get_pssm_lo', 'katlas/lo.py'),
|
|
65
|
+
'katlas.lo.get_pssm_LO_flat': ('lo.html#get_pssm_lo_flat', 'katlas/lo.py'),
|
|
66
|
+
'katlas.lo.plot_logo_LO': ('lo.html#plot_logo_lo', 'katlas/lo.py'),
|
|
67
|
+
'katlas.lo.plot_logo_heatmap_LO': ('lo.html#plot_logo_heatmap_lo', 'katlas/lo.py')},
|
|
68
|
+
'katlas.pathway': { 'katlas.pathway.add_reactome_ref': ('pathway.html#add_reactome_ref', 'katlas/pathway.py'),
|
|
69
|
+
'katlas.pathway.get_overlap': ('pathway.html#get_overlap', 'katlas/pathway.py'),
|
|
70
|
+
'katlas.pathway.get_reactome': ('pathway.html#get_reactome', 'katlas/pathway.py'),
|
|
71
|
+
'katlas.pathway.get_reactome_raw': ('pathway.html#get_reactome_raw', 'katlas/pathway.py'),
|
|
72
|
+
'katlas.pathway.plot_path': ('pathway.html#plot_path', 'katlas/pathway.py'),
|
|
73
|
+
'katlas.pathway.query_reactome': ('pathway.html#query_reactome', 'katlas/pathway.py')},
|
|
74
|
+
'katlas.plot': { 'katlas.plot.change_center_name': ('plot.html#change_center_name', 'katlas/plot.py'),
|
|
75
|
+
'katlas.plot.convert_logo_df': ('plot.html#convert_logo_df', 'katlas/plot.py'),
|
|
76
|
+
'katlas.plot.get_logo_IC': ('plot.html#get_logo_ic', 'katlas/plot.py'),
|
|
77
|
+
'katlas.plot.get_pos_min_max': ('plot.html#get_pos_min_max', 'katlas/plot.py'),
|
|
78
|
+
'katlas.plot.pSTY2sty': ('plot.html#psty2sty', 'katlas/plot.py'),
|
|
79
|
+
'katlas.plot.plot_heatmap': ('plot.html#plot_heatmap', 'katlas/plot.py'),
|
|
80
|
+
'katlas.plot.plot_heatmap_simple': ('plot.html#plot_heatmap_simple', 'katlas/plot.py'),
|
|
81
|
+
'katlas.plot.plot_logo': ('plot.html#plot_logo', 'katlas/plot.py'),
|
|
82
|
+
'katlas.plot.plot_logo_heatmap': ('plot.html#plot_logo_heatmap', 'katlas/plot.py'),
|
|
83
|
+
'katlas.plot.plot_logo_raw': ('plot.html#plot_logo_raw', 'katlas/plot.py'),
|
|
84
|
+
'katlas.plot.plot_logos': ('plot.html#plot_logos', 'katlas/plot.py'),
|
|
85
|
+
'katlas.plot.plot_logos_idx': ('plot.html#plot_logos_idx', 'katlas/plot.py'),
|
|
86
|
+
'katlas.plot.plot_two_heatmaps': ('plot.html#plot_two_heatmaps', 'katlas/plot.py'),
|
|
87
|
+
'katlas.plot.scale_pos_neg_values': ('plot.html#scale_pos_neg_values', 'katlas/plot.py'),
|
|
88
|
+
'katlas.plot.scale_zero_position': ('plot.html#scale_zero_position', 'katlas/plot.py'),
|
|
89
|
+
'katlas.plot.sty2pSTY': ('plot.html#sty2psty', 'katlas/plot.py'),
|
|
90
|
+
'katlas.plot.sty2pSTY_df': ('plot.html#sty2psty_df', 'katlas/plot.py')},
|
|
91
|
+
'katlas.pspa': { 'katlas.pspa.get_logo': ('pspa.html#get_logo', 'katlas/pspa.py'),
|
|
92
|
+
'katlas.pspa.get_one_kinase': ('pspa.html#get_one_kinase', 'katlas/pspa.py'),
|
|
93
|
+
'katlas.pspa.plot_logo_heatmap_pspa': ('pspa.html#plot_logo_heatmap_pspa', 'katlas/pspa.py'),
|
|
94
|
+
'katlas.pspa.plot_logo_pspa': ('pspa.html#plot_logo_pspa', 'katlas/pspa.py'),
|
|
95
|
+
'katlas.pspa.preprocess_pspa': ('pspa.html#preprocess_pspa', 'katlas/pspa.py'),
|
|
96
|
+
'katlas.pspa.raw2norm': ('pspa.html#raw2norm', 'katlas/pspa.py')},
|
|
97
|
+
'katlas.pssm': { 'katlas.pssm._clean_zero': ('pssm.html#_clean_zero', 'katlas/pssm.py'),
|
|
98
|
+
'katlas.pssm.clean_zero_normalize': ('pssm.html#clean_zero_normalize', 'katlas/pssm.py'),
|
|
99
|
+
'katlas.pssm.flatten_pssm': ('pssm.html#flatten_pssm', 'katlas/pssm.py'),
|
|
100
|
+
'katlas.pssm.get_IC': ('pssm.html#get_ic', 'katlas/pssm.py'),
|
|
101
|
+
'katlas.pssm.get_IC_flat': ('pssm.html#get_ic_flat', 'katlas/pssm.py'),
|
|
102
|
+
'katlas.pssm.get_cluster_pssms': ('pssm.html#get_cluster_pssms', 'katlas/pssm.py'),
|
|
103
|
+
'katlas.pssm.get_entropy': ('pssm.html#get_entropy', 'katlas/pssm.py'),
|
|
104
|
+
'katlas.pssm.get_entropy_flat': ('pssm.html#get_entropy_flat', 'katlas/pssm.py'),
|
|
105
|
+
'katlas.pssm.get_prob': ('pssm.html#get_prob', 'katlas/pssm.py'),
|
|
106
|
+
'katlas.pssm.get_pssm_seq_labels': ('pssm.html#get_pssm_seq_labels', 'katlas/pssm.py'),
|
|
107
|
+
'katlas.pssm.get_pssm_weight': ('pssm.html#get_pssm_weight', 'katlas/pssm.py'),
|
|
108
|
+
'katlas.pssm.get_specificity': ('pssm.html#get_specificity', 'katlas/pssm.py'),
|
|
109
|
+
'katlas.pssm.get_specificity_flat': ('pssm.html#get_specificity_flat', 'katlas/pssm.py'),
|
|
110
|
+
'katlas.pssm.pssm_to_seq': ('pssm.html#pssm_to_seq', 'katlas/pssm.py'),
|
|
111
|
+
'katlas.pssm.recover_pssm': ('pssm.html#recover_pssm', 'katlas/pssm.py')},
|
|
112
|
+
'katlas.scoring': { 'katlas.scoring.Params': ('scoring.html#params', 'katlas/scoring.py'),
|
|
113
|
+
'katlas.scoring.STY2sty': ('scoring.html#sty2sty', 'katlas/scoring.py'),
|
|
114
|
+
'katlas.scoring.cut_seq': ('scoring.html#cut_seq', 'katlas/scoring.py'),
|
|
115
|
+
'katlas.scoring.duplicate_ref_zero': ('scoring.html#duplicate_ref_zero', 'katlas/scoring.py'),
|
|
116
|
+
'katlas.scoring.get_dict': ('scoring.html#get_dict', 'katlas/scoring.py'),
|
|
117
|
+
'katlas.scoring.get_pct': ('scoring.html#get_pct', 'katlas/scoring.py'),
|
|
118
|
+
'katlas.scoring.get_pct_df': ('scoring.html#get_pct_df', 'katlas/scoring.py'),
|
|
119
|
+
'katlas.scoring.multiply': ('scoring.html#multiply', 'katlas/scoring.py'),
|
|
120
|
+
'katlas.scoring.multiply_generic': ('scoring.html#multiply_generic', 'katlas/scoring.py'),
|
|
121
|
+
'katlas.scoring.multiply_pspa': ('scoring.html#multiply_pspa', 'katlas/scoring.py'),
|
|
122
|
+
'katlas.scoring.predict_kinase': ('scoring.html#predict_kinase', 'katlas/scoring.py'),
|
|
123
|
+
'katlas.scoring.predict_kinase_df': ('scoring.html#predict_kinase_df', 'katlas/scoring.py'),
|
|
124
|
+
'katlas.scoring.preprocess_ref': ('scoring.html#preprocess_ref', 'katlas/scoring.py'),
|
|
125
|
+
'katlas.scoring.sumup': ('scoring.html#sumup', 'katlas/scoring.py')},
|
|
126
|
+
'katlas.utils': { 'katlas.utils.aln2df': ('utils.html#aln2df', 'katlas/utils.py'),
|
|
127
|
+
'katlas.utils.check_seq': ('utils.html#check_seq', 'katlas/utils.py'),
|
|
128
|
+
'katlas.utils.check_seqs': ('utils.html#check_seqs', 'katlas/utils.py'),
|
|
129
|
+
'katlas.utils.clean_feat': ('utils.html#clean_feat', 'katlas/utils.py'),
|
|
130
|
+
'katlas.utils.extract_site_seq': ('utils.html#extract_site_seq', 'katlas/utils.py'),
|
|
131
|
+
'katlas.utils.get_aln_freq': ('utils.html#get_aln_freq', 'katlas/utils.py'),
|
|
132
|
+
'katlas.utils.get_fasta': ('utils.html#get_fasta', 'katlas/utils.py'),
|
|
133
|
+
'katlas.utils.get_subfamily_color': ('utils.html#get_subfamily_color', 'katlas/utils.py'),
|
|
134
|
+
'katlas.utils.phosphorylate_seq': ('utils.html#phosphorylate_seq', 'katlas/utils.py'),
|
|
135
|
+
'katlas.utils.phosphorylate_seq_df': ('utils.html#phosphorylate_seq_df', 'katlas/utils.py'),
|
|
136
|
+
'katlas.utils.prepare_path': ('utils.html#prepare_path', 'katlas/utils.py'),
|
|
137
|
+
'katlas.utils.remove_hi_corr': ('utils.html#remove_hi_corr', 'katlas/utils.py'),
|
|
138
|
+
'katlas.utils.run_clustalo': ('utils.html#run_clustalo', 'katlas/utils.py'),
|
|
139
|
+
'katlas.utils.standardize': ('utils.html#standardize', 'katlas/utils.py'),
|
|
140
|
+
'katlas.utils.validate_site': ('utils.html#validate_site', 'katlas/utils.py'),
|
|
141
|
+
'katlas.utils.validate_site_df': ('utils.html#validate_site_df', 'katlas/utils.py')}}}
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
"""algorithms about comparing two PSSMs"""
|
|
2
|
+
|
|
3
|
+
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/06_compare.ipynb.
|
|
4
|
+
|
|
5
|
+
# %% auto #0
|
|
6
|
+
# Names exported by a star-import of this module (the public comparison functions defined below).
__all__ = ['kl_divergence', 'kl_divergence_flat', 'js_divergence', 'js_divergence_flat', 'js_similarity', 'js_similarity_flat',
           'cosine_similarity', 'cosine_overall_flat']
|
|
8
|
+
|
|
9
|
+
# %% ../nbs/06_compare.ipynb #76949d54-f519-4bc6-9da5-d48a05bbfe69
|
|
10
|
+
import numpy as np, pandas as pd
|
|
11
|
+
from .pssm import EPSILON
|
|
12
|
+
|
|
13
|
+
# %% ../nbs/06_compare.ipynb #b1ef2ee7-70fd-4703-96b4-1c1a9053987f
|
|
14
|
+
def kl_divergence(p1, # target pssm p (array-like, shape: (AA, positions))
                  p2, # pred pssm q (array-like, same shape as p1)
                 ):
    """
    KL divergence D_KL(p1 || p2), summed over amino acids (axis 0).

    p1 and p2 are arrays (df or np) with index as aa and column as position.
    When both inputs are pandas objects they are first aligned on their labels
    (inner join); any other input is converted with `np.asarray` and compared
    element by element, so NumPy arrays work as well.

    Returns the sum over axis 0: one value per column for 2D input, a single
    scalar for 1D (flattened) input. No averaging across positions happens here.
    Raises ValueError when the two inputs do not have the same shape.
    """
    pandas_types = (pd.DataFrame, pd.Series)
    # `.align` only exists on pandas objects; everything else becomes a plain ndarray
    if not isinstance(p1, pandas_types): p1 = np.asarray(p1)
    if not isinstance(p2, pandas_types): p2 = np.asarray(p2)

    if p1.shape != p2.shape: raise ValueError("Shapes of p1 and p2 must match.")

    if isinstance(p1, pandas_types) and isinstance(p2, pandas_types):
        # match rows/columns by label so the element-wise math compares like with like
        p1, p2 = p1.align(p2, join='inner', axis=None)

    # Mask invalid entries: wherever p1 + p2 is not > 0 (e.g. both zero), use 0 for both
    valid = (p1 + p2) > 0
    p1 = np.where(valid, p1, 0.0)
    p2 = np.where(valid, p2, 0.0)

    # KL divergence: sum_x p1(x) log(p1(x)/p2(x));
    # EPSILON (imported from .pssm) is added to both sides of the ratio -- presumably to avoid log(0) / division by zero
    kl = np.sum(p1 * np.log((p1 + EPSILON) / (p2 + EPSILON)), axis=0)

    return kl
|
|
34
|
+
|
|
35
|
+
# %% ../nbs/06_compare.ipynb #71084bb3-169a-4dc2-b5dd-1b802fda8225
|
|
36
|
+
def kl_divergence_flat(p1_flat, # pd.Series of target flattened pssm p
                       p2_flat, # pd.Series of pred flattened pssm q
                       ):
    "KL divergence between two flattened pssm pd.Series, divided by the number of distinct positions found in the index of p1_flat."
    # 1D input: kl_divergence already reduces to a single summed value, so no .mean() is taken
    summed_kl = kl_divergence(p1_flat, p2_flat)
    # pull the first (optionally negative) integer out of every index label and count the distinct ones
    position_labels = p1_flat.index.str.extract(r'(-?\d+)')
    n_positions = position_labels.drop_duplicates().shape[0]
    return float(summed_kl / n_positions)
|
|
44
|
+
|
|
45
|
+
# %% ../nbs/06_compare.ipynb #eca99343-fbcd-48c2-a1ff-88af31fd2346
|
|
46
|
+
def js_divergence(p1, # pssm
                  p2, # pssm
                  index=True, # if True and p1 carries column labels, return a pd.Series indexed by them
                 ):
    """
    Jensen-Shannon divergence between p1 and p2, summed over amino acids (axis 0), using natural log.

    p1 and p2 are two arrays (df or np) with index as aa and column as position.
    When both inputs are pandas objects they are first aligned on their labels
    (inner join); any other input is converted with `np.asarray`.

    Returns a pd.Series indexed by p1's columns when `index` is True and p1 has
    columns (a DataFrame); otherwise the raw result of the sum (an array for 2D
    input, a scalar for 1D input).
    Raises ValueError when the two inputs do not have the same shape.
    """
    pandas_types = (pd.DataFrame, pd.Series)
    # `.align` only exists on pandas objects; everything else becomes a plain ndarray
    if not isinstance(p1, pandas_types): p1 = np.asarray(p1)
    if not isinstance(p2, pandas_types): p2 = np.asarray(p2)

    if p1.shape != p2.shape: raise ValueError("Shapes of p1 and p2 must match.")

    if isinstance(p1, pandas_types) and isinstance(p2, pandas_types):
        # match rows/columns by label so the element-wise math compares like with like
        p1, p2 = p1.align(p2, join='inner', axis=None)

    # Series and ndarrays have no `.columns`; in that case fall back to returning the raw value
    positions = getattr(p1, 'columns', None) if index else None

    # wherever p1 + p2 is not > 0 (e.g. both zero), use 0 for both
    valid = (p1 + p2) > 0
    p1 = np.where(valid, p1, 0.0)
    p2 = np.where(valid, p2, 0.0)

    # mixture distribution M = (P + Q) / 2
    m = 0.5 * (p1 + p2)

    # JS = 0.5 * KL(P || M) + 0.5 * KL(Q || M);
    # EPSILON (imported from .pssm) is added inside the log -- presumably to avoid log(0) / division by zero
    js = 0.5 * np.sum(p1 * np.log((p1+ EPSILON) / (m + EPSILON)), axis=0) + \
         0.5 * np.sum(p2 * np.log((p2+ EPSILON) / (m + EPSILON)), axis=0)
    return js if positions is None else pd.Series(js,index=positions)
|
|
63
|
+
|
|
64
|
+
# %% ../nbs/06_compare.ipynb #37553737-13b3-4461-ad93-fe4cf863f25b
|
|
65
|
+
def js_divergence_flat(p1_flat, # pd.Series of flattened pssm
                       p2_flat, # pd.Series of flattened pssm
                       ):
    "JS divergence between two flattened pssm pd.Series, divided by the number of distinct positions found in the index of p1_flat."
    # index=False: the input is 1D, so js_divergence hands back the raw summed value
    summed_js = js_divergence(p1_flat, p2_flat, index=False)
    # pull the first (optionally negative) integer out of every index label and count the distinct ones
    position_labels = p1_flat.index.str.extract(r'(-?\d+)')
    n_positions = position_labels.drop_duplicates().shape[0]
    return float(summed_js / n_positions)
|
|
73
|
+
|
|
74
|
+
# %% ../nbs/06_compare.ipynb #7e480e54-c5de-4726-b208-c531e07a2adc
|
|
75
|
+
def js_similarity(pssm1,pssm2):
    "Per-position similarity: 1 minus the JS divergence expressed in bits (natural-log JSD divided by ln 2)."
    return 1 - js_divergence(pssm1, pssm2) / np.log(2)
|
|
80
|
+
|
|
81
|
+
# %% ../nbs/06_compare.ipynb #0c51e8b5-df29-431b-8013-ff46388b4872
|
|
82
|
+
def js_similarity_flat(p1_flat,p2_flat):
    "Overall similarity of two flattened pssms: 1 minus the flat JS divergence expressed in bits (divided by ln 2)."
    jsd_bits = js_divergence_flat(p1_flat, p2_flat) / np.log(2)
    return 1 - jsd_bits
|
|
85
|
+
|
|
86
|
+
# %% ../nbs/06_compare.ipynb #8d2e228a-8543-4174-8c24-ce6ded317c8d
|
|
87
|
+
def cosine_similarity(pssm1: pd.DataFrame, pssm2: pd.DataFrame) -> pd.Series:
    "Cosine similarity of matching columns (positions) of two PSSMs, returned as a Series keyed by column."
    if pssm1.shape != pssm2.shape:
        raise ValueError("PSSMs must have the same shape")

    def _column_cosine(position):
        # line up the aa index of both columns before comparing them
        col_a, col_b = pssm1[position].align(pssm2[position], join='inner')
        len_a = np.linalg.norm(col_a)
        len_b = np.linalg.norm(col_b)
        # a zero-length vector has no direction: report 0.0 rather than dividing by zero
        if len_a == 0 or len_b == 0:
            return 0.0
        return np.dot(col_a, col_b) / (len_a * len_b)

    return pd.Series({position: _column_cosine(position) for position in pssm1.columns})
|
|
108
|
+
|
|
109
|
+
# %% ../nbs/06_compare.ipynb #d830fbaa-4a9f-4d5d-98ba-289fc91bff8e
|
|
110
|
+
def cosine_overall_flat(pssm1_flat, pssm2_flat):
    """Cosine similarity between two flattened PSSMs, treated as single vectors."""
    # keep only the labels both Series share, in the same order, so products pair up correctly
    vec_a, vec_b = pssm1_flat.align(pssm2_flat, join='inner')
    len_a = np.linalg.norm(vec_a)
    len_b = np.linalg.norm(vec_b)
    # a zero-length vector has no direction: report 0.0 rather than dividing by zero
    if len_a == 0 or len_b == 0:
        return 0.0
    inner_product = sum(vec_a * vec_b)
    return inner_product / (len_a * len_b)
|