python-katlas 0.2.0__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. {python_katlas-0.2.0/python_katlas.egg-info → python_katlas-0.2.2}/PKG-INFO +15 -58
  2. {python_katlas-0.2.0 → python_katlas-0.2.2}/README.md +1 -17
  3. python_katlas-0.2.2/katlas/__init__.py +1 -0
  4. python_katlas-0.2.2/katlas/_modidx.py +141 -0
  5. python_katlas-0.2.2/katlas/common.py +10 -0
  6. python_katlas-0.2.2/katlas/compare.py +118 -0
  7. python_katlas-0.2.2/katlas/data.py +544 -0
  8. python_katlas-0.2.2/katlas/hierarchical.py +20 -0
  9. python_katlas-0.2.2/katlas/lo.py +69 -0
  10. {python_katlas-0.2.0 → python_katlas-0.2.2}/katlas/pathway.py +13 -13
  11. python_katlas-0.2.2/katlas/plot.py +356 -0
  12. python_katlas-0.2.2/katlas/pspa.py +138 -0
  13. python_katlas-0.2.2/katlas/pssm.py +375 -0
  14. python_katlas-0.2.0/katlas/score.py → python_katlas-0.2.2/katlas/scoring.py +30 -27
  15. {python_katlas-0.2.0 → python_katlas-0.2.2}/katlas/utils.py +92 -45
  16. python_katlas-0.2.2/pyproject.toml +59 -0
  17. {python_katlas-0.2.0 → python_katlas-0.2.2/python_katlas.egg-info}/PKG-INFO +15 -58
  18. {python_katlas-0.2.0 → python_katlas-0.2.2}/python_katlas.egg-info/SOURCES.txt +5 -10
  19. python_katlas-0.2.2/python_katlas.egg-info/requires.txt +15 -0
  20. python_katlas-0.2.0/katlas/__init__.py +0 -1
  21. python_katlas-0.2.0/katlas/_modidx.py +0 -216
  22. python_katlas-0.2.0/katlas/clustering.py +0 -142
  23. python_katlas-0.2.0/katlas/common.py +0 -4
  24. python_katlas-0.2.0/katlas/core.py +0 -6
  25. python_katlas-0.2.0/katlas/data.py +0 -455
  26. python_katlas-0.2.0/katlas/dnn.py +0 -384
  27. python_katlas-0.2.0/katlas/feature.py +0 -320
  28. python_katlas-0.2.0/katlas/plot.py +0 -924
  29. python_katlas-0.2.0/katlas/pssm.py +0 -844
  30. python_katlas-0.2.0/katlas/statistics.py +0 -102
  31. python_katlas-0.2.0/katlas/train.py +0 -207
  32. python_katlas-0.2.0/pyproject.toml +0 -11
  33. python_katlas-0.2.0/python_katlas.egg-info/not-zip-safe +0 -1
  34. python_katlas-0.2.0/python_katlas.egg-info/requires.txt +0 -27
  35. python_katlas-0.2.0/settings.ini +0 -40
  36. python_katlas-0.2.0/setup.py +0 -57
  37. {python_katlas-0.2.0 → python_katlas-0.2.2}/LICENSE +0 -0
  38. {python_katlas-0.2.0 → python_katlas-0.2.2}/MANIFEST.in +0 -0
  39. {python_katlas-0.2.0 → python_katlas-0.2.2}/python_katlas.egg-info/dependency_links.txt +0 -0
  40. {python_katlas-0.2.0 → python_katlas-0.2.2}/python_katlas.egg-info/entry_points.txt +0 -0
  41. {python_katlas-0.2.0 → python_katlas-0.2.2}/python_katlas.egg-info/top_level.txt +0 -0
  42. {python_katlas-0.2.0 → python_katlas-0.2.2}/setup.cfg +0 -0
@@ -1,62 +1,35 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-katlas
3
- Version: 0.2.0
3
+ Version: 0.2.2
4
4
  Summary: tools for predicting kinome specificities
5
- Home-page: https://github.com/sky1ove/katlas
6
- Author: lily
7
- Author-email: lcai888666@gmail.com
8
- License: Apache Software License 2.0
9
- Keywords: nbdev jupyter notebook python
10
- Classifier: Development Status :: 4 - Beta
11
- Classifier: Intended Audience :: Developers
5
+ Author-email: lily <lcai888666@gmail.com>
6
+ License: Apache-2.0
7
+ Project-URL: Repository, https://github.com/sky1ove/katlas
8
+ Project-URL: Documentation, https://sky1ove.github.io/katlas
9
+ Keywords: nbdev,jupyter,notebook,python
12
10
  Classifier: Natural Language :: English
13
- Classifier: Programming Language :: Python :: 3.7
14
- Classifier: Programming Language :: Python :: 3.8
15
- Classifier: Programming Language :: Python :: 3.9
16
- Classifier: Programming Language :: Python :: 3.10
17
- Classifier: License :: OSI Approved :: Apache Software License
18
- Requires-Python: >=3.7
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3 :: Only
15
+ Requires-Python: >=3.10
19
16
  Description-Content-Type: text/markdown
20
17
  License-File: LICENSE
21
18
  Requires-Dist: pandas
22
19
  Requires-Dist: gdown
23
- Requires-Dist: statsmodels
24
- Requires-Dist: statannotations
25
- Requires-Dist: fastparquet
26
20
  Requires-Dist: pyarrow
27
21
  Requires-Dist: tqdm
28
22
  Requires-Dist: logomaker-kinase
29
23
  Requires-Dist: seaborn
30
- Requires-Dist: bokeh
31
24
  Requires-Dist: reactome2py
32
- Requires-Dist: adjustText
33
25
  Requires-Dist: scikit-learn
34
- Requires-Dist: umap-learn
35
- Requires-Dist: ipywidgets
36
26
  Requires-Dist: biopython
27
+ Requires-Dist: filelock>=3.25.2
28
+ Requires-Dist: python-kplot>=0.0.3
37
29
  Provides-Extra: dev
38
30
  Requires-Dist: nbdev; extra == "dev"
39
- Requires-Dist: pyngrok; extra == "dev"
40
- Requires-Dist: fastai; extra == "dev"
41
- Requires-Dist: fairscale; extra == "dev"
42
- Requires-Dist: fair-esm; extra == "dev"
43
- Requires-Dist: rdkit; extra == "dev"
44
- Requires-Dist: openpyxl; extra == "dev"
45
- Requires-Dist: transformers; extra == "dev"
46
- Requires-Dist: sentencepiece; extra == "dev"
47
- Dynamic: author
48
- Dynamic: author-email
49
- Dynamic: classifier
50
- Dynamic: description
51
- Dynamic: description-content-type
52
- Dynamic: home-page
53
- Dynamic: keywords
54
- Dynamic: license
31
+ Requires-Dist: jupyterlab>=3.6.8; extra == "dev"
55
32
  Dynamic: license-file
56
- Dynamic: provides-extra
57
- Dynamic: requires-dist
58
- Dynamic: requires-python
59
- Dynamic: summary
60
33
 
61
34
  # KATLAS
62
35
 
@@ -94,11 +67,6 @@ helpful to your research.
94
67
  and [CPTAC](https://pdc.cancer.gov/pdc/cptac-pancancer) /
95
68
  [LinkedOmics](https://academic.oup.com/nar/article/46/D1/D956/4607804)
96
69
 
97
- ## Reproduce datasets & figures
98
-
99
- Follow the instructions in katlas_raw:
100
- https://github.com/sky1ove/katlas_raw
101
-
102
70
  ## Web applications
103
71
 
104
72
  Users can now run the analysis directly on the web without needing to
@@ -109,21 +77,10 @@ Check out our latest web platform:
109
77
 
110
78
  ## Install
111
79
 
112
- UV:
113
-
114
80
  ``` bash
115
- uv add -U python-katlas
81
+ pip install python-katlas
116
82
  ```
117
83
 
118
- pip:
119
-
120
- ``` bash
121
- pip install -U python-katlas
122
- ```
123
-
124
- If using machine-learning related modules, need to install development
125
- verison: `pip install -U "python-katlas[dev]"`
126
-
127
84
  ## Import
128
85
 
129
86
  ``` python
@@ -34,11 +34,6 @@ helpful to your research.
34
34
  and [CPTAC](https://pdc.cancer.gov/pdc/cptac-pancancer) /
35
35
  [LinkedOmics](https://academic.oup.com/nar/article/46/D1/D956/4607804)
36
36
 
37
- ## Reproduce datasets & figures
38
-
39
- Follow the instructions in katlas_raw:
40
- https://github.com/sky1ove/katlas_raw
41
-
42
37
  ## Web applications
43
38
 
44
39
  Users can now run the analysis directly on the web without needing to
@@ -49,21 +44,10 @@ Check out our latest web platform:
49
44
 
50
45
  ## Install
51
46
 
52
- UV:
53
-
54
- ``` bash
55
- uv add -U python-katlas
56
- ```
57
-
58
- pip:
59
-
60
47
  ``` bash
61
- pip install -U python-katlas
48
+ pip install python-katlas
62
49
  ```
63
50
 
64
- If using machine-learning related modules, need to install development
65
- verison: `pip install -U "python-katlas[dev]"`
66
-
67
51
  ## Import
68
52
 
69
53
  ``` python
@@ -0,0 +1 @@
1
+ __version__ = "0.2.2"
@@ -0,0 +1,141 @@
1
+ # Autogenerated by nbdev
2
+
3
+ d = { 'settings': { 'branch': 'main',
4
+ 'doc_baseurl': '/katlas',
5
+ 'doc_host': 'https://sky1ove.github.io',
6
+ 'git_url': 'https://github.com/sky1ove/katlas',
7
+ 'lib_path': 'katlas'},
8
+ 'syms': { 'katlas.common': {},
9
+ 'katlas.compare': { 'katlas.compare.cosine_overall_flat': ('compare.html#cosine_overall_flat', 'katlas/compare.py'),
10
+ 'katlas.compare.cosine_similarity': ('compare.html#cosine_similarity', 'katlas/compare.py'),
11
+ 'katlas.compare.js_divergence': ('compare.html#js_divergence', 'katlas/compare.py'),
12
+ 'katlas.compare.js_divergence_flat': ('compare.html#js_divergence_flat', 'katlas/compare.py'),
13
+ 'katlas.compare.js_similarity': ('compare.html#js_similarity', 'katlas/compare.py'),
14
+ 'katlas.compare.js_similarity_flat': ('compare.html#js_similarity_flat', 'katlas/compare.py'),
15
+ 'katlas.compare.kl_divergence': ('compare.html#kl_divergence', 'katlas/compare.py'),
16
+ 'katlas.compare.kl_divergence_flat': ('compare.html#kl_divergence_flat', 'katlas/compare.py')},
17
+ 'katlas.data': { 'katlas.data.CPTAC': ('data.html#cptac', 'katlas/data.py'),
18
+ 'katlas.data.CPTAC._read_file': ('data.html#cptac._read_file', 'katlas/data.py'),
19
+ 'katlas.data.CPTAC.get_id': ('data.html#cptac.get_id', 'katlas/data.py'),
20
+ 'katlas.data.CPTAC.list_cancer': ('data.html#cptac.list_cancer', 'katlas/data.py'),
21
+ 'katlas.data.Data': ('data.html#data', 'katlas/data.py'),
22
+ 'katlas.data.Data.clear_cache': ('data.html#data.clear_cache', 'katlas/data.py'),
23
+ 'katlas.data.Data.download': ('data.html#data.download', 'katlas/data.py'),
24
+ 'katlas.data.Data.get_aa_info': ('data.html#data.get_aa_info', 'katlas/data.py'),
25
+ 'katlas.data.Data.get_aa_morgan': ('data.html#data.get_aa_morgan', 'katlas/data.py'),
26
+ 'katlas.data.Data.get_aa_rdkit': ('data.html#data.get_aa_rdkit', 'katlas/data.py'),
27
+ 'katlas.data.Data.get_cddm': ('data.html#data.get_cddm', 'katlas/data.py'),
28
+ 'katlas.data.Data.get_cddm_LO': ('data.html#data.get_cddm_lo', 'katlas/data.py'),
29
+ 'katlas.data.Data.get_cddm_LO_upper': ('data.html#data.get_cddm_lo_upper', 'katlas/data.py'),
30
+ 'katlas.data.Data.get_cddm_upper': ('data.html#data.get_cddm_upper', 'katlas/data.py'),
31
+ 'katlas.data.Data.get_combine_site_phosphorylated': ( 'data.html#data.get_combine_site_phosphorylated',
32
+ 'katlas/data.py'),
33
+ 'katlas.data.Data.get_combine_site_psp_ochoa': ('data.html#data.get_combine_site_psp_ochoa', 'katlas/data.py'),
34
+ 'katlas.data.Data.get_cptac_ensembl_site': ('data.html#data.get_cptac_ensembl_site', 'katlas/data.py'),
35
+ 'katlas.data.Data.get_cptac_gene_site': ('data.html#data.get_cptac_gene_site', 'katlas/data.py'),
36
+ 'katlas.data.Data.get_cptac_unique_site': ('data.html#data.get_cptac_unique_site', 'katlas/data.py'),
37
+ 'katlas.data.Data.get_human_site': ('data.html#data.get_human_site', 'katlas/data.py'),
38
+ 'katlas.data.Data.get_kd_uniprot': ('data.html#data.get_kd_uniprot', 'katlas/data.py'),
39
+ 'katlas.data.Data.get_kinase_info': ('data.html#data.get_kinase_info', 'katlas/data.py'),
40
+ 'katlas.data.Data.get_kinase_uniprot': ('data.html#data.get_kinase_uniprot', 'katlas/data.py'),
41
+ 'katlas.data.Data.get_ks_background': ('data.html#data.get_ks_background', 'katlas/data.py'),
42
+ 'katlas.data.Data.get_ks_dataset': ('data.html#data.get_ks_dataset', 'katlas/data.py'),
43
+ 'katlas.data.Data.get_ks_unique': ('data.html#data.get_ks_unique', 'katlas/data.py'),
44
+ 'katlas.data.Data.get_num_dict': ('data.html#data.get_num_dict', 'katlas/data.py'),
45
+ 'katlas.data.Data.get_ochoa_site': ('data.html#data.get_ochoa_site', 'katlas/data.py'),
46
+ 'katlas.data.Data.get_psp_human_site': ('data.html#data.get_psp_human_site', 'katlas/data.py'),
47
+ 'katlas.data.Data.get_pspa': ('data.html#data.get_pspa', 'katlas/data.py'),
48
+ 'katlas.data.Data.get_pspa_raw': ('data.html#data.get_pspa_raw', 'katlas/data.py'),
49
+ 'katlas.data.Data.get_pspa_scale': ('data.html#data.get_pspa_scale', 'katlas/data.py'),
50
+ 'katlas.data.Data.get_pspa_st': ('data.html#data.get_pspa_st', 'katlas/data.py'),
51
+ 'katlas.data.Data.get_pspa_st_pct': ('data.html#data.get_pspa_st_pct', 'katlas/data.py'),
52
+ 'katlas.data.Data.get_pspa_tyr': ('data.html#data.get_pspa_tyr', 'katlas/data.py'),
53
+ 'katlas.data.Data.get_pspa_tyr_pct': ('data.html#data.get_pspa_tyr_pct', 'katlas/data.py'),
54
+ 'katlas.data.Data.get_reactome_pathway': ('data.html#data.get_reactome_pathway', 'katlas/data.py'),
55
+ 'katlas.data.Data.get_reactome_pathway_lo': ('data.html#data.get_reactome_pathway_lo', 'katlas/data.py'),
56
+ 'katlas.data.Data.read_file': ('data.html#data.read_file', 'katlas/data.py'),
57
+ 'katlas.data._default_dataset_dir': ('data.html#_default_dataset_dir', 'katlas/data.py'),
58
+ 'katlas.data._normalize_dataset_dir': ('data.html#_normalize_dataset_dir', 'katlas/data.py'),
59
+ 'katlas.data._normalize_required_files': ('data.html#_normalize_required_files', 'katlas/data.py'),
60
+ 'katlas.data._read_dataset_file_cached': ('data.html#_read_dataset_file_cached', 'katlas/data.py')},
61
+ 'katlas.hierarchical': { 'katlas.hierarchical.get_1d_js': ('hierarchical.html#get_1d_js', 'katlas/hierarchical.py'),
62
+ 'katlas.hierarchical.get_1d_js_parallel': ( 'hierarchical.html#get_1d_js_parallel',
63
+ 'katlas/hierarchical.py')},
64
+ 'katlas.lo': { 'katlas.lo.get_pssm_LO': ('lo.html#get_pssm_lo', 'katlas/lo.py'),
65
+ 'katlas.lo.get_pssm_LO_flat': ('lo.html#get_pssm_lo_flat', 'katlas/lo.py'),
66
+ 'katlas.lo.plot_logo_LO': ('lo.html#plot_logo_lo', 'katlas/lo.py'),
67
+ 'katlas.lo.plot_logo_heatmap_LO': ('lo.html#plot_logo_heatmap_lo', 'katlas/lo.py')},
68
+ 'katlas.pathway': { 'katlas.pathway.add_reactome_ref': ('pathway.html#add_reactome_ref', 'katlas/pathway.py'),
69
+ 'katlas.pathway.get_overlap': ('pathway.html#get_overlap', 'katlas/pathway.py'),
70
+ 'katlas.pathway.get_reactome': ('pathway.html#get_reactome', 'katlas/pathway.py'),
71
+ 'katlas.pathway.get_reactome_raw': ('pathway.html#get_reactome_raw', 'katlas/pathway.py'),
72
+ 'katlas.pathway.plot_path': ('pathway.html#plot_path', 'katlas/pathway.py'),
73
+ 'katlas.pathway.query_reactome': ('pathway.html#query_reactome', 'katlas/pathway.py')},
74
+ 'katlas.plot': { 'katlas.plot.change_center_name': ('plot.html#change_center_name', 'katlas/plot.py'),
75
+ 'katlas.plot.convert_logo_df': ('plot.html#convert_logo_df', 'katlas/plot.py'),
76
+ 'katlas.plot.get_logo_IC': ('plot.html#get_logo_ic', 'katlas/plot.py'),
77
+ 'katlas.plot.get_pos_min_max': ('plot.html#get_pos_min_max', 'katlas/plot.py'),
78
+ 'katlas.plot.pSTY2sty': ('plot.html#psty2sty', 'katlas/plot.py'),
79
+ 'katlas.plot.plot_heatmap': ('plot.html#plot_heatmap', 'katlas/plot.py'),
80
+ 'katlas.plot.plot_heatmap_simple': ('plot.html#plot_heatmap_simple', 'katlas/plot.py'),
81
+ 'katlas.plot.plot_logo': ('plot.html#plot_logo', 'katlas/plot.py'),
82
+ 'katlas.plot.plot_logo_heatmap': ('plot.html#plot_logo_heatmap', 'katlas/plot.py'),
83
+ 'katlas.plot.plot_logo_raw': ('plot.html#plot_logo_raw', 'katlas/plot.py'),
84
+ 'katlas.plot.plot_logos': ('plot.html#plot_logos', 'katlas/plot.py'),
85
+ 'katlas.plot.plot_logos_idx': ('plot.html#plot_logos_idx', 'katlas/plot.py'),
86
+ 'katlas.plot.plot_two_heatmaps': ('plot.html#plot_two_heatmaps', 'katlas/plot.py'),
87
+ 'katlas.plot.scale_pos_neg_values': ('plot.html#scale_pos_neg_values', 'katlas/plot.py'),
88
+ 'katlas.plot.scale_zero_position': ('plot.html#scale_zero_position', 'katlas/plot.py'),
89
+ 'katlas.plot.sty2pSTY': ('plot.html#sty2psty', 'katlas/plot.py'),
90
+ 'katlas.plot.sty2pSTY_df': ('plot.html#sty2psty_df', 'katlas/plot.py')},
91
+ 'katlas.pspa': { 'katlas.pspa.get_logo': ('pspa.html#get_logo', 'katlas/pspa.py'),
92
+ 'katlas.pspa.get_one_kinase': ('pspa.html#get_one_kinase', 'katlas/pspa.py'),
93
+ 'katlas.pspa.plot_logo_heatmap_pspa': ('pspa.html#plot_logo_heatmap_pspa', 'katlas/pspa.py'),
94
+ 'katlas.pspa.plot_logo_pspa': ('pspa.html#plot_logo_pspa', 'katlas/pspa.py'),
95
+ 'katlas.pspa.preprocess_pspa': ('pspa.html#preprocess_pspa', 'katlas/pspa.py'),
96
+ 'katlas.pspa.raw2norm': ('pspa.html#raw2norm', 'katlas/pspa.py')},
97
+ 'katlas.pssm': { 'katlas.pssm._clean_zero': ('pssm.html#_clean_zero', 'katlas/pssm.py'),
98
+ 'katlas.pssm.clean_zero_normalize': ('pssm.html#clean_zero_normalize', 'katlas/pssm.py'),
99
+ 'katlas.pssm.flatten_pssm': ('pssm.html#flatten_pssm', 'katlas/pssm.py'),
100
+ 'katlas.pssm.get_IC': ('pssm.html#get_ic', 'katlas/pssm.py'),
101
+ 'katlas.pssm.get_IC_flat': ('pssm.html#get_ic_flat', 'katlas/pssm.py'),
102
+ 'katlas.pssm.get_cluster_pssms': ('pssm.html#get_cluster_pssms', 'katlas/pssm.py'),
103
+ 'katlas.pssm.get_entropy': ('pssm.html#get_entropy', 'katlas/pssm.py'),
104
+ 'katlas.pssm.get_entropy_flat': ('pssm.html#get_entropy_flat', 'katlas/pssm.py'),
105
+ 'katlas.pssm.get_prob': ('pssm.html#get_prob', 'katlas/pssm.py'),
106
+ 'katlas.pssm.get_pssm_seq_labels': ('pssm.html#get_pssm_seq_labels', 'katlas/pssm.py'),
107
+ 'katlas.pssm.get_pssm_weight': ('pssm.html#get_pssm_weight', 'katlas/pssm.py'),
108
+ 'katlas.pssm.get_specificity': ('pssm.html#get_specificity', 'katlas/pssm.py'),
109
+ 'katlas.pssm.get_specificity_flat': ('pssm.html#get_specificity_flat', 'katlas/pssm.py'),
110
+ 'katlas.pssm.pssm_to_seq': ('pssm.html#pssm_to_seq', 'katlas/pssm.py'),
111
+ 'katlas.pssm.recover_pssm': ('pssm.html#recover_pssm', 'katlas/pssm.py')},
112
+ 'katlas.scoring': { 'katlas.scoring.Params': ('scoring.html#params', 'katlas/scoring.py'),
113
+ 'katlas.scoring.STY2sty': ('scoring.html#sty2sty', 'katlas/scoring.py'),
114
+ 'katlas.scoring.cut_seq': ('scoring.html#cut_seq', 'katlas/scoring.py'),
115
+ 'katlas.scoring.duplicate_ref_zero': ('scoring.html#duplicate_ref_zero', 'katlas/scoring.py'),
116
+ 'katlas.scoring.get_dict': ('scoring.html#get_dict', 'katlas/scoring.py'),
117
+ 'katlas.scoring.get_pct': ('scoring.html#get_pct', 'katlas/scoring.py'),
118
+ 'katlas.scoring.get_pct_df': ('scoring.html#get_pct_df', 'katlas/scoring.py'),
119
+ 'katlas.scoring.multiply': ('scoring.html#multiply', 'katlas/scoring.py'),
120
+ 'katlas.scoring.multiply_generic': ('scoring.html#multiply_generic', 'katlas/scoring.py'),
121
+ 'katlas.scoring.multiply_pspa': ('scoring.html#multiply_pspa', 'katlas/scoring.py'),
122
+ 'katlas.scoring.predict_kinase': ('scoring.html#predict_kinase', 'katlas/scoring.py'),
123
+ 'katlas.scoring.predict_kinase_df': ('scoring.html#predict_kinase_df', 'katlas/scoring.py'),
124
+ 'katlas.scoring.preprocess_ref': ('scoring.html#preprocess_ref', 'katlas/scoring.py'),
125
+ 'katlas.scoring.sumup': ('scoring.html#sumup', 'katlas/scoring.py')},
126
+ 'katlas.utils': { 'katlas.utils.aln2df': ('utils.html#aln2df', 'katlas/utils.py'),
127
+ 'katlas.utils.check_seq': ('utils.html#check_seq', 'katlas/utils.py'),
128
+ 'katlas.utils.check_seqs': ('utils.html#check_seqs', 'katlas/utils.py'),
129
+ 'katlas.utils.clean_feat': ('utils.html#clean_feat', 'katlas/utils.py'),
130
+ 'katlas.utils.extract_site_seq': ('utils.html#extract_site_seq', 'katlas/utils.py'),
131
+ 'katlas.utils.get_aln_freq': ('utils.html#get_aln_freq', 'katlas/utils.py'),
132
+ 'katlas.utils.get_fasta': ('utils.html#get_fasta', 'katlas/utils.py'),
133
+ 'katlas.utils.get_subfamily_color': ('utils.html#get_subfamily_color', 'katlas/utils.py'),
134
+ 'katlas.utils.phosphorylate_seq': ('utils.html#phosphorylate_seq', 'katlas/utils.py'),
135
+ 'katlas.utils.phosphorylate_seq_df': ('utils.html#phosphorylate_seq_df', 'katlas/utils.py'),
136
+ 'katlas.utils.prepare_path': ('utils.html#prepare_path', 'katlas/utils.py'),
137
+ 'katlas.utils.remove_hi_corr': ('utils.html#remove_hi_corr', 'katlas/utils.py'),
138
+ 'katlas.utils.run_clustalo': ('utils.html#run_clustalo', 'katlas/utils.py'),
139
+ 'katlas.utils.standardize': ('utils.html#standardize', 'katlas/utils.py'),
140
+ 'katlas.utils.validate_site': ('utils.html#validate_site', 'katlas/utils.py'),
141
+ 'katlas.utils.validate_site_df': ('utils.html#validate_site_df', 'katlas/utils.py')}}}
@@ -0,0 +1,10 @@
1
+ from .compare import *
2
+ from .data import *
3
+ from .hierarchical import *
4
+ from .lo import *
5
+ from .pathway import *
6
+ from .plot import *
7
+ from .pspa import *
8
+ from .pssm import *
9
+ from .scoring import *
10
+ from .utils import *
@@ -0,0 +1,118 @@
1
+ """algorithms about comparing two PSSMs"""
2
+
3
+ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/06_compare.ipynb.
4
+
5
+ # %% auto #0
6
+ __all__ = ['kl_divergence', 'kl_divergence_flat', 'js_divergence', 'js_divergence_flat', 'js_similarity', 'js_similarity_flat',
7
+ 'cosine_similarity', 'cosine_overall_flat']
8
+
9
+ # %% ../nbs/06_compare.ipynb #76949d54-f519-4bc6-9da5-d48a05bbfe69
10
+ import numpy as np, pandas as pd
11
+ from .pssm import EPSILON
12
+
13
+ # %% ../nbs/06_compare.ipynb #b1ef2ee7-70fd-4703-96b4-1c1a9053987f
14
def kl_divergence(p1, # target pssm p (pandas object or np.ndarray; rows are aa, columns are positions)
                  p2, # pred pssm q (same shape as p1)
                  ):
    """
    KL divergence D_KL(p1 || p2), summed along axis 0.

    p1 and p2 may be pandas objects (DataFrame/Series) or numpy arrays of identical shape.
    Pandas inputs are label-aligned (inner join) first; numpy inputs are compared positionally.
    Returns one value per column for 2-D input, or a single scalar for 1-D input.
    Raises ValueError if the two shapes differ.
    """
    if p1.shape != p2.shape: raise ValueError("Shapes of p1 and p2 must match.")

    # Label alignment only exists on pandas objects; plain arrays have no labels to match.
    if hasattr(p1, "align") and hasattr(p2, "align"):
        p1, p2 = p1.align(p2, join='inner', axis=None)
    p1, p2 = np.asarray(p1), np.asarray(p2)

    # Zero out entries whose combined mass is not positive so they contribute nothing
    valid = (p1 + p2) > 0
    p1 = np.where(valid, p1, 0.0)
    p2 = np.where(valid, p2, 0.0)

    # KL divergence: sum_x p1(x) log(p1(x)/p2(x)); EPSILON keeps the log finite at zeros
    kl = np.sum(p1 * np.log((p1 + EPSILON) / (p2 + EPSILON)), axis=0)

    return kl
34
+
35
+ # %% ../nbs/06_compare.ipynb #71084bb3-169a-4dc2-b5dd-1b802fda8225
36
def kl_divergence_flat(p1_flat, # pd.Series of target flattened pssm p
                       p2_flat, # pd.Series of pred flattened pssm q
                       ):
    "KL divergence between two flattened PSSMs, divided by the number of distinct positions."
    # 1-D input: kl_divergence already returns a single summed value, so no mean is taken here
    summed_kl = kl_divergence(p1_flat, p2_flat)
    # The first (optionally negative) integer found in each index label is treated as its position
    position_ids = p1_flat.index.str.extract(r'(-?\d+)')
    n_positions = len(position_ids.drop_duplicates())
    return float(summed_kl / n_positions)
44
+
45
+ # %% ../nbs/06_compare.ipynb #eca99343-fbcd-48c2-a1ff-88af31fd2346
46
def js_divergence(p1, # pssm (pandas object or np.ndarray; rows are aa, columns are positions)
                  p2, # pssm (same shape as p1)
                  index=True, # if True and p1 has column labels, return a pd.Series indexed by them
                  ):
    """
    Jensen-Shannon divergence between p1 and p2, summed along axis 0 (natural-log units).

    Pandas inputs are label-aligned (inner join) first; numpy inputs are compared positionally.
    With index=True and a DataFrame p1 the result is a pd.Series labelled by p1's columns;
    otherwise (index=False, Series input or ndarray input) the raw numpy result is returned.
    Raises ValueError if the two shapes differ.
    """
    if p1.shape != p2.shape: raise ValueError("Shapes of p1 and p2 must match.")

    # Label alignment only exists on pandas objects; plain arrays have no labels to match.
    if hasattr(p1, "align") and hasattr(p2, "align"):
        p1, p2 = p1.align(p2, join='inner', axis=None)

    # Series and ndarrays carry no column labels, so fall back to the unlabelled result for them
    positions = getattr(p1, "columns", None) if index else None

    p1, p2 = np.asarray(p1), np.asarray(p2)

    # Zero out entries whose combined mass is not positive so they contribute nothing
    valid = (p1 + p2) > 0
    p1 = np.where(valid, p1, 0.0)
    p2 = np.where(valid, p2, 0.0)

    # Mixture distribution used as the common reference for both KL terms
    m = 0.5 * (p1 + p2)

    js = 0.5 * np.sum(p1 * np.log((p1 + EPSILON) / (m + EPSILON)), axis=0) + \
         0.5 * np.sum(p2 * np.log((p2 + EPSILON) / (m + EPSILON)), axis=0)
    return js if positions is None else pd.Series(js, index=positions)
63
+
64
+ # %% ../nbs/06_compare.ipynb #37553737-13b3-4461-ad93-fe4cf863f25b
65
def js_divergence_flat(p1_flat, # pd.Series of flattened pssm
                       p2_flat, # pd.Series of flattened pssm
                       ):
    "JS divergence between two flattened PSSMs, divided by the number of distinct positions."
    # index=False: a flattened Series has no columns to label the result with
    summed_js = js_divergence(p1_flat, p2_flat, index=False)
    # The first (optionally negative) integer found in each index label is treated as its position
    position_ids = p1_flat.index.str.extract(r'(-?\d+)')
    n_positions = len(position_ids.drop_duplicates())
    return float(summed_js / n_positions)
73
+
74
+ # %% ../nbs/06_compare.ipynb #7e480e54-c5de-4726-b208-c531e07a2adc
75
def js_similarity(pssm1,pssm2):
    "JS similarity: 1 minus the JS divergence after converting it from nats to bits (divide by ln 2)."
    return 1 - js_divergence(pssm1, pssm2) / np.log(2)
80
+
81
+ # %% ../nbs/06_compare.ipynb #0c51e8b5-df29-431b-8013-ff46388b4872
82
def js_similarity_flat(p1_flat,p2_flat):
    "Flattened-PSSM JS similarity: 1 minus the flat JS divergence converted from nats to bits."
    jsd_bits = js_divergence_flat(p1_flat, p2_flat) / np.log(2)
    return 1 - jsd_bits
85
+
86
+ # %% ../nbs/06_compare.ipynb #8d2e228a-8543-4174-8c24-ce6ded317c8d
87
def cosine_similarity(pssm1: pd.DataFrame, pssm2: pd.DataFrame) -> pd.Series:
    "Cosine similarity between matching columns (positions) of two PSSMs, as a Series keyed by column."
    if pssm1.shape != pssm2.shape: raise ValueError("PSSMs must have the same shape")

    def _column_cosine(col1, col2):
        # Match the row (aa) labels of the two columns before comparing them
        col1, col2 = col1.align(col2, join='inner')
        len1, len2 = np.linalg.norm(col1), np.linalg.norm(col2)
        # A zero-length vector has no direction; report 0.0 instead of dividing by zero
        if len1 == 0 or len2 == 0: return 0.0
        return np.dot(col1, col2) / (len1 * len2)

    return pd.Series({pos: _column_cosine(pssm1[pos], pssm2[pos]) for pos in pssm1.columns})
108
+
109
+ # %% ../nbs/06_compare.ipynb #d830fbaa-4a9f-4d5d-98ba-289fc91bff8e
110
def cosine_overall_flat(pssm1_flat, pssm2_flat):
    """Compute overall cosine similarity between two PSSMs (flattened).

    Both inputs are pd.Series; they are aligned on their shared index labels (inner join)
    before comparison. Returns a Python float, 0.0 when either aligned vector has zero norm.
    """
    # match index for dot product
    pssm1_flat, pssm2_flat = pssm1_flat.align(pssm2_flat, join='inner')
    v1, v2 = pssm1_flat.to_numpy(), pssm2_flat.to_numpy()
    norm1 = np.linalg.norm(v1)
    norm2 = np.linalg.norm(v2)
    # A zero-length vector has no direction; report 0.0 instead of dividing by zero
    if norm1 == 0 or norm2 == 0: return 0.0
    # Vectorized dot product, matching cosine_similarity, instead of a Python-level sum over the Series
    return float(np.dot(v1, v2) / (norm1 * norm2))