python-katlas 0.1.4__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- katlas/__init__.py +1 -1
- katlas/_modidx.py +187 -80
- katlas/clustering.py +142 -0
- katlas/common.py +4 -0
- katlas/core.py +6 -816
- katlas/data.py +455 -0
- katlas/dnn.py +384 -0
- katlas/feature.py +136 -111
- katlas/pathway.py +170 -0
- katlas/plot.py +541 -287
- katlas/pssm.py +844 -0
- katlas/score.py +322 -0
- katlas/statistics.py +102 -0
- katlas/train.py +51 -77
- katlas/utils.py +189 -0
- {python_katlas-0.1.4.dist-info → python_katlas-0.2.0.dist-info}/METADATA +252 -133
- python_katlas-0.2.0.dist-info/RECORD +21 -0
- {python_katlas-0.1.4.dist-info → python_katlas-0.2.0.dist-info}/WHEEL +1 -1
- {python_katlas-0.1.4.dist-info → python_katlas-0.2.0.dist-info}/entry_points.txt +0 -0
- {python_katlas-0.1.4.dist-info → python_katlas-0.2.0.dist-info/licenses}/LICENSE +0 -0
- {python_katlas-0.1.4.dist-info → python_katlas-0.2.0.dist-info}/top_level.txt +0 -0
- katlas/dl.py +0 -357
- katlas/imports.py +0 -7
- python_katlas-0.1.4.dist-info/RECORD +0 -14
katlas/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.1.4"
|
|
1
|
+
__version__ = "0.2.0"
|
katlas/_modidx.py
CHANGED
|
@@ -5,105 +5,212 @@ d = { 'settings': { 'branch': 'main',
|
|
|
5
5
|
'doc_host': 'https://sky1ove.github.io',
|
|
6
6
|
'git_url': 'https://github.com/sky1ove/katlas',
|
|
7
7
|
'lib_path': 'katlas'},
|
|
8
|
-
'syms': { 'katlas.
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
'katlas.
|
|
23
|
-
|
|
24
|
-
'katlas.
|
|
25
|
-
'katlas.
|
|
26
|
-
'katlas.
|
|
27
|
-
'katlas.
|
|
28
|
-
'katlas.
|
|
29
|
-
'katlas.
|
|
30
|
-
'katlas.
|
|
31
|
-
'katlas.
|
|
32
|
-
'katlas.
|
|
33
|
-
'katlas.
|
|
34
|
-
'katlas.
|
|
35
|
-
|
|
36
|
-
'katlas.
|
|
37
|
-
'katlas.
|
|
38
|
-
'katlas.
|
|
39
|
-
'katlas.
|
|
40
|
-
'katlas.
|
|
41
|
-
'katlas.
|
|
42
|
-
'katlas.
|
|
43
|
-
'katlas.
|
|
44
|
-
'katlas.
|
|
45
|
-
'katlas.
|
|
46
|
-
'katlas.
|
|
47
|
-
'katlas.
|
|
48
|
-
'katlas.
|
|
49
|
-
'katlas.
|
|
50
|
-
'katlas.
|
|
51
|
-
'katlas.
|
|
52
|
-
'katlas.
|
|
53
|
-
'katlas.
|
|
54
|
-
'katlas.
|
|
55
|
-
'katlas.
|
|
56
|
-
'katlas.
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
8
|
+
'syms': { 'katlas.clustering': { 'katlas.clustering.get_1d_distance': ('hierarchical.html#get_1d_distance', 'katlas/clustering.py'),
|
|
9
|
+
'katlas.clustering.get_1d_distance_parallel': ( 'hierarchical.html#get_1d_distance_parallel',
|
|
10
|
+
'katlas/clustering.py'),
|
|
11
|
+
'katlas.clustering.get_1d_js': ('hierarchical.html#get_1d_js', 'katlas/clustering.py'),
|
|
12
|
+
'katlas.clustering.get_1d_js_parallel': ('hierarchical.html#get_1d_js_parallel', 'katlas/clustering.py'),
|
|
13
|
+
'katlas.clustering.get_Z': ('hierarchical.html#get_z', 'katlas/clustering.py'),
|
|
14
|
+
'katlas.clustering.get_distance': ('hierarchical.html#get_distance', 'katlas/clustering.py'),
|
|
15
|
+
'katlas.clustering.get_pssm_seq_labels': ( 'hierarchical.html#get_pssm_seq_labels',
|
|
16
|
+
'katlas/clustering.py'),
|
|
17
|
+
'katlas.clustering.plot_dendrogram': ('hierarchical.html#plot_dendrogram', 'katlas/clustering.py'),
|
|
18
|
+
'katlas.clustering.pssm_to_seq': ('hierarchical.html#pssm_to_seq', 'katlas/clustering.py')},
|
|
19
|
+
'katlas.common': {},
|
|
20
|
+
'katlas.core': {},
|
|
21
|
+
'katlas.data': { 'katlas.data.CPTAC': ('data.html#cptac', 'katlas/data.py'),
|
|
22
|
+
'katlas.data.CPTAC._read_file': ('data.html#cptac._read_file', 'katlas/data.py'),
|
|
23
|
+
'katlas.data.CPTAC.get_id': ('data.html#cptac.get_id', 'katlas/data.py'),
|
|
24
|
+
'katlas.data.CPTAC.list_cancer': ('data.html#cptac.list_cancer', 'katlas/data.py'),
|
|
25
|
+
'katlas.data.Data': ('data.html#data', 'katlas/data.py'),
|
|
26
|
+
'katlas.data.Data.download': ('data.html#data.download', 'katlas/data.py'),
|
|
27
|
+
'katlas.data.Data.get_aa_info': ('data.html#data.get_aa_info', 'katlas/data.py'),
|
|
28
|
+
'katlas.data.Data.get_aa_morgan': ('data.html#data.get_aa_morgan', 'katlas/data.py'),
|
|
29
|
+
'katlas.data.Data.get_aa_rdkit': ('data.html#data.get_aa_rdkit', 'katlas/data.py'),
|
|
30
|
+
'katlas.data.Data.get_cddm': ('data.html#data.get_cddm', 'katlas/data.py'),
|
|
31
|
+
'katlas.data.Data.get_cddm_LO': ('data.html#data.get_cddm_lo', 'katlas/data.py'),
|
|
32
|
+
'katlas.data.Data.get_cddm_LO_upper': ('data.html#data.get_cddm_lo_upper', 'katlas/data.py'),
|
|
33
|
+
'katlas.data.Data.get_cddm_upper': ('data.html#data.get_cddm_upper', 'katlas/data.py'),
|
|
34
|
+
'katlas.data.Data.get_combine_site_phosphorylated': ( 'data.html#data.get_combine_site_phosphorylated',
|
|
35
|
+
'katlas/data.py'),
|
|
36
|
+
'katlas.data.Data.get_combine_site_psp_ochoa': ('data.html#data.get_combine_site_psp_ochoa', 'katlas/data.py'),
|
|
37
|
+
'katlas.data.Data.get_cptac_ensembl_site': ('data.html#data.get_cptac_ensembl_site', 'katlas/data.py'),
|
|
38
|
+
'katlas.data.Data.get_cptac_gene_site': ('data.html#data.get_cptac_gene_site', 'katlas/data.py'),
|
|
39
|
+
'katlas.data.Data.get_cptac_unique_site': ('data.html#data.get_cptac_unique_site', 'katlas/data.py'),
|
|
40
|
+
'katlas.data.Data.get_human_site': ('data.html#data.get_human_site', 'katlas/data.py'),
|
|
41
|
+
'katlas.data.Data.get_kd_uniprot': ('data.html#data.get_kd_uniprot', 'katlas/data.py'),
|
|
42
|
+
'katlas.data.Data.get_kinase_info': ('data.html#data.get_kinase_info', 'katlas/data.py'),
|
|
43
|
+
'katlas.data.Data.get_kinase_uniprot': ('data.html#data.get_kinase_uniprot', 'katlas/data.py'),
|
|
44
|
+
'katlas.data.Data.get_ks_background': ('data.html#data.get_ks_background', 'katlas/data.py'),
|
|
45
|
+
'katlas.data.Data.get_ks_dataset': ('data.html#data.get_ks_dataset', 'katlas/data.py'),
|
|
46
|
+
'katlas.data.Data.get_ks_unique': ('data.html#data.get_ks_unique', 'katlas/data.py'),
|
|
47
|
+
'katlas.data.Data.get_num_dict': ('data.html#data.get_num_dict', 'katlas/data.py'),
|
|
48
|
+
'katlas.data.Data.get_ochoa_site': ('data.html#data.get_ochoa_site', 'katlas/data.py'),
|
|
49
|
+
'katlas.data.Data.get_psp_human_site': ('data.html#data.get_psp_human_site', 'katlas/data.py'),
|
|
50
|
+
'katlas.data.Data.get_pspa': ('data.html#data.get_pspa', 'katlas/data.py'),
|
|
51
|
+
'katlas.data.Data.get_pspa_scale': ('data.html#data.get_pspa_scale', 'katlas/data.py'),
|
|
52
|
+
'katlas.data.Data.get_pspa_st': ('data.html#data.get_pspa_st', 'katlas/data.py'),
|
|
53
|
+
'katlas.data.Data.get_pspa_st_pct': ('data.html#data.get_pspa_st_pct', 'katlas/data.py'),
|
|
54
|
+
'katlas.data.Data.get_pspa_tyr': ('data.html#data.get_pspa_tyr', 'katlas/data.py'),
|
|
55
|
+
'katlas.data.Data.get_pspa_tyr_pct': ('data.html#data.get_pspa_tyr_pct', 'katlas/data.py'),
|
|
56
|
+
'katlas.data.Data.get_reactome_pathway': ('data.html#data.get_reactome_pathway', 'katlas/data.py'),
|
|
57
|
+
'katlas.data.Data.get_reactome_pathway_lo': ('data.html#data.get_reactome_pathway_lo', 'katlas/data.py'),
|
|
58
|
+
'katlas.data.Data.read_file': ('data.html#data.read_file', 'katlas/data.py')},
|
|
59
|
+
'katlas.dnn': { 'katlas.dnn.CE': ('dnn.html#ce', 'katlas/dnn.py'),
|
|
60
|
+
'katlas.dnn.CNN1D': ('dnn.html#cnn1d', 'katlas/dnn.py'),
|
|
61
|
+
'katlas.dnn.CNN1D.__init__': ('dnn.html#cnn1d.__init__', 'katlas/dnn.py'),
|
|
62
|
+
'katlas.dnn.CNN1D.forward': ('dnn.html#cnn1d.forward', 'katlas/dnn.py'),
|
|
63
|
+
'katlas.dnn.GeneralDataset': ('dnn.html#generaldataset', 'katlas/dnn.py'),
|
|
64
|
+
'katlas.dnn.GeneralDataset.__getitem__': ('dnn.html#generaldataset.__getitem__', 'katlas/dnn.py'),
|
|
65
|
+
'katlas.dnn.GeneralDataset.__init__': ('dnn.html#generaldataset.__init__', 'katlas/dnn.py'),
|
|
66
|
+
'katlas.dnn.GeneralDataset.__len__': ('dnn.html#generaldataset.__len__', 'katlas/dnn.py'),
|
|
67
|
+
'katlas.dnn.JSD': ('dnn.html#jsd', 'katlas/dnn.py'),
|
|
68
|
+
'katlas.dnn.KLD': ('dnn.html#kld', 'katlas/dnn.py'),
|
|
69
|
+
'katlas.dnn.MLP': ('dnn.html#mlp', 'katlas/dnn.py'),
|
|
70
|
+
'katlas.dnn.PSSM_model': ('dnn.html#pssm_model', 'katlas/dnn.py'),
|
|
71
|
+
'katlas.dnn.PSSM_model.__init__': ('dnn.html#pssm_model.__init__', 'katlas/dnn.py'),
|
|
72
|
+
'katlas.dnn.PSSM_model.forward': ('dnn.html#pssm_model.forward', 'katlas/dnn.py'),
|
|
73
|
+
'katlas.dnn.conv_wn': ('dnn.html#conv_wn', 'katlas/dnn.py'),
|
|
74
|
+
'katlas.dnn.init_weights': ('dnn.html#init_weights', 'katlas/dnn.py'),
|
|
75
|
+
'katlas.dnn.lin_wn': ('dnn.html#lin_wn', 'katlas/dnn.py'),
|
|
76
|
+
'katlas.dnn.predict_dl': ('dnn.html#predict_dl', 'katlas/dnn.py'),
|
|
77
|
+
'katlas.dnn.seed_everything': ('dnn.html#seed_everything', 'katlas/dnn.py'),
|
|
78
|
+
'katlas.dnn.train_dl': ('dnn.html#train_dl', 'katlas/dnn.py'),
|
|
79
|
+
'katlas.dnn.train_dl_cv': ('dnn.html#train_dl_cv', 'katlas/dnn.py')},
|
|
80
|
+
'katlas.feature': { 'katlas.feature.filter_range_columns': ('feature.html#filter_range_columns', 'katlas/feature.py'),
|
|
81
|
+
'katlas.feature.get_clusters_elbow': ('feature.html#get_clusters_elbow', 'katlas/feature.py'),
|
|
82
|
+
'katlas.feature.get_esm': ('feature.html#get_esm', 'katlas/feature.py'),
|
|
77
83
|
'katlas.feature.get_morgan': ('feature.html#get_morgan', 'katlas/feature.py'),
|
|
78
84
|
'katlas.feature.get_rdkit': ('feature.html#get_rdkit', 'katlas/feature.py'),
|
|
85
|
+
'katlas.feature.get_rdkit_3d': ('feature.html#get_rdkit_3d', 'katlas/feature.py'),
|
|
86
|
+
'katlas.feature.get_rdkit_all': ('feature.html#get_rdkit_all', 'katlas/feature.py'),
|
|
87
|
+
'katlas.feature.get_rdkit_df': ('feature.html#get_rdkit_df', 'katlas/feature.py'),
|
|
79
88
|
'katlas.feature.get_t5': ('feature.html#get_t5', 'katlas/feature.py'),
|
|
80
89
|
'katlas.feature.get_t5_bfd': ('feature.html#get_t5_bfd', 'katlas/feature.py'),
|
|
90
|
+
'katlas.feature.kmeans': ('feature.html#kmeans', 'katlas/feature.py'),
|
|
91
|
+
'katlas.feature.onehot_encode': ('feature.html#onehot_encode', 'katlas/feature.py'),
|
|
92
|
+
'katlas.feature.onehot_encode_df': ('feature.html#onehot_encode_df', 'katlas/feature.py'),
|
|
81
93
|
'katlas.feature.preprocess': ('feature.html#preprocess', 'katlas/feature.py'),
|
|
82
|
-
'katlas.feature.
|
|
83
|
-
'katlas.feature.
|
|
84
|
-
'katlas.
|
|
85
|
-
|
|
94
|
+
'katlas.feature.remove_hi_corr': ('feature.html#remove_hi_corr', 'katlas/feature.py'),
|
|
95
|
+
'katlas.feature.standardize': ('feature.html#standardize', 'katlas/feature.py')},
|
|
96
|
+
'katlas.pathway': { 'katlas.pathway.add_reactome_ref': ('pathway.html#add_reactome_ref', 'katlas/pathway.py'),
|
|
97
|
+
'katlas.pathway.get_overlap': ('pathway.html#get_overlap', 'katlas/pathway.py'),
|
|
98
|
+
'katlas.pathway.get_reactome': ('pathway.html#get_reactome', 'katlas/pathway.py'),
|
|
99
|
+
'katlas.pathway.get_reactome_raw': ('pathway.html#get_reactome_raw', 'katlas/pathway.py'),
|
|
100
|
+
'katlas.pathway.plot_path': ('pathway.html#plot_path', 'katlas/pathway.py'),
|
|
101
|
+
'katlas.pathway.query_reactome': ('pathway.html#query_reactome', 'katlas/pathway.py')},
|
|
102
|
+
'katlas.plot': { 'katlas.plot.add_stats': ('plot.html#add_stats', 'katlas/plot.py'),
|
|
103
|
+
'katlas.plot.calculate_pct': ('plot.html#calculate_pct', 'katlas/plot.py'),
|
|
86
104
|
'katlas.plot.get_AUCDF': ('plot.html#get_aucdf', 'katlas/plot.py'),
|
|
87
105
|
'katlas.plot.get_color_dict': ('plot.html#get_color_dict', 'katlas/plot.py'),
|
|
88
|
-
'katlas.plot.
|
|
89
|
-
'katlas.plot.
|
|
90
|
-
'katlas.plot.
|
|
106
|
+
'katlas.plot.get_hue_big': ('plot.html#get_hue_big', 'katlas/plot.py'),
|
|
107
|
+
'katlas.plot.get_plt_color': ('plot.html#get_plt_color', 'katlas/plot.py'),
|
|
108
|
+
'katlas.plot.get_similarity': ('plot.html#get_similarity', 'katlas/plot.py'),
|
|
109
|
+
'katlas.plot.get_subfamily_color': ('plot.html#get_subfamily_color', 'katlas/plot.py'),
|
|
91
110
|
'katlas.plot.plot_2d': ('plot.html#plot_2d', 'katlas/plot.py'),
|
|
92
111
|
'katlas.plot.plot_bar': ('plot.html#plot_bar', 'katlas/plot.py'),
|
|
93
112
|
'katlas.plot.plot_bokeh': ('plot.html#plot_bokeh', 'katlas/plot.py'),
|
|
94
113
|
'katlas.plot.plot_box': ('plot.html#plot_box', 'katlas/plot.py'),
|
|
95
114
|
'katlas.plot.plot_cluster': ('plot.html#plot_cluster', 'katlas/plot.py'),
|
|
115
|
+
'katlas.plot.plot_cnt': ('plot.html#plot_cnt', 'katlas/plot.py'),
|
|
116
|
+
'katlas.plot.plot_composition': ('plot.html#plot_composition', 'katlas/plot.py'),
|
|
96
117
|
'katlas.plot.plot_confusion_matrix': ('plot.html#plot_confusion_matrix', 'katlas/plot.py'),
|
|
97
118
|
'katlas.plot.plot_corr': ('plot.html#plot_corr', 'katlas/plot.py'),
|
|
98
119
|
'katlas.plot.plot_count': ('plot.html#plot_count', 'katlas/plot.py'),
|
|
99
120
|
'katlas.plot.plot_group_bar': ('plot.html#plot_group_bar', 'katlas/plot.py'),
|
|
100
|
-
'katlas.plot.plot_heatmap': ('plot.html#plot_heatmap', 'katlas/plot.py'),
|
|
101
121
|
'katlas.plot.plot_hist': ('plot.html#plot_hist', 'katlas/plot.py'),
|
|
122
|
+
'katlas.plot.plot_pie': ('plot.html#plot_pie', 'katlas/plot.py'),
|
|
102
123
|
'katlas.plot.plot_rank': ('plot.html#plot_rank', 'katlas/plot.py'),
|
|
124
|
+
'katlas.plot.plot_rel': ('plot.html#plot_rel', 'katlas/plot.py'),
|
|
125
|
+
'katlas.plot.plot_stacked': ('plot.html#plot_stacked', 'katlas/plot.py'),
|
|
126
|
+
'katlas.plot.plot_violin': ('plot.html#plot_violin', 'katlas/plot.py'),
|
|
127
|
+
'katlas.plot.reduce_feature': ('plot.html#reduce_feature', 'katlas/plot.py'),
|
|
128
|
+
'katlas.plot.save_pdf': ('plot.html#save_pdf', 'katlas/plot.py'),
|
|
129
|
+
'katlas.plot.save_show': ('plot.html#save_show', 'katlas/plot.py'),
|
|
130
|
+
'katlas.plot.save_svg': ('plot.html#save_svg', 'katlas/plot.py'),
|
|
103
131
|
'katlas.plot.set_sns': ('plot.html#set_sns', 'katlas/plot.py')},
|
|
104
|
-
'katlas.
|
|
132
|
+
'katlas.pssm': { 'katlas.pssm._clean_zero': ('pssm.html#_clean_zero', 'katlas/pssm.py'),
|
|
133
|
+
'katlas.pssm.change_center_name': ('pssm.html#change_center_name', 'katlas/pssm.py'),
|
|
134
|
+
'katlas.pssm.clean_zero_normalize': ('pssm.html#clean_zero_normalize', 'katlas/pssm.py'),
|
|
135
|
+
'katlas.pssm.convert_logo_df': ('pssm.html#convert_logo_df', 'katlas/pssm.py'),
|
|
136
|
+
'katlas.pssm.cosine_overall_flat': ('pssm.html#cosine_overall_flat', 'katlas/pssm.py'),
|
|
137
|
+
'katlas.pssm.cosine_similarity': ('pssm.html#cosine_similarity', 'katlas/pssm.py'),
|
|
138
|
+
'katlas.pssm.flatten_pssm': ('pssm.html#flatten_pssm', 'katlas/pssm.py'),
|
|
139
|
+
'katlas.pssm.get_IC': ('pssm.html#get_ic', 'katlas/pssm.py'),
|
|
140
|
+
'katlas.pssm.get_IC_flat': ('pssm.html#get_ic_flat', 'katlas/pssm.py'),
|
|
141
|
+
'katlas.pssm.get_cluster_pssms': ('pssm.html#get_cluster_pssms', 'katlas/pssm.py'),
|
|
142
|
+
'katlas.pssm.get_entropy': ('pssm.html#get_entropy', 'katlas/pssm.py'),
|
|
143
|
+
'katlas.pssm.get_entropy_flat': ('pssm.html#get_entropy_flat', 'katlas/pssm.py'),
|
|
144
|
+
'katlas.pssm.get_logo': ('pssm.html#get_logo', 'katlas/pssm.py'),
|
|
145
|
+
'katlas.pssm.get_logo_IC': ('pssm.html#get_logo_ic', 'katlas/pssm.py'),
|
|
146
|
+
'katlas.pssm.get_one_kinase': ('pssm.html#get_one_kinase', 'katlas/pssm.py'),
|
|
147
|
+
'katlas.pssm.get_pos_min_max': ('pssm.html#get_pos_min_max', 'katlas/pssm.py'),
|
|
148
|
+
'katlas.pssm.get_prob': ('pssm.html#get_prob', 'katlas/pssm.py'),
|
|
149
|
+
'katlas.pssm.get_pssm_LO': ('pssm.html#get_pssm_lo', 'katlas/pssm.py'),
|
|
150
|
+
'katlas.pssm.get_pssm_LO_flat': ('pssm.html#get_pssm_lo_flat', 'katlas/pssm.py'),
|
|
151
|
+
'katlas.pssm.get_specificity': ('pssm.html#get_specificity', 'katlas/pssm.py'),
|
|
152
|
+
'katlas.pssm.get_specificity_flat': ('pssm.html#get_specificity_flat', 'katlas/pssm.py'),
|
|
153
|
+
'katlas.pssm.js_divergence': ('pssm.html#js_divergence', 'katlas/pssm.py'),
|
|
154
|
+
'katlas.pssm.js_divergence_flat': ('pssm.html#js_divergence_flat', 'katlas/pssm.py'),
|
|
155
|
+
'katlas.pssm.js_similarity': ('pssm.html#js_similarity', 'katlas/pssm.py'),
|
|
156
|
+
'katlas.pssm.js_similarity_flat': ('pssm.html#js_similarity_flat', 'katlas/pssm.py'),
|
|
157
|
+
'katlas.pssm.kl_divergence': ('pssm.html#kl_divergence', 'katlas/pssm.py'),
|
|
158
|
+
'katlas.pssm.kl_divergence_flat': ('pssm.html#kl_divergence_flat', 'katlas/pssm.py'),
|
|
159
|
+
'katlas.pssm.plot_heatmap': ('pssm.html#plot_heatmap', 'katlas/pssm.py'),
|
|
160
|
+
'katlas.pssm.plot_heatmap_simple': ('pssm.html#plot_heatmap_simple', 'katlas/pssm.py'),
|
|
161
|
+
'katlas.pssm.plot_logo': ('pssm.html#plot_logo', 'katlas/pssm.py'),
|
|
162
|
+
'katlas.pssm.plot_logo_LO': ('pssm.html#plot_logo_lo', 'katlas/pssm.py'),
|
|
163
|
+
'katlas.pssm.plot_logo_heatmap': ('pssm.html#plot_logo_heatmap', 'katlas/pssm.py'),
|
|
164
|
+
'katlas.pssm.plot_logo_heatmap_LO': ('pssm.html#plot_logo_heatmap_lo', 'katlas/pssm.py'),
|
|
165
|
+
'katlas.pssm.plot_logo_heatmap_pspa': ('pssm.html#plot_logo_heatmap_pspa', 'katlas/pssm.py'),
|
|
166
|
+
'katlas.pssm.plot_logo_pspa': ('pssm.html#plot_logo_pspa', 'katlas/pssm.py'),
|
|
167
|
+
'katlas.pssm.plot_logo_raw': ('pssm.html#plot_logo_raw', 'katlas/pssm.py'),
|
|
168
|
+
'katlas.pssm.plot_logos': ('pssm.html#plot_logos', 'katlas/pssm.py'),
|
|
169
|
+
'katlas.pssm.plot_logos_idx': ('pssm.html#plot_logos_idx', 'katlas/pssm.py'),
|
|
170
|
+
'katlas.pssm.plot_two_heatmaps': ('pssm.html#plot_two_heatmaps', 'katlas/pssm.py'),
|
|
171
|
+
'katlas.pssm.preprocess_pspa': ('pssm.html#preprocess_pspa', 'katlas/pssm.py'),
|
|
172
|
+
'katlas.pssm.raw2norm': ('pssm.html#raw2norm', 'katlas/pssm.py'),
|
|
173
|
+
'katlas.pssm.recover_pssm': ('pssm.html#recover_pssm', 'katlas/pssm.py'),
|
|
174
|
+
'katlas.pssm.scale_pos_neg_values': ('pssm.html#scale_pos_neg_values', 'katlas/pssm.py'),
|
|
175
|
+
'katlas.pssm.scale_zero_position': ('pssm.html#scale_zero_position', 'katlas/pssm.py'),
|
|
176
|
+
'katlas.pssm.sty2pSTY_df': ('pssm.html#sty2psty_df', 'katlas/pssm.py')},
|
|
177
|
+
'katlas.score': { 'katlas.score.Params': ('scoring.html#params', 'katlas/score.py'),
|
|
178
|
+
'katlas.score.STY2sty': ('scoring.html#sty2sty', 'katlas/score.py'),
|
|
179
|
+
'katlas.score.cut_seq': ('scoring.html#cut_seq', 'katlas/score.py'),
|
|
180
|
+
'katlas.score.duplicate_ref_zero': ('scoring.html#duplicate_ref_zero', 'katlas/score.py'),
|
|
181
|
+
'katlas.score.get_dict': ('scoring.html#get_dict', 'katlas/score.py'),
|
|
182
|
+
'katlas.score.get_pct': ('scoring.html#get_pct', 'katlas/score.py'),
|
|
183
|
+
'katlas.score.get_pct_df': ('scoring.html#get_pct_df', 'katlas/score.py'),
|
|
184
|
+
'katlas.score.multiply': ('scoring.html#multiply', 'katlas/score.py'),
|
|
185
|
+
'katlas.score.multiply_generic': ('scoring.html#multiply_generic', 'katlas/score.py'),
|
|
186
|
+
'katlas.score.multiply_pspa': ('scoring.html#multiply_pspa', 'katlas/score.py'),
|
|
187
|
+
'katlas.score.predict_kinase': ('scoring.html#predict_kinase', 'katlas/score.py'),
|
|
188
|
+
'katlas.score.predict_kinase_df': ('scoring.html#predict_kinase_df', 'katlas/score.py'),
|
|
189
|
+
'katlas.score.preprocess_ref': ('scoring.html#preprocess_ref', 'katlas/score.py'),
|
|
190
|
+
'katlas.score.sumup': ('scoring.html#sumup', 'katlas/score.py')},
|
|
191
|
+
'katlas.statistics': { 'katlas.statistics.get_metaP': ('statistics.html#get_metap', 'katlas/statistics.py'),
|
|
192
|
+
'katlas.statistics.get_pvalue': ('statistics.html#get_pvalue', 'katlas/statistics.py')},
|
|
193
|
+
'katlas.train': { 'katlas.train.calculate_ce': ('ml.html#calculate_ce', 'katlas/train.py'),
|
|
194
|
+
'katlas.train.get_score': ('ml.html#get_score', 'katlas/train.py'),
|
|
195
|
+
'katlas.train.get_splits': ('ml.html#get_splits', 'katlas/train.py'),
|
|
196
|
+
'katlas.train.post_process': ('ml.html#post_process', 'katlas/train.py'),
|
|
197
|
+
'katlas.train.post_process_oof': ('ml.html#post_process_oof', 'katlas/train.py'),
|
|
105
198
|
'katlas.train.predict_ml': ('ml.html#predict_ml', 'katlas/train.py'),
|
|
106
|
-
'katlas.train.score_each': ('ml.html#score_each', 'katlas/train.py'),
|
|
107
199
|
'katlas.train.split_data': ('ml.html#split_data', 'katlas/train.py'),
|
|
108
200
|
'katlas.train.train_ml': ('ml.html#train_ml', 'katlas/train.py'),
|
|
109
|
-
'katlas.train.train_ml_cv': ('ml.html#train_ml_cv', 'katlas/train.py')}
|
|
201
|
+
'katlas.train.train_ml_cv': ('ml.html#train_ml_cv', 'katlas/train.py')},
|
|
202
|
+
'katlas.utils': { 'katlas.utils.aln2df': ('utils.html#aln2df', 'katlas/utils.py'),
|
|
203
|
+
'katlas.utils.check_seq': ('utils.html#check_seq', 'katlas/utils.py'),
|
|
204
|
+
'katlas.utils.check_seqs': ('utils.html#check_seqs', 'katlas/utils.py'),
|
|
205
|
+
'katlas.utils.extract_site_seq': ('utils.html#extract_site_seq', 'katlas/utils.py'),
|
|
206
|
+
'katlas.utils.get_aln_freq': ('utils.html#get_aln_freq', 'katlas/utils.py'),
|
|
207
|
+
'katlas.utils.get_diff': ('utils.html#get_diff', 'katlas/utils.py'),
|
|
208
|
+
'katlas.utils.get_fasta': ('utils.html#get_fasta', 'katlas/utils.py'),
|
|
209
|
+
'katlas.utils.pSTY2sty': ('utils.html#psty2sty', 'katlas/utils.py'),
|
|
210
|
+
'katlas.utils.phosphorylate_seq': ('utils.html#phosphorylate_seq', 'katlas/utils.py'),
|
|
211
|
+
'katlas.utils.phosphorylate_seq_df': ('utils.html#phosphorylate_seq_df', 'katlas/utils.py'),
|
|
212
|
+
'katlas.utils.prepare_path': ('utils.html#prepare_path', 'katlas/utils.py'),
|
|
213
|
+
'katlas.utils.run_clustalo': ('utils.html#run_clustalo', 'katlas/utils.py'),
|
|
214
|
+
'katlas.utils.sty2pSTY': ('utils.html#sty2psty', 'katlas/utils.py'),
|
|
215
|
+
'katlas.utils.validate_site': ('utils.html#validate_site', 'katlas/utils.py'),
|
|
216
|
+
'katlas.utils.validate_site_df': ('utils.html#validate_site_df', 'katlas/utils.py')}}}
|
katlas/clustering.py
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/03_hierarchical.ipynb.
|
|
2
|
+
|
|
3
|
+
# %% auto 0
|
|
4
|
+
__all__ = ['get_1d_distance', 'get_1d_js', 'get_distance', 'get_1d_distance_parallel', 'get_1d_js_parallel', 'get_Z',
|
|
5
|
+
'plot_dendrogram', 'pssm_to_seq', 'get_pssm_seq_labels']
|
|
6
|
+
|
|
7
|
+
# %% ../nbs/03_hierarchical.ipynb 3
|
|
8
|
+
import pandas as pd,numpy as np
|
|
9
|
+
from .data import *
|
|
10
|
+
from .pssm import *
|
|
11
|
+
from .plot import set_sns
|
|
12
|
+
from tqdm import tqdm
|
|
13
|
+
from functools import partial
|
|
14
|
+
from tqdm.contrib.concurrent import process_map
|
|
15
|
+
from fastcore.meta import delegates
|
|
16
|
+
from scipy.cluster.hierarchy import linkage, fcluster,dendrogram
|
|
17
|
+
from matplotlib import pyplot as plt
|
|
18
|
+
|
|
19
|
+
# %% ../nbs/03_hierarchical.ipynb 6
|
|
20
|
+
def get_1d_distance(df,func_flat):
    "Compute 1D distance for each pair of rows in a dataframe given a distance function"
    # Output order is (0,1), (0,2), ..., (0,n-1), (1,2), ..., (n-2,n-1); length is n*(n-1)/2.
    n = len(df)
    dist = []
    for i in tqdm(range(n)):
        # Row i does not change across the inner loop, so look it up once per i
        # instead of once per (i, j) pair.
        # NOTE(review): assumes func_flat does not mutate its arguments - confirm.
        row_i = df.iloc[i]
        dist.extend(func_flat(row_i, df.iloc[j]) for j in range(i+1, n))
    return np.array(dist)
|
|
29
|
+
|
|
30
|
+
# %% ../nbs/03_hierarchical.ipynb 8
|
|
31
|
+
def get_1d_js(df):
    "Pairwise 1D distance over the rows of `df`, using `js_divergence_flat` as the distance function."
    js_distances = get_1d_distance(df, func_flat=js_divergence_flat)
    return js_distances
|
|
34
|
+
|
|
35
|
+
# %% ../nbs/03_hierarchical.ipynb 11
|
|
36
|
+
def get_distance(pair, df, func):
    "Apply `func` to the two rows of `df` located at the positional indices in `pair`."
    first, second = pair  # exactly two positional row indices
    left = df.iloc[first]
    right = df.iloc[second]
    return func(left, right)
|
|
39
|
+
|
|
40
|
+
# %% ../nbs/03_hierarchical.ipynb 12
|
|
41
|
+
def get_1d_distance_parallel(df, func_flat, max_workers=4, chunksize=100):
    "Compute the 1D distance for every pair of rows in `df` with `func_flat`, dispatching the pairs through `process_map`"
    n_rows = len(df)

    # Every (i, j) with i < j, enumerated row by row
    pairs = []
    for i in range(n_rows):
        for j in range(i + 1, n_rows):
            pairs.append((i, j))

    # Bind the dataframe and the distance function so each task only receives a pair
    worker = partial(get_distance, df=df, func=func_flat)

    results = process_map(worker, pairs, max_workers=max_workers, chunksize=chunksize)
    return np.array(results)
|
|
50
|
+
|
|
51
|
+
# %% ../nbs/03_hierarchical.ipynb 14
|
|
52
|
+
@delegates(get_1d_distance_parallel)
def get_1d_js_parallel(df, func_flat=js_divergence_flat, **kwargs):
    "Parallel pairwise 1D distance over the rows of `df`; `func_flat` defaults to `js_divergence_flat`."
    # Remaining keyword arguments are forwarded unchanged to get_1d_distance_parallel
    distances = get_1d_distance_parallel(df, func_flat=func_flat, **kwargs)
    return distances
|
|
56
|
+
|
|
57
|
+
# %% ../nbs/03_hierarchical.ipynb 16
|
|
58
|
+
def get_Z(pssms,func_flat=js_divergence_flat,parallel=True):
    "Build the linkage matrix Z (method='ward') from pairwise row distances of the pssms dataframe"
    # Pick the multi-process or the single-process distance routine
    if parallel:
        pairwise = get_1d_distance_parallel(pssms, func_flat=func_flat)
    else:
        pairwise = get_1d_distance(pssms, func_flat=func_flat)
    return linkage(pairwise, method='ward')
|
|
63
|
+
|
|
64
|
+
# %% ../nbs/03_hierarchical.ipynb 19
|
|
65
|
+
def plot_dendrogram(Z,
                    color_thr=0.07,
                    dense=7, # the higher the more dense for each row
                    line_width=1,
                    title=None,
                    scale=1,
                    **kwargs):
    "Plot a left-oriented dendrogram from linkage matrix `Z`; extra `kwargs` are passed to `dendrogram`"
    # Figure height grows with len(Z)+1 (the number of leaves for a scipy linkage matrix).
    # Clamp to 1 so fewer than `dense` leaves does not produce a zero-height figure.
    length = max(1, (len(Z) + 1) // dense)

    plt.figure(figsize=(5*scale,length*scale))
    # Line width is applied only while the dendrogram lines are being drawn
    with plt.rc_context({'lines.linewidth': line_width}):
        dendrogram(
            Z,
            orientation='left',
            leaf_font_size=7,
            color_threshold=color_thr,
            **kwargs
        )
    if title is not None: plt.title(title)
    plt.xlabel('Distance')
    # Hide the frame around the plot
    ax = plt.gca()
    for spine in ['top', 'right', 'left', 'bottom']:
        ax.spines[spine].set_visible(False)
|
|
90
|
+
|
|
91
|
+
# %% ../nbs/03_hierarchical.ipynb 23
|
|
92
|
+
def pssm_to_seq(pssm_df,
                thr=0.2, # threshold of probability to show in sequence
                clean_center=True, # if true, zero out non-last three values in position 0 (keep only s,t,y values at center)
                ):
    "Represent PSSM in string sequence of amino acids"
    # Per column (position): '_' if the column sums to 0, '.' if none of the top three
    # values exceeds `thr`, a single letter if exactly one does, '[a/b]' or '[a/b/c]'
    # otherwise. The column labelled 0 (center) additionally gets a trailing '*'.

    pssm_df = pssm_df.copy()  # never modify the caller's dataframe
    # Only clean when a center column exists: assigning through .loc to a missing
    # label would create a new column 0 and add a spurious '_*' to the result.
    if clean_center and 0 in pssm_df.columns:
        pssm_df.loc[pssm_df.index[:-3], 0] = 0 # keep only s,t,y in center 0 position

    pssm_df.index = pssm_df.index.map(lambda x: x.replace('pS', 's').replace('pT', 't').replace('pY', 'y'))

    consensus = []
    for col in pssm_df.columns:
        column_vals = pssm_df[col]
        # consider the case where sum for the position is 0
        if column_vals.sum() == 0:
            symbol = '_'
        else:
            top = column_vals.nlargest(3)
            passing = [aa for aa, prob in top.items() if prob > thr]

            if not passing:
                symbol = '.'
            elif len(passing) == 1:
                symbol = passing[0]
            else:
                symbol = f"[{'/'.join(passing)}]"

        if col == 0: # center position
            symbol += '*'

        consensus.append(symbol)

    return ''.join(consensus)
|
|
130
|
+
|
|
131
|
+
# %% ../nbs/03_hierarchical.ipynb 26
|
|
132
|
+
def get_pssm_seq_labels(pssms,
                        count_map=None, # df index as key, counts as value
                        thr=0.3, # threshold of probability to show in sequence
                        ):
    "Build one label per row of pssms: the row index, an optional count, and the pssm rendered as a sequence."
    labels = []
    for idx, row in pssms.iterrows():
        prefix = str(idx)
        # Insert the thousands-separated count only when a count map was supplied
        if count_map is not None:
            prefix += f' (n={count_map[idx]:,})'
        labels.append(prefix + ': ' + pssm_to_seq(recover_pssm(row), thr=thr))

    return labels
|
katlas/common.py
ADDED