python-katlas 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- katlas/__init__.py +1 -1
- katlas/_modidx.py +68 -143
- katlas/common.py +8 -2
- katlas/compare.py +118 -0
- katlas/data.py +282 -193
- katlas/hierarchical.py +20 -0
- katlas/lo.py +69 -0
- katlas/pathway.py +13 -13
- katlas/plot.py +314 -882
- katlas/pspa.py +138 -0
- katlas/pssm.py +178 -647
- katlas/{score.py → scoring.py} +30 -27
- katlas/utils.py +92 -45
- {python_katlas-0.2.0.dist-info → python_katlas-0.2.2.dist-info}/METADATA +15 -58
- python_katlas-0.2.2.dist-info/RECORD +19 -0
- {python_katlas-0.2.0.dist-info → python_katlas-0.2.2.dist-info}/WHEEL +1 -1
- katlas/clustering.py +0 -142
- katlas/core.py +0 -6
- katlas/dnn.py +0 -384
- katlas/feature.py +0 -320
- katlas/statistics.py +0 -102
- katlas/train.py +0 -207
- python_katlas-0.2.0.dist-info/RECORD +0 -21
- {python_katlas-0.2.0.dist-info → python_katlas-0.2.2.dist-info}/entry_points.txt +0 -0
- {python_katlas-0.2.0.dist-info → python_katlas-0.2.2.dist-info}/licenses/LICENSE +0 -0
- {python_katlas-0.2.0.dist-info → python_katlas-0.2.2.dist-info}/top_level.txt +0 -0
katlas/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.2.
|
|
1
|
+
__version__ = "0.2.2"
|
katlas/_modidx.py
CHANGED
|
@@ -5,24 +5,21 @@ d = { 'settings': { 'branch': 'main',
|
|
|
5
5
|
'doc_host': 'https://sky1ove.github.io',
|
|
6
6
|
'git_url': 'https://github.com/sky1ove/katlas',
|
|
7
7
|
'lib_path': 'katlas'},
|
|
8
|
-
'syms': { 'katlas.
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
'katlas.clustering.plot_dendrogram': ('hierarchical.html#plot_dendrogram', 'katlas/clustering.py'),
|
|
18
|
-
'katlas.clustering.pssm_to_seq': ('hierarchical.html#pssm_to_seq', 'katlas/clustering.py')},
|
|
19
|
-
'katlas.common': {},
|
|
20
|
-
'katlas.core': {},
|
|
8
|
+
'syms': { 'katlas.common': {},
|
|
9
|
+
'katlas.compare': { 'katlas.compare.cosine_overall_flat': ('compare.html#cosine_overall_flat', 'katlas/compare.py'),
|
|
10
|
+
'katlas.compare.cosine_similarity': ('compare.html#cosine_similarity', 'katlas/compare.py'),
|
|
11
|
+
'katlas.compare.js_divergence': ('compare.html#js_divergence', 'katlas/compare.py'),
|
|
12
|
+
'katlas.compare.js_divergence_flat': ('compare.html#js_divergence_flat', 'katlas/compare.py'),
|
|
13
|
+
'katlas.compare.js_similarity': ('compare.html#js_similarity', 'katlas/compare.py'),
|
|
14
|
+
'katlas.compare.js_similarity_flat': ('compare.html#js_similarity_flat', 'katlas/compare.py'),
|
|
15
|
+
'katlas.compare.kl_divergence': ('compare.html#kl_divergence', 'katlas/compare.py'),
|
|
16
|
+
'katlas.compare.kl_divergence_flat': ('compare.html#kl_divergence_flat', 'katlas/compare.py')},
|
|
21
17
|
'katlas.data': { 'katlas.data.CPTAC': ('data.html#cptac', 'katlas/data.py'),
|
|
22
18
|
'katlas.data.CPTAC._read_file': ('data.html#cptac._read_file', 'katlas/data.py'),
|
|
23
19
|
'katlas.data.CPTAC.get_id': ('data.html#cptac.get_id', 'katlas/data.py'),
|
|
24
20
|
'katlas.data.CPTAC.list_cancer': ('data.html#cptac.list_cancer', 'katlas/data.py'),
|
|
25
21
|
'katlas.data.Data': ('data.html#data', 'katlas/data.py'),
|
|
22
|
+
'katlas.data.Data.clear_cache': ('data.html#data.clear_cache', 'katlas/data.py'),
|
|
26
23
|
'katlas.data.Data.download': ('data.html#data.download', 'katlas/data.py'),
|
|
27
24
|
'katlas.data.Data.get_aa_info': ('data.html#data.get_aa_info', 'katlas/data.py'),
|
|
28
25
|
'katlas.data.Data.get_aa_morgan': ('data.html#data.get_aa_morgan', 'katlas/data.py'),
|
|
@@ -48,6 +45,7 @@ d = { 'settings': { 'branch': 'main',
|
|
|
48
45
|
'katlas.data.Data.get_ochoa_site': ('data.html#data.get_ochoa_site', 'katlas/data.py'),
|
|
49
46
|
'katlas.data.Data.get_psp_human_site': ('data.html#data.get_psp_human_site', 'katlas/data.py'),
|
|
50
47
|
'katlas.data.Data.get_pspa': ('data.html#data.get_pspa', 'katlas/data.py'),
|
|
48
|
+
'katlas.data.Data.get_pspa_raw': ('data.html#data.get_pspa_raw', 'katlas/data.py'),
|
|
51
49
|
'katlas.data.Data.get_pspa_scale': ('data.html#data.get_pspa_scale', 'katlas/data.py'),
|
|
52
50
|
'katlas.data.Data.get_pspa_st': ('data.html#data.get_pspa_st', 'katlas/data.py'),
|
|
53
51
|
'katlas.data.Data.get_pspa_st_pct': ('data.html#data.get_pspa_st_pct', 'katlas/data.py'),
|
|
@@ -55,162 +53,89 @@ d = { 'settings': { 'branch': 'main',
|
|
|
55
53
|
'katlas.data.Data.get_pspa_tyr_pct': ('data.html#data.get_pspa_tyr_pct', 'katlas/data.py'),
|
|
56
54
|
'katlas.data.Data.get_reactome_pathway': ('data.html#data.get_reactome_pathway', 'katlas/data.py'),
|
|
57
55
|
'katlas.data.Data.get_reactome_pathway_lo': ('data.html#data.get_reactome_pathway_lo', 'katlas/data.py'),
|
|
58
|
-
'katlas.data.Data.read_file': ('data.html#data.read_file', 'katlas/data.py')
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
'katlas.dnn.PSSM_model': ('dnn.html#pssm_model', 'katlas/dnn.py'),
|
|
71
|
-
'katlas.dnn.PSSM_model.__init__': ('dnn.html#pssm_model.__init__', 'katlas/dnn.py'),
|
|
72
|
-
'katlas.dnn.PSSM_model.forward': ('dnn.html#pssm_model.forward', 'katlas/dnn.py'),
|
|
73
|
-
'katlas.dnn.conv_wn': ('dnn.html#conv_wn', 'katlas/dnn.py'),
|
|
74
|
-
'katlas.dnn.init_weights': ('dnn.html#init_weights', 'katlas/dnn.py'),
|
|
75
|
-
'katlas.dnn.lin_wn': ('dnn.html#lin_wn', 'katlas/dnn.py'),
|
|
76
|
-
'katlas.dnn.predict_dl': ('dnn.html#predict_dl', 'katlas/dnn.py'),
|
|
77
|
-
'katlas.dnn.seed_everything': ('dnn.html#seed_everything', 'katlas/dnn.py'),
|
|
78
|
-
'katlas.dnn.train_dl': ('dnn.html#train_dl', 'katlas/dnn.py'),
|
|
79
|
-
'katlas.dnn.train_dl_cv': ('dnn.html#train_dl_cv', 'katlas/dnn.py')},
|
|
80
|
-
'katlas.feature': { 'katlas.feature.filter_range_columns': ('feature.html#filter_range_columns', 'katlas/feature.py'),
|
|
81
|
-
'katlas.feature.get_clusters_elbow': ('feature.html#get_clusters_elbow', 'katlas/feature.py'),
|
|
82
|
-
'katlas.feature.get_esm': ('feature.html#get_esm', 'katlas/feature.py'),
|
|
83
|
-
'katlas.feature.get_morgan': ('feature.html#get_morgan', 'katlas/feature.py'),
|
|
84
|
-
'katlas.feature.get_rdkit': ('feature.html#get_rdkit', 'katlas/feature.py'),
|
|
85
|
-
'katlas.feature.get_rdkit_3d': ('feature.html#get_rdkit_3d', 'katlas/feature.py'),
|
|
86
|
-
'katlas.feature.get_rdkit_all': ('feature.html#get_rdkit_all', 'katlas/feature.py'),
|
|
87
|
-
'katlas.feature.get_rdkit_df': ('feature.html#get_rdkit_df', 'katlas/feature.py'),
|
|
88
|
-
'katlas.feature.get_t5': ('feature.html#get_t5', 'katlas/feature.py'),
|
|
89
|
-
'katlas.feature.get_t5_bfd': ('feature.html#get_t5_bfd', 'katlas/feature.py'),
|
|
90
|
-
'katlas.feature.kmeans': ('feature.html#kmeans', 'katlas/feature.py'),
|
|
91
|
-
'katlas.feature.onehot_encode': ('feature.html#onehot_encode', 'katlas/feature.py'),
|
|
92
|
-
'katlas.feature.onehot_encode_df': ('feature.html#onehot_encode_df', 'katlas/feature.py'),
|
|
93
|
-
'katlas.feature.preprocess': ('feature.html#preprocess', 'katlas/feature.py'),
|
|
94
|
-
'katlas.feature.remove_hi_corr': ('feature.html#remove_hi_corr', 'katlas/feature.py'),
|
|
95
|
-
'katlas.feature.standardize': ('feature.html#standardize', 'katlas/feature.py')},
|
|
56
|
+
'katlas.data.Data.read_file': ('data.html#data.read_file', 'katlas/data.py'),
|
|
57
|
+
'katlas.data._default_dataset_dir': ('data.html#_default_dataset_dir', 'katlas/data.py'),
|
|
58
|
+
'katlas.data._normalize_dataset_dir': ('data.html#_normalize_dataset_dir', 'katlas/data.py'),
|
|
59
|
+
'katlas.data._normalize_required_files': ('data.html#_normalize_required_files', 'katlas/data.py'),
|
|
60
|
+
'katlas.data._read_dataset_file_cached': ('data.html#_read_dataset_file_cached', 'katlas/data.py')},
|
|
61
|
+
'katlas.hierarchical': { 'katlas.hierarchical.get_1d_js': ('hierarchical.html#get_1d_js', 'katlas/hierarchical.py'),
|
|
62
|
+
'katlas.hierarchical.get_1d_js_parallel': ( 'hierarchical.html#get_1d_js_parallel',
|
|
63
|
+
'katlas/hierarchical.py')},
|
|
64
|
+
'katlas.lo': { 'katlas.lo.get_pssm_LO': ('lo.html#get_pssm_lo', 'katlas/lo.py'),
|
|
65
|
+
'katlas.lo.get_pssm_LO_flat': ('lo.html#get_pssm_lo_flat', 'katlas/lo.py'),
|
|
66
|
+
'katlas.lo.plot_logo_LO': ('lo.html#plot_logo_lo', 'katlas/lo.py'),
|
|
67
|
+
'katlas.lo.plot_logo_heatmap_LO': ('lo.html#plot_logo_heatmap_lo', 'katlas/lo.py')},
|
|
96
68
|
'katlas.pathway': { 'katlas.pathway.add_reactome_ref': ('pathway.html#add_reactome_ref', 'katlas/pathway.py'),
|
|
97
69
|
'katlas.pathway.get_overlap': ('pathway.html#get_overlap', 'katlas/pathway.py'),
|
|
98
70
|
'katlas.pathway.get_reactome': ('pathway.html#get_reactome', 'katlas/pathway.py'),
|
|
99
71
|
'katlas.pathway.get_reactome_raw': ('pathway.html#get_reactome_raw', 'katlas/pathway.py'),
|
|
100
72
|
'katlas.pathway.plot_path': ('pathway.html#plot_path', 'katlas/pathway.py'),
|
|
101
73
|
'katlas.pathway.query_reactome': ('pathway.html#query_reactome', 'katlas/pathway.py')},
|
|
102
|
-
'katlas.plot': { 'katlas.plot.
|
|
103
|
-
'katlas.plot.
|
|
104
|
-
'katlas.plot.
|
|
105
|
-
'katlas.plot.
|
|
106
|
-
'katlas.plot.
|
|
107
|
-
'katlas.plot.
|
|
108
|
-
'katlas.plot.
|
|
109
|
-
'katlas.plot.
|
|
110
|
-
'katlas.plot.
|
|
111
|
-
'katlas.plot.
|
|
112
|
-
'katlas.plot.
|
|
113
|
-
'katlas.plot.
|
|
114
|
-
'katlas.plot.
|
|
115
|
-
'katlas.plot.
|
|
116
|
-
'katlas.plot.
|
|
117
|
-
'katlas.plot.
|
|
118
|
-
'katlas.plot.
|
|
119
|
-
|
|
120
|
-
'katlas.
|
|
121
|
-
'katlas.
|
|
122
|
-
'katlas.
|
|
123
|
-
'katlas.
|
|
124
|
-
'katlas.
|
|
125
|
-
'katlas.plot.plot_stacked': ('plot.html#plot_stacked', 'katlas/plot.py'),
|
|
126
|
-
'katlas.plot.plot_violin': ('plot.html#plot_violin', 'katlas/plot.py'),
|
|
127
|
-
'katlas.plot.reduce_feature': ('plot.html#reduce_feature', 'katlas/plot.py'),
|
|
128
|
-
'katlas.plot.save_pdf': ('plot.html#save_pdf', 'katlas/plot.py'),
|
|
129
|
-
'katlas.plot.save_show': ('plot.html#save_show', 'katlas/plot.py'),
|
|
130
|
-
'katlas.plot.save_svg': ('plot.html#save_svg', 'katlas/plot.py'),
|
|
131
|
-
'katlas.plot.set_sns': ('plot.html#set_sns', 'katlas/plot.py')},
|
|
74
|
+
'katlas.plot': { 'katlas.plot.change_center_name': ('plot.html#change_center_name', 'katlas/plot.py'),
|
|
75
|
+
'katlas.plot.convert_logo_df': ('plot.html#convert_logo_df', 'katlas/plot.py'),
|
|
76
|
+
'katlas.plot.get_logo_IC': ('plot.html#get_logo_ic', 'katlas/plot.py'),
|
|
77
|
+
'katlas.plot.get_pos_min_max': ('plot.html#get_pos_min_max', 'katlas/plot.py'),
|
|
78
|
+
'katlas.plot.pSTY2sty': ('plot.html#psty2sty', 'katlas/plot.py'),
|
|
79
|
+
'katlas.plot.plot_heatmap': ('plot.html#plot_heatmap', 'katlas/plot.py'),
|
|
80
|
+
'katlas.plot.plot_heatmap_simple': ('plot.html#plot_heatmap_simple', 'katlas/plot.py'),
|
|
81
|
+
'katlas.plot.plot_logo': ('plot.html#plot_logo', 'katlas/plot.py'),
|
|
82
|
+
'katlas.plot.plot_logo_heatmap': ('plot.html#plot_logo_heatmap', 'katlas/plot.py'),
|
|
83
|
+
'katlas.plot.plot_logo_raw': ('plot.html#plot_logo_raw', 'katlas/plot.py'),
|
|
84
|
+
'katlas.plot.plot_logos': ('plot.html#plot_logos', 'katlas/plot.py'),
|
|
85
|
+
'katlas.plot.plot_logos_idx': ('plot.html#plot_logos_idx', 'katlas/plot.py'),
|
|
86
|
+
'katlas.plot.plot_two_heatmaps': ('plot.html#plot_two_heatmaps', 'katlas/plot.py'),
|
|
87
|
+
'katlas.plot.scale_pos_neg_values': ('plot.html#scale_pos_neg_values', 'katlas/plot.py'),
|
|
88
|
+
'katlas.plot.scale_zero_position': ('plot.html#scale_zero_position', 'katlas/plot.py'),
|
|
89
|
+
'katlas.plot.sty2pSTY': ('plot.html#sty2psty', 'katlas/plot.py'),
|
|
90
|
+
'katlas.plot.sty2pSTY_df': ('plot.html#sty2psty_df', 'katlas/plot.py')},
|
|
91
|
+
'katlas.pspa': { 'katlas.pspa.get_logo': ('pspa.html#get_logo', 'katlas/pspa.py'),
|
|
92
|
+
'katlas.pspa.get_one_kinase': ('pspa.html#get_one_kinase', 'katlas/pspa.py'),
|
|
93
|
+
'katlas.pspa.plot_logo_heatmap_pspa': ('pspa.html#plot_logo_heatmap_pspa', 'katlas/pspa.py'),
|
|
94
|
+
'katlas.pspa.plot_logo_pspa': ('pspa.html#plot_logo_pspa', 'katlas/pspa.py'),
|
|
95
|
+
'katlas.pspa.preprocess_pspa': ('pspa.html#preprocess_pspa', 'katlas/pspa.py'),
|
|
96
|
+
'katlas.pspa.raw2norm': ('pspa.html#raw2norm', 'katlas/pspa.py')},
|
|
132
97
|
'katlas.pssm': { 'katlas.pssm._clean_zero': ('pssm.html#_clean_zero', 'katlas/pssm.py'),
|
|
133
|
-
'katlas.pssm.change_center_name': ('pssm.html#change_center_name', 'katlas/pssm.py'),
|
|
134
98
|
'katlas.pssm.clean_zero_normalize': ('pssm.html#clean_zero_normalize', 'katlas/pssm.py'),
|
|
135
|
-
'katlas.pssm.convert_logo_df': ('pssm.html#convert_logo_df', 'katlas/pssm.py'),
|
|
136
|
-
'katlas.pssm.cosine_overall_flat': ('pssm.html#cosine_overall_flat', 'katlas/pssm.py'),
|
|
137
|
-
'katlas.pssm.cosine_similarity': ('pssm.html#cosine_similarity', 'katlas/pssm.py'),
|
|
138
99
|
'katlas.pssm.flatten_pssm': ('pssm.html#flatten_pssm', 'katlas/pssm.py'),
|
|
139
100
|
'katlas.pssm.get_IC': ('pssm.html#get_ic', 'katlas/pssm.py'),
|
|
140
101
|
'katlas.pssm.get_IC_flat': ('pssm.html#get_ic_flat', 'katlas/pssm.py'),
|
|
141
102
|
'katlas.pssm.get_cluster_pssms': ('pssm.html#get_cluster_pssms', 'katlas/pssm.py'),
|
|
142
103
|
'katlas.pssm.get_entropy': ('pssm.html#get_entropy', 'katlas/pssm.py'),
|
|
143
104
|
'katlas.pssm.get_entropy_flat': ('pssm.html#get_entropy_flat', 'katlas/pssm.py'),
|
|
144
|
-
'katlas.pssm.get_logo': ('pssm.html#get_logo', 'katlas/pssm.py'),
|
|
145
|
-
'katlas.pssm.get_logo_IC': ('pssm.html#get_logo_ic', 'katlas/pssm.py'),
|
|
146
|
-
'katlas.pssm.get_one_kinase': ('pssm.html#get_one_kinase', 'katlas/pssm.py'),
|
|
147
|
-
'katlas.pssm.get_pos_min_max': ('pssm.html#get_pos_min_max', 'katlas/pssm.py'),
|
|
148
105
|
'katlas.pssm.get_prob': ('pssm.html#get_prob', 'katlas/pssm.py'),
|
|
149
|
-
'katlas.pssm.
|
|
150
|
-
'katlas.pssm.
|
|
106
|
+
'katlas.pssm.get_pssm_seq_labels': ('pssm.html#get_pssm_seq_labels', 'katlas/pssm.py'),
|
|
107
|
+
'katlas.pssm.get_pssm_weight': ('pssm.html#get_pssm_weight', 'katlas/pssm.py'),
|
|
151
108
|
'katlas.pssm.get_specificity': ('pssm.html#get_specificity', 'katlas/pssm.py'),
|
|
152
109
|
'katlas.pssm.get_specificity_flat': ('pssm.html#get_specificity_flat', 'katlas/pssm.py'),
|
|
153
|
-
'katlas.pssm.
|
|
154
|
-
'katlas.pssm.
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
'katlas.pssm.plot_logos_idx': ('pssm.html#plot_logos_idx', 'katlas/pssm.py'),
|
|
170
|
-
'katlas.pssm.plot_two_heatmaps': ('pssm.html#plot_two_heatmaps', 'katlas/pssm.py'),
|
|
171
|
-
'katlas.pssm.preprocess_pspa': ('pssm.html#preprocess_pspa', 'katlas/pssm.py'),
|
|
172
|
-
'katlas.pssm.raw2norm': ('pssm.html#raw2norm', 'katlas/pssm.py'),
|
|
173
|
-
'katlas.pssm.recover_pssm': ('pssm.html#recover_pssm', 'katlas/pssm.py'),
|
|
174
|
-
'katlas.pssm.scale_pos_neg_values': ('pssm.html#scale_pos_neg_values', 'katlas/pssm.py'),
|
|
175
|
-
'katlas.pssm.scale_zero_position': ('pssm.html#scale_zero_position', 'katlas/pssm.py'),
|
|
176
|
-
'katlas.pssm.sty2pSTY_df': ('pssm.html#sty2psty_df', 'katlas/pssm.py')},
|
|
177
|
-
'katlas.score': { 'katlas.score.Params': ('scoring.html#params', 'katlas/score.py'),
|
|
178
|
-
'katlas.score.STY2sty': ('scoring.html#sty2sty', 'katlas/score.py'),
|
|
179
|
-
'katlas.score.cut_seq': ('scoring.html#cut_seq', 'katlas/score.py'),
|
|
180
|
-
'katlas.score.duplicate_ref_zero': ('scoring.html#duplicate_ref_zero', 'katlas/score.py'),
|
|
181
|
-
'katlas.score.get_dict': ('scoring.html#get_dict', 'katlas/score.py'),
|
|
182
|
-
'katlas.score.get_pct': ('scoring.html#get_pct', 'katlas/score.py'),
|
|
183
|
-
'katlas.score.get_pct_df': ('scoring.html#get_pct_df', 'katlas/score.py'),
|
|
184
|
-
'katlas.score.multiply': ('scoring.html#multiply', 'katlas/score.py'),
|
|
185
|
-
'katlas.score.multiply_generic': ('scoring.html#multiply_generic', 'katlas/score.py'),
|
|
186
|
-
'katlas.score.multiply_pspa': ('scoring.html#multiply_pspa', 'katlas/score.py'),
|
|
187
|
-
'katlas.score.predict_kinase': ('scoring.html#predict_kinase', 'katlas/score.py'),
|
|
188
|
-
'katlas.score.predict_kinase_df': ('scoring.html#predict_kinase_df', 'katlas/score.py'),
|
|
189
|
-
'katlas.score.preprocess_ref': ('scoring.html#preprocess_ref', 'katlas/score.py'),
|
|
190
|
-
'katlas.score.sumup': ('scoring.html#sumup', 'katlas/score.py')},
|
|
191
|
-
'katlas.statistics': { 'katlas.statistics.get_metaP': ('statistics.html#get_metap', 'katlas/statistics.py'),
|
|
192
|
-
'katlas.statistics.get_pvalue': ('statistics.html#get_pvalue', 'katlas/statistics.py')},
|
|
193
|
-
'katlas.train': { 'katlas.train.calculate_ce': ('ml.html#calculate_ce', 'katlas/train.py'),
|
|
194
|
-
'katlas.train.get_score': ('ml.html#get_score', 'katlas/train.py'),
|
|
195
|
-
'katlas.train.get_splits': ('ml.html#get_splits', 'katlas/train.py'),
|
|
196
|
-
'katlas.train.post_process': ('ml.html#post_process', 'katlas/train.py'),
|
|
197
|
-
'katlas.train.post_process_oof': ('ml.html#post_process_oof', 'katlas/train.py'),
|
|
198
|
-
'katlas.train.predict_ml': ('ml.html#predict_ml', 'katlas/train.py'),
|
|
199
|
-
'katlas.train.split_data': ('ml.html#split_data', 'katlas/train.py'),
|
|
200
|
-
'katlas.train.train_ml': ('ml.html#train_ml', 'katlas/train.py'),
|
|
201
|
-
'katlas.train.train_ml_cv': ('ml.html#train_ml_cv', 'katlas/train.py')},
|
|
110
|
+
'katlas.pssm.pssm_to_seq': ('pssm.html#pssm_to_seq', 'katlas/pssm.py'),
|
|
111
|
+
'katlas.pssm.recover_pssm': ('pssm.html#recover_pssm', 'katlas/pssm.py')},
|
|
112
|
+
'katlas.scoring': { 'katlas.scoring.Params': ('scoring.html#params', 'katlas/scoring.py'),
|
|
113
|
+
'katlas.scoring.STY2sty': ('scoring.html#sty2sty', 'katlas/scoring.py'),
|
|
114
|
+
'katlas.scoring.cut_seq': ('scoring.html#cut_seq', 'katlas/scoring.py'),
|
|
115
|
+
'katlas.scoring.duplicate_ref_zero': ('scoring.html#duplicate_ref_zero', 'katlas/scoring.py'),
|
|
116
|
+
'katlas.scoring.get_dict': ('scoring.html#get_dict', 'katlas/scoring.py'),
|
|
117
|
+
'katlas.scoring.get_pct': ('scoring.html#get_pct', 'katlas/scoring.py'),
|
|
118
|
+
'katlas.scoring.get_pct_df': ('scoring.html#get_pct_df', 'katlas/scoring.py'),
|
|
119
|
+
'katlas.scoring.multiply': ('scoring.html#multiply', 'katlas/scoring.py'),
|
|
120
|
+
'katlas.scoring.multiply_generic': ('scoring.html#multiply_generic', 'katlas/scoring.py'),
|
|
121
|
+
'katlas.scoring.multiply_pspa': ('scoring.html#multiply_pspa', 'katlas/scoring.py'),
|
|
122
|
+
'katlas.scoring.predict_kinase': ('scoring.html#predict_kinase', 'katlas/scoring.py'),
|
|
123
|
+
'katlas.scoring.predict_kinase_df': ('scoring.html#predict_kinase_df', 'katlas/scoring.py'),
|
|
124
|
+
'katlas.scoring.preprocess_ref': ('scoring.html#preprocess_ref', 'katlas/scoring.py'),
|
|
125
|
+
'katlas.scoring.sumup': ('scoring.html#sumup', 'katlas/scoring.py')},
|
|
202
126
|
'katlas.utils': { 'katlas.utils.aln2df': ('utils.html#aln2df', 'katlas/utils.py'),
|
|
203
127
|
'katlas.utils.check_seq': ('utils.html#check_seq', 'katlas/utils.py'),
|
|
204
128
|
'katlas.utils.check_seqs': ('utils.html#check_seqs', 'katlas/utils.py'),
|
|
129
|
+
'katlas.utils.clean_feat': ('utils.html#clean_feat', 'katlas/utils.py'),
|
|
205
130
|
'katlas.utils.extract_site_seq': ('utils.html#extract_site_seq', 'katlas/utils.py'),
|
|
206
131
|
'katlas.utils.get_aln_freq': ('utils.html#get_aln_freq', 'katlas/utils.py'),
|
|
207
|
-
'katlas.utils.get_diff': ('utils.html#get_diff', 'katlas/utils.py'),
|
|
208
132
|
'katlas.utils.get_fasta': ('utils.html#get_fasta', 'katlas/utils.py'),
|
|
209
|
-
'katlas.utils.
|
|
133
|
+
'katlas.utils.get_subfamily_color': ('utils.html#get_subfamily_color', 'katlas/utils.py'),
|
|
210
134
|
'katlas.utils.phosphorylate_seq': ('utils.html#phosphorylate_seq', 'katlas/utils.py'),
|
|
211
135
|
'katlas.utils.phosphorylate_seq_df': ('utils.html#phosphorylate_seq_df', 'katlas/utils.py'),
|
|
212
136
|
'katlas.utils.prepare_path': ('utils.html#prepare_path', 'katlas/utils.py'),
|
|
137
|
+
'katlas.utils.remove_hi_corr': ('utils.html#remove_hi_corr', 'katlas/utils.py'),
|
|
213
138
|
'katlas.utils.run_clustalo': ('utils.html#run_clustalo', 'katlas/utils.py'),
|
|
214
|
-
'katlas.utils.
|
|
139
|
+
'katlas.utils.standardize': ('utils.html#standardize', 'katlas/utils.py'),
|
|
215
140
|
'katlas.utils.validate_site': ('utils.html#validate_site', 'katlas/utils.py'),
|
|
216
141
|
'katlas.utils.validate_site_df': ('utils.html#validate_site_df', 'katlas/utils.py')}}}
|
katlas/common.py
CHANGED
|
@@ -1,4 +1,10 @@
|
|
|
1
|
+
from .compare import *
|
|
1
2
|
from .data import *
|
|
3
|
+
from .hierarchical import *
|
|
4
|
+
from .lo import *
|
|
5
|
+
from .pathway import *
|
|
6
|
+
from .plot import *
|
|
7
|
+
from .pspa import *
|
|
2
8
|
from .pssm import *
|
|
3
|
-
from .
|
|
4
|
-
from .utils import *
|
|
9
|
+
from .scoring import *
|
|
10
|
+
from .utils import *
|
katlas/compare.py
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
"""algorithms about comparing two PSSMs"""
|
|
2
|
+
|
|
3
|
+
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/06_compare.ipynb.
|
|
4
|
+
|
|
5
|
+
# %% auto #0
|
|
6
|
+
__all__ = ['kl_divergence', 'kl_divergence_flat', 'js_divergence', 'js_divergence_flat', 'js_similarity', 'js_similarity_flat',
|
|
7
|
+
'cosine_similarity', 'cosine_overall_flat']
|
|
8
|
+
|
|
9
|
+
# %% ../nbs/06_compare.ipynb #76949d54-f519-4bc6-9da5-d48a05bbfe69
|
|
10
|
+
import numpy as np, pandas as pd
|
|
11
|
+
from .pssm import EPSILON
|
|
12
|
+
|
|
13
|
+
# %% ../nbs/06_compare.ipynb #b1ef2ee7-70fd-4703-96b4-1c1a9053987f
|
|
14
|
+
def kl_divergence(p1, # target pssm p (DataFrame/Series or array-like; rows are aa, columns are positions)
                  p2, # pred pssm q (same shape as p1)
                  ):
    """
    KL divergence D_KL(p1 || p2), summed over the first axis.

    For 2-D input (aa x position) the result holds one value per position;
    for 1-D input the result is a single summed value. No averaging across
    positions is done here.

    When both inputs are pandas objects they are aligned on their labels
    (inner join) before the computation; any other array-like input is
    compared positionally.

    Raises ValueError if the two inputs do not have the same shape.
    """
    pandas_types = (pd.DataFrame, pd.Series)
    if isinstance(p1, pandas_types) and isinstance(p2, pandas_types):
        if p1.shape != p2.shape: raise ValueError("Shapes of p1 and p2 must match.")
        # label-based alignment only exists on pandas objects
        p1, p2 = p1.align(p2, join='inner', axis=None)
    else:
        p1, p2 = np.asarray(p1), np.asarray(p2)
        if p1.shape != p2.shape: raise ValueError("Shapes of p1 and p2 must match.")

    # Zero out every entry whose sum is not strictly positive
    # (both zero, or NaN since NaN > 0 is False)
    valid = (p1 + p2) > 0
    p1 = np.where(valid, p1, 0.0)
    p2 = np.where(valid, p2, 0.0)

    # KL divergence: sum_x p1(x) log(p1(x)/p2(x)); EPSILON keeps the log finite at zero
    kl = np.sum(p1 * np.log((p1 + EPSILON) / (p2 + EPSILON)), axis=0)

    return kl
|
|
34
|
+
|
|
35
|
+
# %% ../nbs/06_compare.ipynb #71084bb3-169a-4dc2-b5dd-1b802fda8225
|
|
36
|
+
def kl_divergence_flat(p1_flat, # pd.Series of target flattened pssm p
                       p2_flat, # pd.Series of pred flattened pssm q
                       ):
    """
    KL divergence between two flattened PSSMs, divided by the number of positions.

    The position count is the number of distinct signed integers extracted
    from the index labels of `p1_flat`.
    """
    # 1-D input: kl_divergence sums over every entry, so this is one value
    summed = kl_divergence(p1_flat, p2_flat)
    position_labels = p1_flat.index.str.extract(r'(-?\d+)')
    n_positions = len(position_labels.drop_duplicates())
    return float(summed / n_positions)
|
|
44
|
+
|
|
45
|
+
# %% ../nbs/06_compare.ipynb #eca99343-fbcd-48c2-a1ff-88af31fd2346
|
|
46
|
+
def js_divergence(p1, # pssm (DataFrame/Series or array-like; rows are aa, columns are positions)
                  p2, # pssm (same shape as p1)
                  index=True, # label the result with p1's column labels when they exist
                  ):
    """
    Jensen-Shannon divergence (natural log) between p1 and p2, summed over the first axis.

    When both inputs are pandas objects they are aligned on their labels
    (inner join); any other array-like input is compared positionally.

    Returns a pd.Series indexed by position when `index` is true and the
    aligned input has column labels (i.e. a DataFrame); otherwise returns the
    raw NumPy result (one value per column for 2-D input, a single value for 1-D).

    Raises ValueError if the two inputs do not have the same shape.
    """
    pandas_types = (pd.DataFrame, pd.Series)
    if isinstance(p1, pandas_types) and isinstance(p2, pandas_types):
        if p1.shape != p2.shape: raise ValueError("Shapes of p1 and p2 must match.")
        # label-based alignment only exists on pandas objects
        p1, p2 = p1.align(p2, join='inner', axis=None)
    else:
        p1, p2 = np.asarray(p1), np.asarray(p2)
        if p1.shape != p2.shape: raise ValueError("Shapes of p1 and p2 must match.")

    # Only a DataFrame carries column labels; Series / ndarray input has none
    positions = getattr(p1, 'columns', None) if index else None

    # Zero out every entry whose sum is not strictly positive
    valid = (p1 + p2) > 0
    p1 = np.where(valid, p1, 0.0)
    p2 = np.where(valid, p2, 0.0)

    # mixture distribution
    m = 0.5 * (p1 + p2)

    # JS = 0.5*KL(p1||m) + 0.5*KL(p2||m); EPSILON keeps the log finite at zero
    js = 0.5 * np.sum(p1 * np.log((p1 + EPSILON) / (m + EPSILON)), axis=0) + \
         0.5 * np.sum(p2 * np.log((p2 + EPSILON) / (m + EPSILON)), axis=0)
    return pd.Series(js, index=positions) if positions is not None else js
|
|
63
|
+
|
|
64
|
+
# %% ../nbs/06_compare.ipynb #37553737-13b3-4461-ad93-fe4cf863f25b
|
|
65
|
+
def js_divergence_flat(p1_flat, # pd.Series of flattened pssm
                       p2_flat, # pd.Series of flattened pssm
                       ):
    """
    JS divergence between two flattened PSSMs, divided by the number of positions.

    The position count is the number of distinct signed integers extracted
    from the index labels of `p1_flat`.
    """
    # index=False: ask js_divergence for the raw value, not a labelled Series
    summed = js_divergence(p1_flat, p2_flat, index=False)
    position_labels = p1_flat.index.str.extract(r'(-?\d+)')
    n_positions = len(position_labels.drop_duplicates())
    return float(summed / n_positions)
|
|
73
|
+
|
|
74
|
+
# %% ../nbs/06_compare.ipynb #7e480e54-c5de-4726-b208-c531e07a2adc
|
|
75
|
+
def js_similarity(pssm1,pssm2):
    "Similarity as 1 - JSD, where the JSD from `js_divergence` is first divided by ln(2) to express it in bits."
    jsd_bits = js_divergence(pssm1, pssm2) / np.log(2)
    return 1 - jsd_bits
|
|
80
|
+
|
|
81
|
+
# %% ../nbs/06_compare.ipynb #0c51e8b5-df29-431b-8013-ff46388b4872
|
|
82
|
+
def js_similarity_flat(p1_flat,p2_flat):
    "Similarity as 1 - JSD for flattened PSSMs; the value from `js_divergence_flat` is divided by ln(2) to express it in bits."
    jsd_nats = js_divergence_flat(p1_flat, p2_flat)
    jsd_bits = jsd_nats / np.log(2)
    return 1 - jsd_bits
|
|
85
|
+
|
|
86
|
+
# %% ../nbs/06_compare.ipynb #8d2e228a-8543-4174-8c24-ce6ded317c8d
|
|
87
|
+
def cosine_similarity(pssm1: pd.DataFrame, pssm2: pd.DataFrame) -> pd.Series:
    """
    Compute cosine similarity per position (column) between two PSSMs.

    Rows of the two frames are matched on their index labels (inner join),
    so the row order of the inputs does not matter. A position whose vector
    has zero norm in either PSSM gets a similarity of 0.0.

    Returns a float pd.Series indexed by the columns of `pssm1`.
    Raises ValueError if the two frames do not have the same shape.
    """
    if pssm1.shape != pssm2.shape: raise ValueError("PSSMs must have the same shape")

    # The aa index is the same for every column, so align the rows once
    # instead of re-aligning inside the loop; columns are left untouched.
    aligned1, aligned2 = pssm1.align(pssm2, join='inner', axis=0)

    sims = {}
    for pos in pssm1.columns:
        v1 = aligned1[pos]
        v2 = aligned2[pos]

        norm1 = np.linalg.norm(v1)
        norm2 = np.linalg.norm(v2)

        if norm1 == 0 or norm2 == 0:
            # cosine is undefined for a zero vector; report no similarity
            sims[pos] = 0.0
        else:
            sims[pos] = np.dot(v1, v2) / (norm1 * norm2)

    # explicit dtype keeps the result float even when there are no columns
    return pd.Series(sims, dtype=float)
|
|
108
|
+
|
|
109
|
+
# %% ../nbs/06_compare.ipynb #d830fbaa-4a9f-4d5d-98ba-289fc91bff8e
|
|
110
|
+
def cosine_overall_flat(pssm1_flat, pssm2_flat):
    """
    Compute overall cosine similarity between two PSSMs (flattened).

    The two Series are matched on their index labels (inner join) before the
    computation. Returns 0.0 when either aligned vector has zero norm.
    """
    # match index for dot product
    pssm1_flat, pssm2_flat = pssm1_flat.align(pssm2_flat, join='inner')
    norm1 = np.linalg.norm(pssm1_flat)
    norm2 = np.linalg.norm(pssm2_flat)
    if norm1 == 0 or norm2 == 0: return 0.0
    # vectorized dot product; the builtin sum() would loop over the Series in Python
    dot_product = np.dot(pssm1_flat.to_numpy(), pssm2_flat.to_numpy())
    return dot_product / (norm1 * norm2)
|