python-katlas 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
katlas/__init__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.2.0"
1
+ __version__ = "0.2.2"
katlas/_modidx.py CHANGED
@@ -5,24 +5,21 @@ d = { 'settings': { 'branch': 'main',
5
5
  'doc_host': 'https://sky1ove.github.io',
6
6
  'git_url': 'https://github.com/sky1ove/katlas',
7
7
  'lib_path': 'katlas'},
8
- 'syms': { 'katlas.clustering': { 'katlas.clustering.get_1d_distance': ('hierarchical.html#get_1d_distance', 'katlas/clustering.py'),
9
- 'katlas.clustering.get_1d_distance_parallel': ( 'hierarchical.html#get_1d_distance_parallel',
10
- 'katlas/clustering.py'),
11
- 'katlas.clustering.get_1d_js': ('hierarchical.html#get_1d_js', 'katlas/clustering.py'),
12
- 'katlas.clustering.get_1d_js_parallel': ('hierarchical.html#get_1d_js_parallel', 'katlas/clustering.py'),
13
- 'katlas.clustering.get_Z': ('hierarchical.html#get_z', 'katlas/clustering.py'),
14
- 'katlas.clustering.get_distance': ('hierarchical.html#get_distance', 'katlas/clustering.py'),
15
- 'katlas.clustering.get_pssm_seq_labels': ( 'hierarchical.html#get_pssm_seq_labels',
16
- 'katlas/clustering.py'),
17
- 'katlas.clustering.plot_dendrogram': ('hierarchical.html#plot_dendrogram', 'katlas/clustering.py'),
18
- 'katlas.clustering.pssm_to_seq': ('hierarchical.html#pssm_to_seq', 'katlas/clustering.py')},
19
- 'katlas.common': {},
20
- 'katlas.core': {},
8
+ 'syms': { 'katlas.common': {},
9
+ 'katlas.compare': { 'katlas.compare.cosine_overall_flat': ('compare.html#cosine_overall_flat', 'katlas/compare.py'),
10
+ 'katlas.compare.cosine_similarity': ('compare.html#cosine_similarity', 'katlas/compare.py'),
11
+ 'katlas.compare.js_divergence': ('compare.html#js_divergence', 'katlas/compare.py'),
12
+ 'katlas.compare.js_divergence_flat': ('compare.html#js_divergence_flat', 'katlas/compare.py'),
13
+ 'katlas.compare.js_similarity': ('compare.html#js_similarity', 'katlas/compare.py'),
14
+ 'katlas.compare.js_similarity_flat': ('compare.html#js_similarity_flat', 'katlas/compare.py'),
15
+ 'katlas.compare.kl_divergence': ('compare.html#kl_divergence', 'katlas/compare.py'),
16
+ 'katlas.compare.kl_divergence_flat': ('compare.html#kl_divergence_flat', 'katlas/compare.py')},
21
17
  'katlas.data': { 'katlas.data.CPTAC': ('data.html#cptac', 'katlas/data.py'),
22
18
  'katlas.data.CPTAC._read_file': ('data.html#cptac._read_file', 'katlas/data.py'),
23
19
  'katlas.data.CPTAC.get_id': ('data.html#cptac.get_id', 'katlas/data.py'),
24
20
  'katlas.data.CPTAC.list_cancer': ('data.html#cptac.list_cancer', 'katlas/data.py'),
25
21
  'katlas.data.Data': ('data.html#data', 'katlas/data.py'),
22
+ 'katlas.data.Data.clear_cache': ('data.html#data.clear_cache', 'katlas/data.py'),
26
23
  'katlas.data.Data.download': ('data.html#data.download', 'katlas/data.py'),
27
24
  'katlas.data.Data.get_aa_info': ('data.html#data.get_aa_info', 'katlas/data.py'),
28
25
  'katlas.data.Data.get_aa_morgan': ('data.html#data.get_aa_morgan', 'katlas/data.py'),
@@ -48,6 +45,7 @@ d = { 'settings': { 'branch': 'main',
48
45
  'katlas.data.Data.get_ochoa_site': ('data.html#data.get_ochoa_site', 'katlas/data.py'),
49
46
  'katlas.data.Data.get_psp_human_site': ('data.html#data.get_psp_human_site', 'katlas/data.py'),
50
47
  'katlas.data.Data.get_pspa': ('data.html#data.get_pspa', 'katlas/data.py'),
48
+ 'katlas.data.Data.get_pspa_raw': ('data.html#data.get_pspa_raw', 'katlas/data.py'),
51
49
  'katlas.data.Data.get_pspa_scale': ('data.html#data.get_pspa_scale', 'katlas/data.py'),
52
50
  'katlas.data.Data.get_pspa_st': ('data.html#data.get_pspa_st', 'katlas/data.py'),
53
51
  'katlas.data.Data.get_pspa_st_pct': ('data.html#data.get_pspa_st_pct', 'katlas/data.py'),
@@ -55,162 +53,89 @@ d = { 'settings': { 'branch': 'main',
55
53
  'katlas.data.Data.get_pspa_tyr_pct': ('data.html#data.get_pspa_tyr_pct', 'katlas/data.py'),
56
54
  'katlas.data.Data.get_reactome_pathway': ('data.html#data.get_reactome_pathway', 'katlas/data.py'),
57
55
  'katlas.data.Data.get_reactome_pathway_lo': ('data.html#data.get_reactome_pathway_lo', 'katlas/data.py'),
58
- 'katlas.data.Data.read_file': ('data.html#data.read_file', 'katlas/data.py')},
59
- 'katlas.dnn': { 'katlas.dnn.CE': ('dnn.html#ce', 'katlas/dnn.py'),
60
- 'katlas.dnn.CNN1D': ('dnn.html#cnn1d', 'katlas/dnn.py'),
61
- 'katlas.dnn.CNN1D.__init__': ('dnn.html#cnn1d.__init__', 'katlas/dnn.py'),
62
- 'katlas.dnn.CNN1D.forward': ('dnn.html#cnn1d.forward', 'katlas/dnn.py'),
63
- 'katlas.dnn.GeneralDataset': ('dnn.html#generaldataset', 'katlas/dnn.py'),
64
- 'katlas.dnn.GeneralDataset.__getitem__': ('dnn.html#generaldataset.__getitem__', 'katlas/dnn.py'),
65
- 'katlas.dnn.GeneralDataset.__init__': ('dnn.html#generaldataset.__init__', 'katlas/dnn.py'),
66
- 'katlas.dnn.GeneralDataset.__len__': ('dnn.html#generaldataset.__len__', 'katlas/dnn.py'),
67
- 'katlas.dnn.JSD': ('dnn.html#jsd', 'katlas/dnn.py'),
68
- 'katlas.dnn.KLD': ('dnn.html#kld', 'katlas/dnn.py'),
69
- 'katlas.dnn.MLP': ('dnn.html#mlp', 'katlas/dnn.py'),
70
- 'katlas.dnn.PSSM_model': ('dnn.html#pssm_model', 'katlas/dnn.py'),
71
- 'katlas.dnn.PSSM_model.__init__': ('dnn.html#pssm_model.__init__', 'katlas/dnn.py'),
72
- 'katlas.dnn.PSSM_model.forward': ('dnn.html#pssm_model.forward', 'katlas/dnn.py'),
73
- 'katlas.dnn.conv_wn': ('dnn.html#conv_wn', 'katlas/dnn.py'),
74
- 'katlas.dnn.init_weights': ('dnn.html#init_weights', 'katlas/dnn.py'),
75
- 'katlas.dnn.lin_wn': ('dnn.html#lin_wn', 'katlas/dnn.py'),
76
- 'katlas.dnn.predict_dl': ('dnn.html#predict_dl', 'katlas/dnn.py'),
77
- 'katlas.dnn.seed_everything': ('dnn.html#seed_everything', 'katlas/dnn.py'),
78
- 'katlas.dnn.train_dl': ('dnn.html#train_dl', 'katlas/dnn.py'),
79
- 'katlas.dnn.train_dl_cv': ('dnn.html#train_dl_cv', 'katlas/dnn.py')},
80
- 'katlas.feature': { 'katlas.feature.filter_range_columns': ('feature.html#filter_range_columns', 'katlas/feature.py'),
81
- 'katlas.feature.get_clusters_elbow': ('feature.html#get_clusters_elbow', 'katlas/feature.py'),
82
- 'katlas.feature.get_esm': ('feature.html#get_esm', 'katlas/feature.py'),
83
- 'katlas.feature.get_morgan': ('feature.html#get_morgan', 'katlas/feature.py'),
84
- 'katlas.feature.get_rdkit': ('feature.html#get_rdkit', 'katlas/feature.py'),
85
- 'katlas.feature.get_rdkit_3d': ('feature.html#get_rdkit_3d', 'katlas/feature.py'),
86
- 'katlas.feature.get_rdkit_all': ('feature.html#get_rdkit_all', 'katlas/feature.py'),
87
- 'katlas.feature.get_rdkit_df': ('feature.html#get_rdkit_df', 'katlas/feature.py'),
88
- 'katlas.feature.get_t5': ('feature.html#get_t5', 'katlas/feature.py'),
89
- 'katlas.feature.get_t5_bfd': ('feature.html#get_t5_bfd', 'katlas/feature.py'),
90
- 'katlas.feature.kmeans': ('feature.html#kmeans', 'katlas/feature.py'),
91
- 'katlas.feature.onehot_encode': ('feature.html#onehot_encode', 'katlas/feature.py'),
92
- 'katlas.feature.onehot_encode_df': ('feature.html#onehot_encode_df', 'katlas/feature.py'),
93
- 'katlas.feature.preprocess': ('feature.html#preprocess', 'katlas/feature.py'),
94
- 'katlas.feature.remove_hi_corr': ('feature.html#remove_hi_corr', 'katlas/feature.py'),
95
- 'katlas.feature.standardize': ('feature.html#standardize', 'katlas/feature.py')},
56
+ 'katlas.data.Data.read_file': ('data.html#data.read_file', 'katlas/data.py'),
57
+ 'katlas.data._default_dataset_dir': ('data.html#_default_dataset_dir', 'katlas/data.py'),
58
+ 'katlas.data._normalize_dataset_dir': ('data.html#_normalize_dataset_dir', 'katlas/data.py'),
59
+ 'katlas.data._normalize_required_files': ('data.html#_normalize_required_files', 'katlas/data.py'),
60
+ 'katlas.data._read_dataset_file_cached': ('data.html#_read_dataset_file_cached', 'katlas/data.py')},
61
+ 'katlas.hierarchical': { 'katlas.hierarchical.get_1d_js': ('hierarchical.html#get_1d_js', 'katlas/hierarchical.py'),
62
+ 'katlas.hierarchical.get_1d_js_parallel': ( 'hierarchical.html#get_1d_js_parallel',
63
+ 'katlas/hierarchical.py')},
64
+ 'katlas.lo': { 'katlas.lo.get_pssm_LO': ('lo.html#get_pssm_lo', 'katlas/lo.py'),
65
+ 'katlas.lo.get_pssm_LO_flat': ('lo.html#get_pssm_lo_flat', 'katlas/lo.py'),
66
+ 'katlas.lo.plot_logo_LO': ('lo.html#plot_logo_lo', 'katlas/lo.py'),
67
+ 'katlas.lo.plot_logo_heatmap_LO': ('lo.html#plot_logo_heatmap_lo', 'katlas/lo.py')},
96
68
  'katlas.pathway': { 'katlas.pathway.add_reactome_ref': ('pathway.html#add_reactome_ref', 'katlas/pathway.py'),
97
69
  'katlas.pathway.get_overlap': ('pathway.html#get_overlap', 'katlas/pathway.py'),
98
70
  'katlas.pathway.get_reactome': ('pathway.html#get_reactome', 'katlas/pathway.py'),
99
71
  'katlas.pathway.get_reactome_raw': ('pathway.html#get_reactome_raw', 'katlas/pathway.py'),
100
72
  'katlas.pathway.plot_path': ('pathway.html#plot_path', 'katlas/pathway.py'),
101
73
  'katlas.pathway.query_reactome': ('pathway.html#query_reactome', 'katlas/pathway.py')},
102
- 'katlas.plot': { 'katlas.plot.add_stats': ('plot.html#add_stats', 'katlas/plot.py'),
103
- 'katlas.plot.calculate_pct': ('plot.html#calculate_pct', 'katlas/plot.py'),
104
- 'katlas.plot.get_AUCDF': ('plot.html#get_aucdf', 'katlas/plot.py'),
105
- 'katlas.plot.get_color_dict': ('plot.html#get_color_dict', 'katlas/plot.py'),
106
- 'katlas.plot.get_hue_big': ('plot.html#get_hue_big', 'katlas/plot.py'),
107
- 'katlas.plot.get_plt_color': ('plot.html#get_plt_color', 'katlas/plot.py'),
108
- 'katlas.plot.get_similarity': ('plot.html#get_similarity', 'katlas/plot.py'),
109
- 'katlas.plot.get_subfamily_color': ('plot.html#get_subfamily_color', 'katlas/plot.py'),
110
- 'katlas.plot.plot_2d': ('plot.html#plot_2d', 'katlas/plot.py'),
111
- 'katlas.plot.plot_bar': ('plot.html#plot_bar', 'katlas/plot.py'),
112
- 'katlas.plot.plot_bokeh': ('plot.html#plot_bokeh', 'katlas/plot.py'),
113
- 'katlas.plot.plot_box': ('plot.html#plot_box', 'katlas/plot.py'),
114
- 'katlas.plot.plot_cluster': ('plot.html#plot_cluster', 'katlas/plot.py'),
115
- 'katlas.plot.plot_cnt': ('plot.html#plot_cnt', 'katlas/plot.py'),
116
- 'katlas.plot.plot_composition': ('plot.html#plot_composition', 'katlas/plot.py'),
117
- 'katlas.plot.plot_confusion_matrix': ('plot.html#plot_confusion_matrix', 'katlas/plot.py'),
118
- 'katlas.plot.plot_corr': ('plot.html#plot_corr', 'katlas/plot.py'),
119
- 'katlas.plot.plot_count': ('plot.html#plot_count', 'katlas/plot.py'),
120
- 'katlas.plot.plot_group_bar': ('plot.html#plot_group_bar', 'katlas/plot.py'),
121
- 'katlas.plot.plot_hist': ('plot.html#plot_hist', 'katlas/plot.py'),
122
- 'katlas.plot.plot_pie': ('plot.html#plot_pie', 'katlas/plot.py'),
123
- 'katlas.plot.plot_rank': ('plot.html#plot_rank', 'katlas/plot.py'),
124
- 'katlas.plot.plot_rel': ('plot.html#plot_rel', 'katlas/plot.py'),
125
- 'katlas.plot.plot_stacked': ('plot.html#plot_stacked', 'katlas/plot.py'),
126
- 'katlas.plot.plot_violin': ('plot.html#plot_violin', 'katlas/plot.py'),
127
- 'katlas.plot.reduce_feature': ('plot.html#reduce_feature', 'katlas/plot.py'),
128
- 'katlas.plot.save_pdf': ('plot.html#save_pdf', 'katlas/plot.py'),
129
- 'katlas.plot.save_show': ('plot.html#save_show', 'katlas/plot.py'),
130
- 'katlas.plot.save_svg': ('plot.html#save_svg', 'katlas/plot.py'),
131
- 'katlas.plot.set_sns': ('plot.html#set_sns', 'katlas/plot.py')},
74
+ 'katlas.plot': { 'katlas.plot.change_center_name': ('plot.html#change_center_name', 'katlas/plot.py'),
75
+ 'katlas.plot.convert_logo_df': ('plot.html#convert_logo_df', 'katlas/plot.py'),
76
+ 'katlas.plot.get_logo_IC': ('plot.html#get_logo_ic', 'katlas/plot.py'),
77
+ 'katlas.plot.get_pos_min_max': ('plot.html#get_pos_min_max', 'katlas/plot.py'),
78
+ 'katlas.plot.pSTY2sty': ('plot.html#psty2sty', 'katlas/plot.py'),
79
+ 'katlas.plot.plot_heatmap': ('plot.html#plot_heatmap', 'katlas/plot.py'),
80
+ 'katlas.plot.plot_heatmap_simple': ('plot.html#plot_heatmap_simple', 'katlas/plot.py'),
81
+ 'katlas.plot.plot_logo': ('plot.html#plot_logo', 'katlas/plot.py'),
82
+ 'katlas.plot.plot_logo_heatmap': ('plot.html#plot_logo_heatmap', 'katlas/plot.py'),
83
+ 'katlas.plot.plot_logo_raw': ('plot.html#plot_logo_raw', 'katlas/plot.py'),
84
+ 'katlas.plot.plot_logos': ('plot.html#plot_logos', 'katlas/plot.py'),
85
+ 'katlas.plot.plot_logos_idx': ('plot.html#plot_logos_idx', 'katlas/plot.py'),
86
+ 'katlas.plot.plot_two_heatmaps': ('plot.html#plot_two_heatmaps', 'katlas/plot.py'),
87
+ 'katlas.plot.scale_pos_neg_values': ('plot.html#scale_pos_neg_values', 'katlas/plot.py'),
88
+ 'katlas.plot.scale_zero_position': ('plot.html#scale_zero_position', 'katlas/plot.py'),
89
+ 'katlas.plot.sty2pSTY': ('plot.html#sty2psty', 'katlas/plot.py'),
90
+ 'katlas.plot.sty2pSTY_df': ('plot.html#sty2psty_df', 'katlas/plot.py')},
91
+ 'katlas.pspa': { 'katlas.pspa.get_logo': ('pspa.html#get_logo', 'katlas/pspa.py'),
92
+ 'katlas.pspa.get_one_kinase': ('pspa.html#get_one_kinase', 'katlas/pspa.py'),
93
+ 'katlas.pspa.plot_logo_heatmap_pspa': ('pspa.html#plot_logo_heatmap_pspa', 'katlas/pspa.py'),
94
+ 'katlas.pspa.plot_logo_pspa': ('pspa.html#plot_logo_pspa', 'katlas/pspa.py'),
95
+ 'katlas.pspa.preprocess_pspa': ('pspa.html#preprocess_pspa', 'katlas/pspa.py'),
96
+ 'katlas.pspa.raw2norm': ('pspa.html#raw2norm', 'katlas/pspa.py')},
132
97
  'katlas.pssm': { 'katlas.pssm._clean_zero': ('pssm.html#_clean_zero', 'katlas/pssm.py'),
133
- 'katlas.pssm.change_center_name': ('pssm.html#change_center_name', 'katlas/pssm.py'),
134
98
  'katlas.pssm.clean_zero_normalize': ('pssm.html#clean_zero_normalize', 'katlas/pssm.py'),
135
- 'katlas.pssm.convert_logo_df': ('pssm.html#convert_logo_df', 'katlas/pssm.py'),
136
- 'katlas.pssm.cosine_overall_flat': ('pssm.html#cosine_overall_flat', 'katlas/pssm.py'),
137
- 'katlas.pssm.cosine_similarity': ('pssm.html#cosine_similarity', 'katlas/pssm.py'),
138
99
  'katlas.pssm.flatten_pssm': ('pssm.html#flatten_pssm', 'katlas/pssm.py'),
139
100
  'katlas.pssm.get_IC': ('pssm.html#get_ic', 'katlas/pssm.py'),
140
101
  'katlas.pssm.get_IC_flat': ('pssm.html#get_ic_flat', 'katlas/pssm.py'),
141
102
  'katlas.pssm.get_cluster_pssms': ('pssm.html#get_cluster_pssms', 'katlas/pssm.py'),
142
103
  'katlas.pssm.get_entropy': ('pssm.html#get_entropy', 'katlas/pssm.py'),
143
104
  'katlas.pssm.get_entropy_flat': ('pssm.html#get_entropy_flat', 'katlas/pssm.py'),
144
- 'katlas.pssm.get_logo': ('pssm.html#get_logo', 'katlas/pssm.py'),
145
- 'katlas.pssm.get_logo_IC': ('pssm.html#get_logo_ic', 'katlas/pssm.py'),
146
- 'katlas.pssm.get_one_kinase': ('pssm.html#get_one_kinase', 'katlas/pssm.py'),
147
- 'katlas.pssm.get_pos_min_max': ('pssm.html#get_pos_min_max', 'katlas/pssm.py'),
148
105
  'katlas.pssm.get_prob': ('pssm.html#get_prob', 'katlas/pssm.py'),
149
- 'katlas.pssm.get_pssm_LO': ('pssm.html#get_pssm_lo', 'katlas/pssm.py'),
150
- 'katlas.pssm.get_pssm_LO_flat': ('pssm.html#get_pssm_lo_flat', 'katlas/pssm.py'),
106
+ 'katlas.pssm.get_pssm_seq_labels': ('pssm.html#get_pssm_seq_labels', 'katlas/pssm.py'),
107
+ 'katlas.pssm.get_pssm_weight': ('pssm.html#get_pssm_weight', 'katlas/pssm.py'),
151
108
  'katlas.pssm.get_specificity': ('pssm.html#get_specificity', 'katlas/pssm.py'),
152
109
  'katlas.pssm.get_specificity_flat': ('pssm.html#get_specificity_flat', 'katlas/pssm.py'),
153
- 'katlas.pssm.js_divergence': ('pssm.html#js_divergence', 'katlas/pssm.py'),
154
- 'katlas.pssm.js_divergence_flat': ('pssm.html#js_divergence_flat', 'katlas/pssm.py'),
155
- 'katlas.pssm.js_similarity': ('pssm.html#js_similarity', 'katlas/pssm.py'),
156
- 'katlas.pssm.js_similarity_flat': ('pssm.html#js_similarity_flat', 'katlas/pssm.py'),
157
- 'katlas.pssm.kl_divergence': ('pssm.html#kl_divergence', 'katlas/pssm.py'),
158
- 'katlas.pssm.kl_divergence_flat': ('pssm.html#kl_divergence_flat', 'katlas/pssm.py'),
159
- 'katlas.pssm.plot_heatmap': ('pssm.html#plot_heatmap', 'katlas/pssm.py'),
160
- 'katlas.pssm.plot_heatmap_simple': ('pssm.html#plot_heatmap_simple', 'katlas/pssm.py'),
161
- 'katlas.pssm.plot_logo': ('pssm.html#plot_logo', 'katlas/pssm.py'),
162
- 'katlas.pssm.plot_logo_LO': ('pssm.html#plot_logo_lo', 'katlas/pssm.py'),
163
- 'katlas.pssm.plot_logo_heatmap': ('pssm.html#plot_logo_heatmap', 'katlas/pssm.py'),
164
- 'katlas.pssm.plot_logo_heatmap_LO': ('pssm.html#plot_logo_heatmap_lo', 'katlas/pssm.py'),
165
- 'katlas.pssm.plot_logo_heatmap_pspa': ('pssm.html#plot_logo_heatmap_pspa', 'katlas/pssm.py'),
166
- 'katlas.pssm.plot_logo_pspa': ('pssm.html#plot_logo_pspa', 'katlas/pssm.py'),
167
- 'katlas.pssm.plot_logo_raw': ('pssm.html#plot_logo_raw', 'katlas/pssm.py'),
168
- 'katlas.pssm.plot_logos': ('pssm.html#plot_logos', 'katlas/pssm.py'),
169
- 'katlas.pssm.plot_logos_idx': ('pssm.html#plot_logos_idx', 'katlas/pssm.py'),
170
- 'katlas.pssm.plot_two_heatmaps': ('pssm.html#plot_two_heatmaps', 'katlas/pssm.py'),
171
- 'katlas.pssm.preprocess_pspa': ('pssm.html#preprocess_pspa', 'katlas/pssm.py'),
172
- 'katlas.pssm.raw2norm': ('pssm.html#raw2norm', 'katlas/pssm.py'),
173
- 'katlas.pssm.recover_pssm': ('pssm.html#recover_pssm', 'katlas/pssm.py'),
174
- 'katlas.pssm.scale_pos_neg_values': ('pssm.html#scale_pos_neg_values', 'katlas/pssm.py'),
175
- 'katlas.pssm.scale_zero_position': ('pssm.html#scale_zero_position', 'katlas/pssm.py'),
176
- 'katlas.pssm.sty2pSTY_df': ('pssm.html#sty2psty_df', 'katlas/pssm.py')},
177
- 'katlas.score': { 'katlas.score.Params': ('scoring.html#params', 'katlas/score.py'),
178
- 'katlas.score.STY2sty': ('scoring.html#sty2sty', 'katlas/score.py'),
179
- 'katlas.score.cut_seq': ('scoring.html#cut_seq', 'katlas/score.py'),
180
- 'katlas.score.duplicate_ref_zero': ('scoring.html#duplicate_ref_zero', 'katlas/score.py'),
181
- 'katlas.score.get_dict': ('scoring.html#get_dict', 'katlas/score.py'),
182
- 'katlas.score.get_pct': ('scoring.html#get_pct', 'katlas/score.py'),
183
- 'katlas.score.get_pct_df': ('scoring.html#get_pct_df', 'katlas/score.py'),
184
- 'katlas.score.multiply': ('scoring.html#multiply', 'katlas/score.py'),
185
- 'katlas.score.multiply_generic': ('scoring.html#multiply_generic', 'katlas/score.py'),
186
- 'katlas.score.multiply_pspa': ('scoring.html#multiply_pspa', 'katlas/score.py'),
187
- 'katlas.score.predict_kinase': ('scoring.html#predict_kinase', 'katlas/score.py'),
188
- 'katlas.score.predict_kinase_df': ('scoring.html#predict_kinase_df', 'katlas/score.py'),
189
- 'katlas.score.preprocess_ref': ('scoring.html#preprocess_ref', 'katlas/score.py'),
190
- 'katlas.score.sumup': ('scoring.html#sumup', 'katlas/score.py')},
191
- 'katlas.statistics': { 'katlas.statistics.get_metaP': ('statistics.html#get_metap', 'katlas/statistics.py'),
192
- 'katlas.statistics.get_pvalue': ('statistics.html#get_pvalue', 'katlas/statistics.py')},
193
- 'katlas.train': { 'katlas.train.calculate_ce': ('ml.html#calculate_ce', 'katlas/train.py'),
194
- 'katlas.train.get_score': ('ml.html#get_score', 'katlas/train.py'),
195
- 'katlas.train.get_splits': ('ml.html#get_splits', 'katlas/train.py'),
196
- 'katlas.train.post_process': ('ml.html#post_process', 'katlas/train.py'),
197
- 'katlas.train.post_process_oof': ('ml.html#post_process_oof', 'katlas/train.py'),
198
- 'katlas.train.predict_ml': ('ml.html#predict_ml', 'katlas/train.py'),
199
- 'katlas.train.split_data': ('ml.html#split_data', 'katlas/train.py'),
200
- 'katlas.train.train_ml': ('ml.html#train_ml', 'katlas/train.py'),
201
- 'katlas.train.train_ml_cv': ('ml.html#train_ml_cv', 'katlas/train.py')},
110
+ 'katlas.pssm.pssm_to_seq': ('pssm.html#pssm_to_seq', 'katlas/pssm.py'),
111
+ 'katlas.pssm.recover_pssm': ('pssm.html#recover_pssm', 'katlas/pssm.py')},
112
+ 'katlas.scoring': { 'katlas.scoring.Params': ('scoring.html#params', 'katlas/scoring.py'),
113
+ 'katlas.scoring.STY2sty': ('scoring.html#sty2sty', 'katlas/scoring.py'),
114
+ 'katlas.scoring.cut_seq': ('scoring.html#cut_seq', 'katlas/scoring.py'),
115
+ 'katlas.scoring.duplicate_ref_zero': ('scoring.html#duplicate_ref_zero', 'katlas/scoring.py'),
116
+ 'katlas.scoring.get_dict': ('scoring.html#get_dict', 'katlas/scoring.py'),
117
+ 'katlas.scoring.get_pct': ('scoring.html#get_pct', 'katlas/scoring.py'),
118
+ 'katlas.scoring.get_pct_df': ('scoring.html#get_pct_df', 'katlas/scoring.py'),
119
+ 'katlas.scoring.multiply': ('scoring.html#multiply', 'katlas/scoring.py'),
120
+ 'katlas.scoring.multiply_generic': ('scoring.html#multiply_generic', 'katlas/scoring.py'),
121
+ 'katlas.scoring.multiply_pspa': ('scoring.html#multiply_pspa', 'katlas/scoring.py'),
122
+ 'katlas.scoring.predict_kinase': ('scoring.html#predict_kinase', 'katlas/scoring.py'),
123
+ 'katlas.scoring.predict_kinase_df': ('scoring.html#predict_kinase_df', 'katlas/scoring.py'),
124
+ 'katlas.scoring.preprocess_ref': ('scoring.html#preprocess_ref', 'katlas/scoring.py'),
125
+ 'katlas.scoring.sumup': ('scoring.html#sumup', 'katlas/scoring.py')},
202
126
  'katlas.utils': { 'katlas.utils.aln2df': ('utils.html#aln2df', 'katlas/utils.py'),
203
127
  'katlas.utils.check_seq': ('utils.html#check_seq', 'katlas/utils.py'),
204
128
  'katlas.utils.check_seqs': ('utils.html#check_seqs', 'katlas/utils.py'),
129
+ 'katlas.utils.clean_feat': ('utils.html#clean_feat', 'katlas/utils.py'),
205
130
  'katlas.utils.extract_site_seq': ('utils.html#extract_site_seq', 'katlas/utils.py'),
206
131
  'katlas.utils.get_aln_freq': ('utils.html#get_aln_freq', 'katlas/utils.py'),
207
- 'katlas.utils.get_diff': ('utils.html#get_diff', 'katlas/utils.py'),
208
132
  'katlas.utils.get_fasta': ('utils.html#get_fasta', 'katlas/utils.py'),
209
- 'katlas.utils.pSTY2sty': ('utils.html#psty2sty', 'katlas/utils.py'),
133
+ 'katlas.utils.get_subfamily_color': ('utils.html#get_subfamily_color', 'katlas/utils.py'),
210
134
  'katlas.utils.phosphorylate_seq': ('utils.html#phosphorylate_seq', 'katlas/utils.py'),
211
135
  'katlas.utils.phosphorylate_seq_df': ('utils.html#phosphorylate_seq_df', 'katlas/utils.py'),
212
136
  'katlas.utils.prepare_path': ('utils.html#prepare_path', 'katlas/utils.py'),
137
+ 'katlas.utils.remove_hi_corr': ('utils.html#remove_hi_corr', 'katlas/utils.py'),
213
138
  'katlas.utils.run_clustalo': ('utils.html#run_clustalo', 'katlas/utils.py'),
214
- 'katlas.utils.sty2pSTY': ('utils.html#sty2psty', 'katlas/utils.py'),
139
+ 'katlas.utils.standardize': ('utils.html#standardize', 'katlas/utils.py'),
215
140
  'katlas.utils.validate_site': ('utils.html#validate_site', 'katlas/utils.py'),
216
141
  'katlas.utils.validate_site_df': ('utils.html#validate_site_df', 'katlas/utils.py')}}}
katlas/common.py CHANGED
@@ -1,4 +1,10 @@
1
+ from .compare import *
1
2
  from .data import *
3
+ from .hierarchical import *
4
+ from .lo import *
5
+ from .pathway import *
6
+ from .plot import *
7
+ from .pspa import *
2
8
  from .pssm import *
3
- from .score import *
4
- from .utils import *
9
+ from .scoring import *
10
+ from .utils import *
katlas/compare.py ADDED
@@ -0,0 +1,118 @@
1
+ """algorithms about comparing two PSSMs"""
2
+
3
+ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/06_compare.ipynb.
4
+
5
+ # %% auto #0
6
+ __all__ = ['kl_divergence', 'kl_divergence_flat', 'js_divergence', 'js_divergence_flat', 'js_similarity', 'js_similarity_flat',
7
+ 'cosine_similarity', 'cosine_overall_flat']
8
+
9
+ # %% ../nbs/06_compare.ipynb #76949d54-f519-4bc6-9da5-d48a05bbfe69
10
+ import numpy as np, pandas as pd
11
+ from .pssm import EPSILON
12
+
13
+ # %% ../nbs/06_compare.ipynb #b1ef2ee7-70fd-4703-96b4-1c1a9053987f
14
def kl_divergence(p1, # target pssm p (array-like, shape: (AA, positions))
                  p2, # pred pssm q (array-like, same shape as p1)
                  ):
    """
    KL divergence D_KL(p1 || p2), summed over the first axis.

    p1 and p2 are arrays (df or np) with index as aa and column as position.
    pandas objects are label-aligned (inner join) before being compared;
    plain numpy arrays are compared element-wise as given.

    Returns one divergence value per column for 2D input, or a single scalar
    for 1D (flattened) input. No averaging across positions is done here.

    Raises ValueError if p1 and p2 do not have the same shape.
    """
    if p1.shape != p2.shape: raise ValueError("Shapes of p1 and p2 must match.")

    # Only pandas objects expose `.align`; numpy arrays are used as they are
    if hasattr(p1, 'align') and hasattr(p2, 'align'):
        p1, p2 = p1.align(p2, join='inner', axis=None)

    # Zero out entries whose sum is not strictly positive (e.g. both zero)
    valid = (p1 + p2) > 0
    p1 = np.where(valid, p1, 0.0)
    p2 = np.where(valid, p2, 0.0)

    # KL divergence: sum_x p1(x) log(p1(x)/p2(x))
    # EPSILON comes from .pssm -- presumably a small positive constant that
    # keeps the ratio inside the log finite when an entry is zero
    kl = np.sum(p1 * np.log((p1 + EPSILON) / (p2 + EPSILON)), axis=0)

    return kl
34
+
35
+ # %% ../nbs/06_compare.ipynb #71084bb3-169a-4dc2-b5dd-1b802fda8225
36
def kl_divergence_flat(p1_flat, # pd.Series of target flattened pssm p
                       p2_flat, # pd.Series of pred flattened pssm q
                       ):
    """
    KL divergence between two flattened PSSMs, averaged over positions.

    Both inputs are pd.Series. The number of positions is the count of
    distinct signed integers extracted from the index labels of p1_flat
    (assumes each label embeds its position number -- TODO confirm format).
    """
    # 1D input: kl_divergence already yields a single summed value
    summed_kl = kl_divergence(p1_flat, p2_flat)
    position_labels = p1_flat.index.str.extract(r'(-?\d+)')
    n_positions = len(position_labels.drop_duplicates())
    return float(summed_kl / n_positions)
44
+
45
+ # %% ../nbs/06_compare.ipynb #eca99343-fbcd-48c2-a1ff-88af31fd2346
46
def js_divergence(p1, # pssm
                  p2, # pssm
                  index=True,
                  ):
    """
    Jensen-Shannon divergence (natural log) between two PSSMs.

    p1 and p2 are two arrays (df or np) with index as aa and column as position.
    pandas objects are label-aligned (inner join) first; numpy arrays are
    compared element-wise as given.

    With index=True and DataFrame input, the result is a pd.Series labelled by
    the column (position) names. Otherwise (index=False, or input that has no
    columns such as a Series or ndarray) the raw numpy result is returned:
    one value per column for 2D input, a scalar for 1D input.

    Raises ValueError if p1 and p2 do not have the same shape.
    """
    if p1.shape != p2.shape: raise ValueError("Shapes of p1 and p2 must match.")

    # Only pandas objects expose `.align`; numpy arrays are used as they are
    if hasattr(p1, 'align') and hasattr(p2, 'align'):
        p1, p2 = p1.align(p2, join='inner', axis=None)

    # Position labels exist only on DataFrames; capture them before np.where
    # turns the inputs into bare ndarrays
    positions = getattr(p1, 'columns', None) if index else None

    # Zero out entries whose sum is not strictly positive (e.g. both zero)
    valid = (p1 + p2) > 0
    p1 = np.where(valid, p1, 0.0)
    p2 = np.where(valid, p2, 0.0)

    # Mixture distribution used as the reference for both KL terms
    m = 0.5 * (p1 + p2)

    js = 0.5 * np.sum(p1 * np.log((p1 + EPSILON) / (m + EPSILON)), axis=0) + \
         0.5 * np.sum(p2 * np.log((p2 + EPSILON) / (m + EPSILON)), axis=0)
    return pd.Series(js, index=positions) if positions is not None else js
63
+
64
+ # %% ../nbs/06_compare.ipynb #37553737-13b3-4461-ad93-fe4cf863f25b
65
def js_divergence_flat(p1_flat, # pd.Series of flattened pssm
                       p2_flat, # pd.Series of flattened pssm
                       ):
    """
    JS divergence between two flattened PSSMs, averaged over positions.

    Both inputs are pd.Series. The number of positions is the count of
    distinct signed integers extracted from the index labels of p1_flat
    (assumes each label embeds its position number -- TODO confirm format).
    """
    # index=False: a flattened Series has no columns to label the result with
    summed_js = js_divergence(p1_flat, p2_flat, index=False)
    position_labels = p1_flat.index.str.extract(r'(-?\d+)')
    n_positions = len(position_labels.drop_duplicates())
    return float(summed_js / n_positions)
73
+
74
+ # %% ../nbs/06_compare.ipynb #7e480e54-c5de-4726-b208-c531e07a2adc
75
def js_similarity(pssm1,pssm2):
    "JS similarity: 1 minus the JS divergence after dividing it by ln(2) (nats to bits)."
    jsd_bits = js_divergence(pssm1, pssm2) / np.log(2)
    return 1 - jsd_bits
80
+
81
+ # %% ../nbs/06_compare.ipynb #0c51e8b5-df29-431b-8013-ff46388b4872
82
def js_similarity_flat(p1_flat,p2_flat):
    "JS similarity for flattened PSSMs: 1 minus the flat JS divergence divided by ln(2)."
    jsd_bits = js_divergence_flat(p1_flat, p2_flat) / np.log(2)
    return 1 - jsd_bits
85
+
86
+ # %% ../nbs/06_compare.ipynb #8d2e228a-8543-4174-8c24-ce6ded317c8d
87
def cosine_similarity(pssm1: pd.DataFrame, pssm2: pd.DataFrame) -> pd.Series:
    """
    Column-wise cosine similarity between two PSSMs of identical shape.

    Returns a pd.Series keyed by the column labels of pssm1. A column whose
    vector has zero norm in either PSSM gets a similarity of 0.0.
    Raises ValueError when the shapes differ.
    """
    if pssm1.shape != pssm2.shape: raise ValueError("PSSMs must have the same shape")

    def _column_cosine(pos):
        # Inner-join on the row index so both vectors list the same aa in the same order
        col1, col2 = pssm1[pos].align(pssm2[pos], join='inner')
        len1 = np.linalg.norm(col1)
        len2 = np.linalg.norm(col2)
        # A zero-length vector has no direction; report 0.0 rather than divide by zero
        if len1 == 0 or len2 == 0: return 0.0
        return np.dot(col1, col2) / (len1 * len2)

    return pd.Series({pos: _column_cosine(pos) for pos in pssm1.columns})
108
+
109
+ # %% ../nbs/06_compare.ipynb #d830fbaa-4a9f-4d5d-98ba-289fc91bff8e
110
def cosine_overall_flat(pssm1_flat, pssm2_flat):
    """
    Compute overall cosine similarity between two PSSMs (flattened).

    Both inputs are pd.Series; they are inner-joined on their index so that
    only labels present in both contribute. Returns 0.0 when either aligned
    vector has zero norm (including the case of no shared labels).
    """
    # match index for dot product
    pssm1_flat, pssm2_flat = pssm1_flat.align(pssm2_flat, join='inner')
    norm1 = np.linalg.norm(pssm1_flat)
    norm2 = np.linalg.norm(pssm2_flat)
    if norm1 == 0 or norm2 == 0: return 0.0
    # np.dot keeps the reduction vectorized, matching cosine_similarity,
    # instead of iterating the Series element by element with builtin sum()
    dot_product = np.dot(pssm1_flat, pssm2_flat)
    return dot_product / (norm1 * norm2)