python-katlas 0.1.4__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
katlas/__init__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.1.4"
1
+ __version__ = "0.2.0"
katlas/_modidx.py CHANGED
@@ -5,105 +5,212 @@ d = { 'settings': { 'branch': 'main',
5
5
  'doc_host': 'https://sky1ove.github.io',
6
6
  'git_url': 'https://github.com/sky1ove/katlas',
7
7
  'lib_path': 'katlas'},
8
- 'syms': { 'katlas.core': { 'katlas.core.CPTAC': ('core.html#cptac', 'katlas/core.py'),
9
- 'katlas.core.CPTAC._fetch_data': ('core.html#cptac._fetch_data', 'katlas/core.py'),
10
- 'katlas.core.CPTAC.get_id': ('core.html#cptac.get_id', 'katlas/core.py'),
11
- 'katlas.core.CPTAC.list_cancer': ('core.html#cptac.list_cancer', 'katlas/core.py'),
12
- 'katlas.core.Data': ('core.html#data', 'katlas/core.py'),
13
- 'katlas.core.Data.fetch_data': ('core.html#data.fetch_data', 'katlas/core.py'),
14
- 'katlas.core.Data.get_aa_info': ('core.html#data.get_aa_info', 'katlas/core.py'),
15
- 'katlas.core.Data.get_aa_morgan': ('core.html#data.get_aa_morgan', 'katlas/core.py'),
16
- 'katlas.core.Data.get_aa_rdkit': ('core.html#data.get_aa_rdkit', 'katlas/core.py'),
17
- 'katlas.core.Data.get_cddm': ('core.html#data.get_cddm', 'katlas/core.py'),
18
- 'katlas.core.Data.get_cddm_others': ('core.html#data.get_cddm_others', 'katlas/core.py'),
19
- 'katlas.core.Data.get_cddm_others_info': ('core.html#data.get_cddm_others_info', 'katlas/core.py'),
20
- 'katlas.core.Data.get_cddm_upper': ('core.html#data.get_cddm_upper', 'katlas/core.py'),
21
- 'katlas.core.Data.get_combine': ('core.html#data.get_combine', 'katlas/core.py'),
22
- 'katlas.core.Data.get_combine_site_phosphorylated': ( 'core.html#data.get_combine_site_phosphorylated',
23
- 'katlas/core.py'),
24
- 'katlas.core.Data.get_combine_site_psp_ochoa': ('core.html#data.get_combine_site_psp_ochoa', 'katlas/core.py'),
25
- 'katlas.core.Data.get_cptac_ensembl_site': ('core.html#data.get_cptac_ensembl_site', 'katlas/core.py'),
26
- 'katlas.core.Data.get_cptac_gene_site': ('core.html#data.get_cptac_gene_site', 'katlas/core.py'),
27
- 'katlas.core.Data.get_cptac_unique_site': ('core.html#data.get_cptac_unique_site', 'katlas/core.py'),
28
- 'katlas.core.Data.get_kinase_info': ('core.html#data.get_kinase_info', 'katlas/core.py'),
29
- 'katlas.core.Data.get_ks_dataset': ('core.html#data.get_ks_dataset', 'katlas/core.py'),
30
- 'katlas.core.Data.get_num_dict': ('core.html#data.get_num_dict', 'katlas/core.py'),
31
- 'katlas.core.Data.get_ochoa_site': ('core.html#data.get_ochoa_site', 'katlas/core.py'),
32
- 'katlas.core.Data.get_psp_human_site': ('core.html#data.get_psp_human_site', 'katlas/core.py'),
33
- 'katlas.core.Data.get_pspa_all_norm': ('core.html#data.get_pspa_all_norm', 'katlas/core.py'),
34
- 'katlas.core.Data.get_pspa_st_norm': ('core.html#data.get_pspa_st_norm', 'katlas/core.py'),
35
- 'katlas.core.Data.get_pspa_st_pct': ('core.html#data.get_pspa_st_pct', 'katlas/core.py'),
36
- 'katlas.core.Data.get_pspa_tyr_norm': ('core.html#data.get_pspa_tyr_norm', 'katlas/core.py'),
37
- 'katlas.core.Data.get_pspa_tyr_pct': ('core.html#data.get_pspa_tyr_pct', 'katlas/core.py'),
38
- 'katlas.core.STY2sty': ('core.html#sty2sty', 'katlas/core.py'),
39
- 'katlas.core.checker': ('core.html#checker', 'katlas/core.py'),
40
- 'katlas.core.convert_string': ('core.html#convert_string', 'katlas/core.py'),
41
- 'katlas.core.cut_seq': ('core.html#cut_seq', 'katlas/core.py'),
42
- 'katlas.core.extract_site_seq': ('core.html#extract_site_seq', 'katlas/core.py'),
43
- 'katlas.core.get_dict': ('core.html#get_dict', 'katlas/core.py'),
44
- 'katlas.core.get_freq': ('core.html#get_freq', 'katlas/core.py'),
45
- 'katlas.core.get_metaP': ('core.html#get_metap', 'katlas/core.py'),
46
- 'katlas.core.get_one_kinase': ('core.html#get_one_kinase', 'katlas/core.py'),
47
- 'katlas.core.get_pct': ('core.html#get_pct', 'katlas/core.py'),
48
- 'katlas.core.get_pct_df': ('core.html#get_pct_df', 'katlas/core.py'),
49
- 'katlas.core.get_pvalue': ('core.html#get_pvalue', 'katlas/core.py'),
50
- 'katlas.core.get_unique_site': ('core.html#get_unique_site', 'katlas/core.py'),
51
- 'katlas.core.multiply': ('core.html#multiply', 'katlas/core.py'),
52
- 'katlas.core.multiply_func': ('core.html#multiply_func', 'katlas/core.py'),
53
- 'katlas.core.predict_kinase': ('core.html#predict_kinase', 'katlas/core.py'),
54
- 'katlas.core.predict_kinase_df': ('core.html#predict_kinase_df', 'katlas/core.py'),
55
- 'katlas.core.raw2norm': ('core.html#raw2norm', 'katlas/core.py'),
56
- 'katlas.core.sumup': ('core.html#sumup', 'katlas/core.py')},
57
- 'katlas.dl': { 'katlas.dl.CNN1D_1': ('dl.html#cnn1d_1', 'katlas/dl.py'),
58
- 'katlas.dl.CNN1D_1.__init__': ('dl.html#cnn1d_1.__init__', 'katlas/dl.py'),
59
- 'katlas.dl.CNN1D_1.forward': ('dl.html#cnn1d_1.forward', 'katlas/dl.py'),
60
- 'katlas.dl.CNN1D_2': ('dl.html#cnn1d_2', 'katlas/dl.py'),
61
- 'katlas.dl.CNN1D_2.__init__': ('dl.html#cnn1d_2.__init__', 'katlas/dl.py'),
62
- 'katlas.dl.CNN1D_2.forward': ('dl.html#cnn1d_2.forward', 'katlas/dl.py'),
63
- 'katlas.dl.GeneralDataset': ('dl.html#generaldataset', 'katlas/dl.py'),
64
- 'katlas.dl.GeneralDataset.__getitem__': ('dl.html#generaldataset.__getitem__', 'katlas/dl.py'),
65
- 'katlas.dl.GeneralDataset.__init__': ('dl.html#generaldataset.__init__', 'katlas/dl.py'),
66
- 'katlas.dl.GeneralDataset.__len__': ('dl.html#generaldataset.__len__', 'katlas/dl.py'),
67
- 'katlas.dl.MLP_1': ('dl.html#mlp_1', 'katlas/dl.py'),
68
- 'katlas.dl.conv_wn': ('dl.html#conv_wn', 'katlas/dl.py'),
69
- 'katlas.dl.get_sampler': ('dl.html#get_sampler', 'katlas/dl.py'),
70
- 'katlas.dl.init_weights': ('dl.html#init_weights', 'katlas/dl.py'),
71
- 'katlas.dl.lin_wn': ('dl.html#lin_wn', 'katlas/dl.py'),
72
- 'katlas.dl.predict_dl': ('dl.html#predict_dl', 'katlas/dl.py'),
73
- 'katlas.dl.seed_everything': ('dl.html#seed_everything', 'katlas/dl.py'),
74
- 'katlas.dl.train_dl': ('dl.html#train_dl', 'katlas/dl.py'),
75
- 'katlas.dl.train_dl_cv': ('dl.html#train_dl_cv', 'katlas/dl.py')},
76
- 'katlas.feature': { 'katlas.feature.get_esm': ('feature.html#get_esm', 'katlas/feature.py'),
8
+ 'syms': { 'katlas.clustering': { 'katlas.clustering.get_1d_distance': ('hierarchical.html#get_1d_distance', 'katlas/clustering.py'),
9
+ 'katlas.clustering.get_1d_distance_parallel': ( 'hierarchical.html#get_1d_distance_parallel',
10
+ 'katlas/clustering.py'),
11
+ 'katlas.clustering.get_1d_js': ('hierarchical.html#get_1d_js', 'katlas/clustering.py'),
12
+ 'katlas.clustering.get_1d_js_parallel': ('hierarchical.html#get_1d_js_parallel', 'katlas/clustering.py'),
13
+ 'katlas.clustering.get_Z': ('hierarchical.html#get_z', 'katlas/clustering.py'),
14
+ 'katlas.clustering.get_distance': ('hierarchical.html#get_distance', 'katlas/clustering.py'),
15
+ 'katlas.clustering.get_pssm_seq_labels': ( 'hierarchical.html#get_pssm_seq_labels',
16
+ 'katlas/clustering.py'),
17
+ 'katlas.clustering.plot_dendrogram': ('hierarchical.html#plot_dendrogram', 'katlas/clustering.py'),
18
+ 'katlas.clustering.pssm_to_seq': ('hierarchical.html#pssm_to_seq', 'katlas/clustering.py')},
19
+ 'katlas.common': {},
20
+ 'katlas.core': {},
21
+ 'katlas.data': { 'katlas.data.CPTAC': ('data.html#cptac', 'katlas/data.py'),
22
+ 'katlas.data.CPTAC._read_file': ('data.html#cptac._read_file', 'katlas/data.py'),
23
+ 'katlas.data.CPTAC.get_id': ('data.html#cptac.get_id', 'katlas/data.py'),
24
+ 'katlas.data.CPTAC.list_cancer': ('data.html#cptac.list_cancer', 'katlas/data.py'),
25
+ 'katlas.data.Data': ('data.html#data', 'katlas/data.py'),
26
+ 'katlas.data.Data.download': ('data.html#data.download', 'katlas/data.py'),
27
+ 'katlas.data.Data.get_aa_info': ('data.html#data.get_aa_info', 'katlas/data.py'),
28
+ 'katlas.data.Data.get_aa_morgan': ('data.html#data.get_aa_morgan', 'katlas/data.py'),
29
+ 'katlas.data.Data.get_aa_rdkit': ('data.html#data.get_aa_rdkit', 'katlas/data.py'),
30
+ 'katlas.data.Data.get_cddm': ('data.html#data.get_cddm', 'katlas/data.py'),
31
+ 'katlas.data.Data.get_cddm_LO': ('data.html#data.get_cddm_lo', 'katlas/data.py'),
32
+ 'katlas.data.Data.get_cddm_LO_upper': ('data.html#data.get_cddm_lo_upper', 'katlas/data.py'),
33
+ 'katlas.data.Data.get_cddm_upper': ('data.html#data.get_cddm_upper', 'katlas/data.py'),
34
+ 'katlas.data.Data.get_combine_site_phosphorylated': ( 'data.html#data.get_combine_site_phosphorylated',
35
+ 'katlas/data.py'),
36
+ 'katlas.data.Data.get_combine_site_psp_ochoa': ('data.html#data.get_combine_site_psp_ochoa', 'katlas/data.py'),
37
+ 'katlas.data.Data.get_cptac_ensembl_site': ('data.html#data.get_cptac_ensembl_site', 'katlas/data.py'),
38
+ 'katlas.data.Data.get_cptac_gene_site': ('data.html#data.get_cptac_gene_site', 'katlas/data.py'),
39
+ 'katlas.data.Data.get_cptac_unique_site': ('data.html#data.get_cptac_unique_site', 'katlas/data.py'),
40
+ 'katlas.data.Data.get_human_site': ('data.html#data.get_human_site', 'katlas/data.py'),
41
+ 'katlas.data.Data.get_kd_uniprot': ('data.html#data.get_kd_uniprot', 'katlas/data.py'),
42
+ 'katlas.data.Data.get_kinase_info': ('data.html#data.get_kinase_info', 'katlas/data.py'),
43
+ 'katlas.data.Data.get_kinase_uniprot': ('data.html#data.get_kinase_uniprot', 'katlas/data.py'),
44
+ 'katlas.data.Data.get_ks_background': ('data.html#data.get_ks_background', 'katlas/data.py'),
45
+ 'katlas.data.Data.get_ks_dataset': ('data.html#data.get_ks_dataset', 'katlas/data.py'),
46
+ 'katlas.data.Data.get_ks_unique': ('data.html#data.get_ks_unique', 'katlas/data.py'),
47
+ 'katlas.data.Data.get_num_dict': ('data.html#data.get_num_dict', 'katlas/data.py'),
48
+ 'katlas.data.Data.get_ochoa_site': ('data.html#data.get_ochoa_site', 'katlas/data.py'),
49
+ 'katlas.data.Data.get_psp_human_site': ('data.html#data.get_psp_human_site', 'katlas/data.py'),
50
+ 'katlas.data.Data.get_pspa': ('data.html#data.get_pspa', 'katlas/data.py'),
51
+ 'katlas.data.Data.get_pspa_scale': ('data.html#data.get_pspa_scale', 'katlas/data.py'),
52
+ 'katlas.data.Data.get_pspa_st': ('data.html#data.get_pspa_st', 'katlas/data.py'),
53
+ 'katlas.data.Data.get_pspa_st_pct': ('data.html#data.get_pspa_st_pct', 'katlas/data.py'),
54
+ 'katlas.data.Data.get_pspa_tyr': ('data.html#data.get_pspa_tyr', 'katlas/data.py'),
55
+ 'katlas.data.Data.get_pspa_tyr_pct': ('data.html#data.get_pspa_tyr_pct', 'katlas/data.py'),
56
+ 'katlas.data.Data.get_reactome_pathway': ('data.html#data.get_reactome_pathway', 'katlas/data.py'),
57
+ 'katlas.data.Data.get_reactome_pathway_lo': ('data.html#data.get_reactome_pathway_lo', 'katlas/data.py'),
58
+ 'katlas.data.Data.read_file': ('data.html#data.read_file', 'katlas/data.py')},
59
+ 'katlas.dnn': { 'katlas.dnn.CE': ('dnn.html#ce', 'katlas/dnn.py'),
60
+ 'katlas.dnn.CNN1D': ('dnn.html#cnn1d', 'katlas/dnn.py'),
61
+ 'katlas.dnn.CNN1D.__init__': ('dnn.html#cnn1d.__init__', 'katlas/dnn.py'),
62
+ 'katlas.dnn.CNN1D.forward': ('dnn.html#cnn1d.forward', 'katlas/dnn.py'),
63
+ 'katlas.dnn.GeneralDataset': ('dnn.html#generaldataset', 'katlas/dnn.py'),
64
+ 'katlas.dnn.GeneralDataset.__getitem__': ('dnn.html#generaldataset.__getitem__', 'katlas/dnn.py'),
65
+ 'katlas.dnn.GeneralDataset.__init__': ('dnn.html#generaldataset.__init__', 'katlas/dnn.py'),
66
+ 'katlas.dnn.GeneralDataset.__len__': ('dnn.html#generaldataset.__len__', 'katlas/dnn.py'),
67
+ 'katlas.dnn.JSD': ('dnn.html#jsd', 'katlas/dnn.py'),
68
+ 'katlas.dnn.KLD': ('dnn.html#kld', 'katlas/dnn.py'),
69
+ 'katlas.dnn.MLP': ('dnn.html#mlp', 'katlas/dnn.py'),
70
+ 'katlas.dnn.PSSM_model': ('dnn.html#pssm_model', 'katlas/dnn.py'),
71
+ 'katlas.dnn.PSSM_model.__init__': ('dnn.html#pssm_model.__init__', 'katlas/dnn.py'),
72
+ 'katlas.dnn.PSSM_model.forward': ('dnn.html#pssm_model.forward', 'katlas/dnn.py'),
73
+ 'katlas.dnn.conv_wn': ('dnn.html#conv_wn', 'katlas/dnn.py'),
74
+ 'katlas.dnn.init_weights': ('dnn.html#init_weights', 'katlas/dnn.py'),
75
+ 'katlas.dnn.lin_wn': ('dnn.html#lin_wn', 'katlas/dnn.py'),
76
+ 'katlas.dnn.predict_dl': ('dnn.html#predict_dl', 'katlas/dnn.py'),
77
+ 'katlas.dnn.seed_everything': ('dnn.html#seed_everything', 'katlas/dnn.py'),
78
+ 'katlas.dnn.train_dl': ('dnn.html#train_dl', 'katlas/dnn.py'),
79
+ 'katlas.dnn.train_dl_cv': ('dnn.html#train_dl_cv', 'katlas/dnn.py')},
80
+ 'katlas.feature': { 'katlas.feature.filter_range_columns': ('feature.html#filter_range_columns', 'katlas/feature.py'),
81
+ 'katlas.feature.get_clusters_elbow': ('feature.html#get_clusters_elbow', 'katlas/feature.py'),
82
+ 'katlas.feature.get_esm': ('feature.html#get_esm', 'katlas/feature.py'),
77
83
  'katlas.feature.get_morgan': ('feature.html#get_morgan', 'katlas/feature.py'),
78
84
  'katlas.feature.get_rdkit': ('feature.html#get_rdkit', 'katlas/feature.py'),
85
+ 'katlas.feature.get_rdkit_3d': ('feature.html#get_rdkit_3d', 'katlas/feature.py'),
86
+ 'katlas.feature.get_rdkit_all': ('feature.html#get_rdkit_all', 'katlas/feature.py'),
87
+ 'katlas.feature.get_rdkit_df': ('feature.html#get_rdkit_df', 'katlas/feature.py'),
79
88
  'katlas.feature.get_t5': ('feature.html#get_t5', 'katlas/feature.py'),
80
89
  'katlas.feature.get_t5_bfd': ('feature.html#get_t5_bfd', 'katlas/feature.py'),
90
+ 'katlas.feature.kmeans': ('feature.html#kmeans', 'katlas/feature.py'),
91
+ 'katlas.feature.onehot_encode': ('feature.html#onehot_encode', 'katlas/feature.py'),
92
+ 'katlas.feature.onehot_encode_df': ('feature.html#onehot_encode_df', 'katlas/feature.py'),
81
93
  'katlas.feature.preprocess': ('feature.html#preprocess', 'katlas/feature.py'),
82
- 'katlas.feature.reduce_feature': ('feature.html#reduce_feature', 'katlas/feature.py'),
83
- 'katlas.feature.remove_hi_corr': ('feature.html#remove_hi_corr', 'katlas/feature.py')},
84
- 'katlas.imports': {},
85
- 'katlas.plot': { 'katlas.plot.draw_corr': ('plot.html#draw_corr', 'katlas/plot.py'),
94
+ 'katlas.feature.remove_hi_corr': ('feature.html#remove_hi_corr', 'katlas/feature.py'),
95
+ 'katlas.feature.standardize': ('feature.html#standardize', 'katlas/feature.py')},
96
+ 'katlas.pathway': { 'katlas.pathway.add_reactome_ref': ('pathway.html#add_reactome_ref', 'katlas/pathway.py'),
97
+ 'katlas.pathway.get_overlap': ('pathway.html#get_overlap', 'katlas/pathway.py'),
98
+ 'katlas.pathway.get_reactome': ('pathway.html#get_reactome', 'katlas/pathway.py'),
99
+ 'katlas.pathway.get_reactome_raw': ('pathway.html#get_reactome_raw', 'katlas/pathway.py'),
100
+ 'katlas.pathway.plot_path': ('pathway.html#plot_path', 'katlas/pathway.py'),
101
+ 'katlas.pathway.query_reactome': ('pathway.html#query_reactome', 'katlas/pathway.py')},
102
+ 'katlas.plot': { 'katlas.plot.add_stats': ('plot.html#add_stats', 'katlas/plot.py'),
103
+ 'katlas.plot.calculate_pct': ('plot.html#calculate_pct', 'katlas/plot.py'),
86
104
  'katlas.plot.get_AUCDF': ('plot.html#get_aucdf', 'katlas/plot.py'),
87
105
  'katlas.plot.get_color_dict': ('plot.html#get_color_dict', 'katlas/plot.py'),
88
- 'katlas.plot.get_logo': ('plot.html#get_logo', 'katlas/plot.py'),
89
- 'katlas.plot.get_logo2': ('plot.html#get_logo2', 'katlas/plot.py'),
90
- 'katlas.plot.logo_func': ('plot.html#logo_func', 'katlas/plot.py'),
106
+ 'katlas.plot.get_hue_big': ('plot.html#get_hue_big', 'katlas/plot.py'),
107
+ 'katlas.plot.get_plt_color': ('plot.html#get_plt_color', 'katlas/plot.py'),
108
+ 'katlas.plot.get_similarity': ('plot.html#get_similarity', 'katlas/plot.py'),
109
+ 'katlas.plot.get_subfamily_color': ('plot.html#get_subfamily_color', 'katlas/plot.py'),
91
110
  'katlas.plot.plot_2d': ('plot.html#plot_2d', 'katlas/plot.py'),
92
111
  'katlas.plot.plot_bar': ('plot.html#plot_bar', 'katlas/plot.py'),
93
112
  'katlas.plot.plot_bokeh': ('plot.html#plot_bokeh', 'katlas/plot.py'),
94
113
  'katlas.plot.plot_box': ('plot.html#plot_box', 'katlas/plot.py'),
95
114
  'katlas.plot.plot_cluster': ('plot.html#plot_cluster', 'katlas/plot.py'),
115
+ 'katlas.plot.plot_cnt': ('plot.html#plot_cnt', 'katlas/plot.py'),
116
+ 'katlas.plot.plot_composition': ('plot.html#plot_composition', 'katlas/plot.py'),
96
117
  'katlas.plot.plot_confusion_matrix': ('plot.html#plot_confusion_matrix', 'katlas/plot.py'),
97
118
  'katlas.plot.plot_corr': ('plot.html#plot_corr', 'katlas/plot.py'),
98
119
  'katlas.plot.plot_count': ('plot.html#plot_count', 'katlas/plot.py'),
99
120
  'katlas.plot.plot_group_bar': ('plot.html#plot_group_bar', 'katlas/plot.py'),
100
- 'katlas.plot.plot_heatmap': ('plot.html#plot_heatmap', 'katlas/plot.py'),
101
121
  'katlas.plot.plot_hist': ('plot.html#plot_hist', 'katlas/plot.py'),
122
+ 'katlas.plot.plot_pie': ('plot.html#plot_pie', 'katlas/plot.py'),
102
123
  'katlas.plot.plot_rank': ('plot.html#plot_rank', 'katlas/plot.py'),
124
+ 'katlas.plot.plot_rel': ('plot.html#plot_rel', 'katlas/plot.py'),
125
+ 'katlas.plot.plot_stacked': ('plot.html#plot_stacked', 'katlas/plot.py'),
126
+ 'katlas.plot.plot_violin': ('plot.html#plot_violin', 'katlas/plot.py'),
127
+ 'katlas.plot.reduce_feature': ('plot.html#reduce_feature', 'katlas/plot.py'),
128
+ 'katlas.plot.save_pdf': ('plot.html#save_pdf', 'katlas/plot.py'),
129
+ 'katlas.plot.save_show': ('plot.html#save_show', 'katlas/plot.py'),
130
+ 'katlas.plot.save_svg': ('plot.html#save_svg', 'katlas/plot.py'),
103
131
  'katlas.plot.set_sns': ('plot.html#set_sns', 'katlas/plot.py')},
104
- 'katlas.train': { 'katlas.train.get_splits': ('ml.html#get_splits', 'katlas/train.py'),
132
+ 'katlas.pssm': { 'katlas.pssm._clean_zero': ('pssm.html#_clean_zero', 'katlas/pssm.py'),
133
+ 'katlas.pssm.change_center_name': ('pssm.html#change_center_name', 'katlas/pssm.py'),
134
+ 'katlas.pssm.clean_zero_normalize': ('pssm.html#clean_zero_normalize', 'katlas/pssm.py'),
135
+ 'katlas.pssm.convert_logo_df': ('pssm.html#convert_logo_df', 'katlas/pssm.py'),
136
+ 'katlas.pssm.cosine_overall_flat': ('pssm.html#cosine_overall_flat', 'katlas/pssm.py'),
137
+ 'katlas.pssm.cosine_similarity': ('pssm.html#cosine_similarity', 'katlas/pssm.py'),
138
+ 'katlas.pssm.flatten_pssm': ('pssm.html#flatten_pssm', 'katlas/pssm.py'),
139
+ 'katlas.pssm.get_IC': ('pssm.html#get_ic', 'katlas/pssm.py'),
140
+ 'katlas.pssm.get_IC_flat': ('pssm.html#get_ic_flat', 'katlas/pssm.py'),
141
+ 'katlas.pssm.get_cluster_pssms': ('pssm.html#get_cluster_pssms', 'katlas/pssm.py'),
142
+ 'katlas.pssm.get_entropy': ('pssm.html#get_entropy', 'katlas/pssm.py'),
143
+ 'katlas.pssm.get_entropy_flat': ('pssm.html#get_entropy_flat', 'katlas/pssm.py'),
144
+ 'katlas.pssm.get_logo': ('pssm.html#get_logo', 'katlas/pssm.py'),
145
+ 'katlas.pssm.get_logo_IC': ('pssm.html#get_logo_ic', 'katlas/pssm.py'),
146
+ 'katlas.pssm.get_one_kinase': ('pssm.html#get_one_kinase', 'katlas/pssm.py'),
147
+ 'katlas.pssm.get_pos_min_max': ('pssm.html#get_pos_min_max', 'katlas/pssm.py'),
148
+ 'katlas.pssm.get_prob': ('pssm.html#get_prob', 'katlas/pssm.py'),
149
+ 'katlas.pssm.get_pssm_LO': ('pssm.html#get_pssm_lo', 'katlas/pssm.py'),
150
+ 'katlas.pssm.get_pssm_LO_flat': ('pssm.html#get_pssm_lo_flat', 'katlas/pssm.py'),
151
+ 'katlas.pssm.get_specificity': ('pssm.html#get_specificity', 'katlas/pssm.py'),
152
+ 'katlas.pssm.get_specificity_flat': ('pssm.html#get_specificity_flat', 'katlas/pssm.py'),
153
+ 'katlas.pssm.js_divergence': ('pssm.html#js_divergence', 'katlas/pssm.py'),
154
+ 'katlas.pssm.js_divergence_flat': ('pssm.html#js_divergence_flat', 'katlas/pssm.py'),
155
+ 'katlas.pssm.js_similarity': ('pssm.html#js_similarity', 'katlas/pssm.py'),
156
+ 'katlas.pssm.js_similarity_flat': ('pssm.html#js_similarity_flat', 'katlas/pssm.py'),
157
+ 'katlas.pssm.kl_divergence': ('pssm.html#kl_divergence', 'katlas/pssm.py'),
158
+ 'katlas.pssm.kl_divergence_flat': ('pssm.html#kl_divergence_flat', 'katlas/pssm.py'),
159
+ 'katlas.pssm.plot_heatmap': ('pssm.html#plot_heatmap', 'katlas/pssm.py'),
160
+ 'katlas.pssm.plot_heatmap_simple': ('pssm.html#plot_heatmap_simple', 'katlas/pssm.py'),
161
+ 'katlas.pssm.plot_logo': ('pssm.html#plot_logo', 'katlas/pssm.py'),
162
+ 'katlas.pssm.plot_logo_LO': ('pssm.html#plot_logo_lo', 'katlas/pssm.py'),
163
+ 'katlas.pssm.plot_logo_heatmap': ('pssm.html#plot_logo_heatmap', 'katlas/pssm.py'),
164
+ 'katlas.pssm.plot_logo_heatmap_LO': ('pssm.html#plot_logo_heatmap_lo', 'katlas/pssm.py'),
165
+ 'katlas.pssm.plot_logo_heatmap_pspa': ('pssm.html#plot_logo_heatmap_pspa', 'katlas/pssm.py'),
166
+ 'katlas.pssm.plot_logo_pspa': ('pssm.html#plot_logo_pspa', 'katlas/pssm.py'),
167
+ 'katlas.pssm.plot_logo_raw': ('pssm.html#plot_logo_raw', 'katlas/pssm.py'),
168
+ 'katlas.pssm.plot_logos': ('pssm.html#plot_logos', 'katlas/pssm.py'),
169
+ 'katlas.pssm.plot_logos_idx': ('pssm.html#plot_logos_idx', 'katlas/pssm.py'),
170
+ 'katlas.pssm.plot_two_heatmaps': ('pssm.html#plot_two_heatmaps', 'katlas/pssm.py'),
171
+ 'katlas.pssm.preprocess_pspa': ('pssm.html#preprocess_pspa', 'katlas/pssm.py'),
172
+ 'katlas.pssm.raw2norm': ('pssm.html#raw2norm', 'katlas/pssm.py'),
173
+ 'katlas.pssm.recover_pssm': ('pssm.html#recover_pssm', 'katlas/pssm.py'),
174
+ 'katlas.pssm.scale_pos_neg_values': ('pssm.html#scale_pos_neg_values', 'katlas/pssm.py'),
175
+ 'katlas.pssm.scale_zero_position': ('pssm.html#scale_zero_position', 'katlas/pssm.py'),
176
+ 'katlas.pssm.sty2pSTY_df': ('pssm.html#sty2psty_df', 'katlas/pssm.py')},
177
+ 'katlas.score': { 'katlas.score.Params': ('scoring.html#params', 'katlas/score.py'),
178
+ 'katlas.score.STY2sty': ('scoring.html#sty2sty', 'katlas/score.py'),
179
+ 'katlas.score.cut_seq': ('scoring.html#cut_seq', 'katlas/score.py'),
180
+ 'katlas.score.duplicate_ref_zero': ('scoring.html#duplicate_ref_zero', 'katlas/score.py'),
181
+ 'katlas.score.get_dict': ('scoring.html#get_dict', 'katlas/score.py'),
182
+ 'katlas.score.get_pct': ('scoring.html#get_pct', 'katlas/score.py'),
183
+ 'katlas.score.get_pct_df': ('scoring.html#get_pct_df', 'katlas/score.py'),
184
+ 'katlas.score.multiply': ('scoring.html#multiply', 'katlas/score.py'),
185
+ 'katlas.score.multiply_generic': ('scoring.html#multiply_generic', 'katlas/score.py'),
186
+ 'katlas.score.multiply_pspa': ('scoring.html#multiply_pspa', 'katlas/score.py'),
187
+ 'katlas.score.predict_kinase': ('scoring.html#predict_kinase', 'katlas/score.py'),
188
+ 'katlas.score.predict_kinase_df': ('scoring.html#predict_kinase_df', 'katlas/score.py'),
189
+ 'katlas.score.preprocess_ref': ('scoring.html#preprocess_ref', 'katlas/score.py'),
190
+ 'katlas.score.sumup': ('scoring.html#sumup', 'katlas/score.py')},
191
+ 'katlas.statistics': { 'katlas.statistics.get_metaP': ('statistics.html#get_metap', 'katlas/statistics.py'),
192
+ 'katlas.statistics.get_pvalue': ('statistics.html#get_pvalue', 'katlas/statistics.py')},
193
+ 'katlas.train': { 'katlas.train.calculate_ce': ('ml.html#calculate_ce', 'katlas/train.py'),
194
+ 'katlas.train.get_score': ('ml.html#get_score', 'katlas/train.py'),
195
+ 'katlas.train.get_splits': ('ml.html#get_splits', 'katlas/train.py'),
196
+ 'katlas.train.post_process': ('ml.html#post_process', 'katlas/train.py'),
197
+ 'katlas.train.post_process_oof': ('ml.html#post_process_oof', 'katlas/train.py'),
105
198
  'katlas.train.predict_ml': ('ml.html#predict_ml', 'katlas/train.py'),
106
- 'katlas.train.score_each': ('ml.html#score_each', 'katlas/train.py'),
107
199
  'katlas.train.split_data': ('ml.html#split_data', 'katlas/train.py'),
108
200
  'katlas.train.train_ml': ('ml.html#train_ml', 'katlas/train.py'),
109
- 'katlas.train.train_ml_cv': ('ml.html#train_ml_cv', 'katlas/train.py')}}}
201
+ 'katlas.train.train_ml_cv': ('ml.html#train_ml_cv', 'katlas/train.py')},
202
+ 'katlas.utils': { 'katlas.utils.aln2df': ('utils.html#aln2df', 'katlas/utils.py'),
203
+ 'katlas.utils.check_seq': ('utils.html#check_seq', 'katlas/utils.py'),
204
+ 'katlas.utils.check_seqs': ('utils.html#check_seqs', 'katlas/utils.py'),
205
+ 'katlas.utils.extract_site_seq': ('utils.html#extract_site_seq', 'katlas/utils.py'),
206
+ 'katlas.utils.get_aln_freq': ('utils.html#get_aln_freq', 'katlas/utils.py'),
207
+ 'katlas.utils.get_diff': ('utils.html#get_diff', 'katlas/utils.py'),
208
+ 'katlas.utils.get_fasta': ('utils.html#get_fasta', 'katlas/utils.py'),
209
+ 'katlas.utils.pSTY2sty': ('utils.html#psty2sty', 'katlas/utils.py'),
210
+ 'katlas.utils.phosphorylate_seq': ('utils.html#phosphorylate_seq', 'katlas/utils.py'),
211
+ 'katlas.utils.phosphorylate_seq_df': ('utils.html#phosphorylate_seq_df', 'katlas/utils.py'),
212
+ 'katlas.utils.prepare_path': ('utils.html#prepare_path', 'katlas/utils.py'),
213
+ 'katlas.utils.run_clustalo': ('utils.html#run_clustalo', 'katlas/utils.py'),
214
+ 'katlas.utils.sty2pSTY': ('utils.html#sty2psty', 'katlas/utils.py'),
215
+ 'katlas.utils.validate_site': ('utils.html#validate_site', 'katlas/utils.py'),
216
+ 'katlas.utils.validate_site_df': ('utils.html#validate_site_df', 'katlas/utils.py')}}}
katlas/clustering.py ADDED
@@ -0,0 +1,142 @@
1
+ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/03_hierarchical.ipynb.
2
+
3
+ # %% auto 0
4
+ __all__ = ['get_1d_distance', 'get_1d_js', 'get_distance', 'get_1d_distance_parallel', 'get_1d_js_parallel', 'get_Z',
5
+ 'plot_dendrogram', 'pssm_to_seq', 'get_pssm_seq_labels']
6
+
7
+ # %% ../nbs/03_hierarchical.ipynb 3
8
+ import pandas as pd,numpy as np
9
+ from .data import *
10
+ from .pssm import *
11
+ from .plot import set_sns
12
+ from tqdm import tqdm
13
+ from functools import partial
14
+ from tqdm.contrib.concurrent import process_map
15
+ from fastcore.meta import delegates
16
+ from scipy.cluster.hierarchy import linkage, fcluster,dendrogram
17
+ from matplotlib import pyplot as plt
18
+
19
+ # %% ../nbs/03_hierarchical.ipynb 6
20
def get_1d_distance(df,func_flat):
    "Compute the 1D (condensed, upper-triangle) pairwise distance between rows of a dataframe given a distance function"
    n_rows = len(df)
    condensed = []
    # Progress bar advances once per anchor row; each anchor is compared with every later row (i < j).
    for anchor in tqdm(range(n_rows)):
        condensed.extend(
            func_flat(df.iloc[anchor], df.iloc[other])
            for other in range(anchor + 1, n_rows)
        )
    return np.array(condensed)
29
+
30
+ # %% ../nbs/03_hierarchical.ipynb 8
31
def get_1d_js(df):
    "Compute the 1D pairwise distance between rows using JS divergence (serial version)."
    js_distances = get_1d_distance(df, js_divergence_flat)
    return js_distances
34
+
35
+ # %% ../nbs/03_hierarchical.ipynb 11
36
def get_distance(pair, df, func):
    "Apply `func` to the two rows of `df` selected by the positional index pair `pair`."
    first, second = pair
    row_a = df.iloc[first]
    row_b = df.iloc[second]
    return func(row_a, row_b)
39
+
40
+ # %% ../nbs/03_hierarchical.ipynb 12
41
def get_1d_distance_parallel(df, func_flat, max_workers=4, chunksize=100):
    "Parallel compute 1D distance for each pair of rows in a dataframe given a distance function"
    n_rows = len(df)

    # Enumerate every (i, j) with i < j, row by row, so the result order matches the serial version.
    pairs = []
    for i in range(n_rows):
        pairs.extend((i, j) for j in range(i + 1, n_rows))

    # Bind the dataframe and distance function so each task only carries an index pair.
    worker = partial(get_distance, df=df, func=func_flat)

    results = process_map(worker, pairs, max_workers=max_workers, chunksize=chunksize)
    return np.array(results)
50
+
51
+ # %% ../nbs/03_hierarchical.ipynb 14
52
@delegates(get_1d_distance_parallel)
def get_1d_js_parallel(df, func_flat=js_divergence_flat, **kwargs):
    "Compute the 1D pairwise distance between rows in parallel, using JS divergence by default."
    # Remaining keyword arguments are forwarded unchanged to `get_1d_distance_parallel`.
    js_distances = get_1d_distance_parallel(df, func_flat=func_flat, **kwargs)
    return js_distances
56
+
57
+ # %% ../nbs/03_hierarchical.ipynb 16
58
def get_Z(pssms,func_flat=js_divergence_flat,parallel=True):
    "Get linkage matrix Z from pssms dataframe (one row per flattened pssm)"
    # Pick the parallel or serial pairwise-distance routine; both receive the same distance function.
    if parallel:
        condensed = get_1d_distance_parallel(pssms, func_flat=func_flat)
    else:
        condensed = get_1d_distance(pssms, func_flat=func_flat)
    # NOTE(review): scipy documents 'ward' as correctly defined only for Euclidean
    # distances; the default JS divergence is not Euclidean -- confirm this is intended.
    return linkage(condensed, method='ward')
63
+
64
+ # %% ../nbs/03_hierarchical.ipynb 19
65
def plot_dendrogram(Z,
                    color_thr=0.07,
                    dense=7, # the higher the more dense for each row
                    line_width=1,
                    title=None,
                    scale=1,
                    **kwargs):
    "Plot a left-oriented dendrogram from linkage matrix Z; extra kwargs are forwarded to scipy's `dendrogram`."
    # Figure height grows with the number of rows in Z (one leaf more than rows for a
    # scipy linkage matrix). Clamp to 1 so that inputs with fewer leaves than `dense`
    # do not produce a zero-height figure.
    length = max(1, (len(Z) + 1) // dense)

    plt.figure(figsize=(5*scale,length*scale))
    # Apply the line width only while the dendrogram itself is drawn.
    with plt.rc_context({'lines.linewidth': line_width}):
        dendrogram(
            Z,
            orientation='left',
            leaf_font_size=7,
            color_threshold=color_thr,
            **kwargs
        )
    if title is not None: plt.title(title)
    plt.xlabel('Distance')
    # Hide the frame around the plot.
    ax = plt.gca()
    for spine in ['top', 'right', 'left', 'bottom']:
        ax.spines[spine].set_visible(False)
90
+
91
+ # %% ../nbs/03_hierarchical.ipynb 23
92
def pssm_to_seq(pssm_df,
                thr=0.2, # threshold of probability to show in sequence
                clean_center=True, # if true, zero out non-last three values in position 0 (keep only s,t,y values at center)
                ):
    """Represent PSSM in string sequence of amino acids.

    For each column (position) the up-to-three largest values above `thr` are shown:
    a single residue as-is, several as `[a/b/c]`, none as `.`, and `_` when the
    column sums to zero. The column labelled 0 (center) gets a trailing `*`.
    """
    pssm_df = pssm_df.copy()  # work on a copy so the caller's dataframe is untouched
    if clean_center:
        # Zero every row except the last three at position 0
        # (assumes the last three index rows are pS/pT/pY -- TODO confirm with callers).
        pssm_df.loc[pssm_df.index[:-3], 0] = 0

    # Show phospho-residues in lowercase single-letter form.
    pssm_df.index = pssm_df.index.map(lambda x: x.replace('pS', 's').replace('pT', 't').replace('pY', 'y'))

    consensus = []
    for col in pssm_df.columns:
        column_vals = pssm_df[col]
        if column_vals.sum() == 0:
            # consider the case where sum for the position is 0
            symbol = '_'
        else:
            top = column_vals.nlargest(3)
            passing = [aa for aa, prob in top.items() if prob > thr]

            if not passing:
                symbol = '.'
            elif len(passing) == 1:
                symbol = passing[0]
            else:
                symbol = f"[{'/'.join(passing)}]"

        if col == 0:  # center position
            # Bracketed and plain symbols both simply get the marker appended.
            symbol += '*'

        consensus.append(symbol)

    return ''.join(consensus)
130
+
131
+ # %% ../nbs/03_hierarchical.ipynb 26
132
def get_pssm_seq_labels(pssms,
                        count_map=None, # df index as key, counts as value
                        thr=0.3, # threshold of probability to show in sequence
                        ):
    "Build one label per row of pssms: the row index, an optional count, and the pssm rendered as a sequence string."
    labels = []
    for idx, row in pssms.iterrows():
        # Prefix is the index alone, or the index plus a thousands-separated count when a count map is given.
        if count_map is not None:
            prefix = str(idx) + f' (n={count_map[idx]:,})'
        else:
            prefix = str(idx)
        seq = pssm_to_seq(recover_pssm(row), thr=thr)
        labels.append(prefix + ': ' + seq)

    return labels
katlas/common.py ADDED
@@ -0,0 +1,4 @@
1
+ from .data import *
2
+ from .pssm import *
3
+ from .score import *
4
+ from .utils import *