cpgtools 2.0.0__py3-none-any.whl → 2.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cpgtools might be problematic. Click here for more details.
- cpgmodule/_version.py +1 -0
- cpgmodule/utils.py +35 -0
- {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/CpG_aggregation.py +1 -1
- {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/CpG_anno_position.py +1 -1
- {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/CpG_anno_probe.py +1 -2
- {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/CpG_density_gene_centered.py +1 -1
- {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/CpG_distrb_chrom.py +1 -1
- {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/CpG_distrb_gene_centered.py +1 -1
- {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/CpG_distrb_region.py +1 -3
- {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/CpG_logo.py +1 -1
- {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/CpG_to_gene.py +1 -1
- {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/beta_PCA.py +31 -23
- {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/beta_UMAP.py +29 -22
- {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/beta_jitter_plot.py +1 -1
- {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/beta_m_conversion.py +1 -1
- {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/beta_profile_gene_centered.py +1 -1
- {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/beta_profile_region.py +1 -1
- {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/beta_selectNBest.py +9 -6
- {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/beta_stacked_barplot.py +1 -1
- {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/beta_stats.py +1 -1
- {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/beta_tSNE.py +31 -24
- {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/beta_topN.py +1 -1
- {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/beta_trichotmize.py +1 -1
- {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/dmc_Bayes.py +1 -1
- {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/dmc_bb.py +1 -1
- {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/dmc_fisher.py +1 -1
- {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/dmc_glm.py +1 -1
- {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/dmc_logit.py +1 -1
- {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/dmc_nonparametric.py +1 -1
- {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/dmc_ttest.py +6 -2
- cpgtools-2.0.3.data/scripts/predict_missing.py +673 -0
- cpgtools-2.0.3.data/scripts/predict_sex.py +126 -0
- cpgtools-2.0.3.dist-info/LICENSE +19 -0
- cpgtools-2.0.3.dist-info/METADATA +76 -0
- cpgtools-2.0.3.dist-info/RECORD +101 -0
- {cpgtools-2.0.0.dist-info → cpgtools-2.0.3.dist-info}/WHEEL +1 -1
- cpgtools-2.0.3.dist-info/top_level.txt +3 -0
- impyute/__init__.py +3 -0
- impyute/contrib/__init__.py +7 -0
- impyute/contrib/compare.py +69 -0
- impyute/contrib/count_missing.py +30 -0
- impyute/contrib/describe.py +63 -0
- impyute/cs/__init__.py +11 -0
- impyute/cs/buck_iterative.py +82 -0
- impyute/cs/central_tendency.py +84 -0
- impyute/cs/em.py +52 -0
- impyute/cs/fast_knn.py +130 -0
- impyute/cs/random.py +27 -0
- impyute/dataset/__init__.py +6 -0
- impyute/dataset/base.py +137 -0
- impyute/dataset/corrupt.py +55 -0
- impyute/deletion/__init__.py +5 -0
- impyute/deletion/complete_case.py +21 -0
- impyute/ops/__init__.py +12 -0
- impyute/ops/error.py +9 -0
- impyute/ops/inverse_distance_weighting.py +31 -0
- impyute/ops/matrix.py +47 -0
- impyute/ops/testing.py +20 -0
- impyute/ops/util.py +96 -0
- impyute/ops/wrapper.py +179 -0
- impyute/ts/__init__.py +6 -0
- impyute/ts/locf.py +57 -0
- impyute/ts/moving_window.py +128 -0
- missingpy/__init__.py +4 -0
- missingpy/knnimpute.py +328 -0
- missingpy/missforest.py +556 -0
- missingpy/pairwise_external.py +315 -0
- missingpy/tests/__init__.py +0 -0
- missingpy/tests/test_knnimpute.py +605 -0
- missingpy/tests/test_missforest.py +409 -0
- missingpy/utils.py +124 -0
- cpgtools-2.0.0.dist-info/LICENSE.txt +0 -674
- cpgtools-2.0.0.dist-info/METADATA +0 -28
- cpgtools-2.0.0.dist-info/RECORD +0 -64
- cpgtools-2.0.0.dist-info/top_level.txt +0 -2
|
@@ -33,6 +33,7 @@ import sys
|
|
|
33
33
|
import subprocess
|
|
34
34
|
from optparse import OptionParser
|
|
35
35
|
from cpgmodule.utils import *
|
|
36
|
+
from cpgmodule._version import __version__
|
|
36
37
|
import pandas as pd
|
|
37
38
|
from sklearn.preprocessing import StandardScaler
|
|
38
39
|
from sklearn.manifold import TSNE
|
|
@@ -41,15 +42,15 @@ __author__ = "Liguo Wang"
|
|
|
41
42
|
__copyright__ = "Copyleft"
|
|
42
43
|
__credits__ = []
|
|
43
44
|
__license__ = "GPL"
|
|
44
|
-
__version__="2.0.0"
|
|
45
45
|
__maintainer__ = "Liguo Wang"
|
|
46
46
|
__email__ = "wang.liguo@mayo.edu"
|
|
47
47
|
__status__ = "Development"
|
|
48
48
|
|
|
49
49
|
def pick_colors(n):
|
|
50
|
-
my_colors = [
|
|
50
|
+
my_colors = [
|
|
51
|
+
"#F0A3FF", "#0075DC", "#993F00", "#4C005C", "#191919", "#005C31", "#2BCE48", "#FFCC99", "#808080", "#94FFB5", "#8F7C00", "#9DCC00", "#C20088", "#003380", "#FFA405", "#FFA8BB", "#426600", "#FF0010", "#5EF1F2", "#00998F", "#E0FF66", "#740AFF", "#990000", "#FFFF80", "#FFE100", "#FF5005"]
|
|
51
52
|
if n > len(my_colors):
|
|
52
|
-
print ("Only support
|
|
53
|
+
print ("Only support 26 different colors", file = sys.stderr)
|
|
53
54
|
sys.exit()
|
|
54
55
|
return my_colors[0:n]
|
|
55
56
|
|
|
@@ -99,27 +100,31 @@ def main():
|
|
|
99
100
|
printlog("Perplexigty value is set to %d" % options.perplexity_value)
|
|
100
101
|
|
|
101
102
|
#remove NA and transpose
|
|
102
|
-
df2 = df1.dropna(axis=0, how='any')
|
|
103
|
-
printlog("%d rows with missing values were removed." % (len(df1) - len(df2)))
|
|
103
|
+
df2 = df1.dropna(axis=0, how='any').T
|
|
104
|
+
printlog("%d rows with missing values were removed." % (len(df1.index) - len(df2.columns)))
|
|
104
105
|
#print (df2.head())
|
|
105
|
-
|
|
106
|
-
printlog("Transposing data frame ...")
|
|
107
|
-
df2 = df2.T
|
|
108
|
-
#print (df2.index)
|
|
109
|
-
|
|
110
|
-
printlog("Standarizing values ...")
|
|
111
|
-
x = df2.values
|
|
112
|
-
x = StandardScaler().fit_transform(x)
|
|
113
|
-
#print (x.shape)
|
|
114
|
-
|
|
106
|
+
|
|
115
107
|
printlog("Reading group file: \"%s\" ..." % (options.group_file))
|
|
116
108
|
group = pd.read_csv(options.group_file, index_col=0, header=0,names=['Sample_ID', 'Group_ID'])
|
|
117
|
-
group.index = group.index.map(str)
|
|
118
|
-
|
|
119
109
|
#check if sample IDs are unique
|
|
120
110
|
if len(group.index) != len(group.index.unique()):
|
|
121
111
|
print ("Sample IDs are not unique", file = sys.stderr)
|
|
122
|
-
sys.exit()
|
|
112
|
+
sys.exit()
|
|
113
|
+
group.index = group.index.map(str)
|
|
114
|
+
printlog("Group file \"%s\" contains %d samples" % (options.group_file, len(group.index)))
|
|
115
|
+
|
|
116
|
+
printlog("Find common sample IDs between group file and data file ...")
|
|
117
|
+
common_samples = list(set(group.index) & set(df2.index))
|
|
118
|
+
used_df = df2.loc[common_samples]
|
|
119
|
+
(usable_sample, usable_cpg) = used_df.shape
|
|
120
|
+
printlog("Used CpGs: %d, Used samples: %d" % (usable_cpg, usable_sample))
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
printlog("Standarizing values ...")
|
|
124
|
+
x = used_df.to_numpy()
|
|
125
|
+
x = StandardScaler().fit_transform(x)
|
|
126
|
+
|
|
127
|
+
|
|
123
128
|
group_names = group['Group_ID'].unique().tolist() # a list of unique group names
|
|
124
129
|
color_names = pick_colors(len(group_names)) # a list of unique colors
|
|
125
130
|
group_to_col = dict(zip(group_names, color_names))
|
|
@@ -127,13 +132,13 @@ def main():
|
|
|
127
132
|
group['Colors'] = color_list
|
|
128
133
|
|
|
129
134
|
|
|
130
|
-
tsne = TSNE(n_components = options.n_components, random_state = 0, perplexity = options.perplexity_value, learning_rate = options.learning_rate,
|
|
135
|
+
tsne = TSNE(n_components = options.n_components, random_state = 0, perplexity = options.perplexity_value, learning_rate = options.learning_rate, max_iter = options.n_iterations)
|
|
131
136
|
tsne_components = tsne.fit_transform(x)
|
|
132
137
|
pc_names = [str(i)+str(j) for i,j in zip(['PC']*options.n_components,range(1,options.n_components+1))]
|
|
133
|
-
principalDf = pd.DataFrame(data = tsne_components, columns = pc_names, index =
|
|
138
|
+
principalDf = pd.DataFrame(data = tsne_components, columns = pc_names, index = used_df.index)
|
|
134
139
|
principalDf.index.name = 'Sample_ID'
|
|
135
140
|
|
|
136
|
-
finalDf = pd.concat([principalDf, group], axis=1,sort=False)
|
|
141
|
+
finalDf = pd.concat([principalDf, group], axis=1,sort=False, join='inner')
|
|
137
142
|
finalDf.index.name = 'Sample_ID'
|
|
138
143
|
|
|
139
144
|
printlog("Writing t-SNE results to file: \"%s\" ..." % (options.out_file + '.t-SNE.tsv'))
|
|
@@ -149,10 +154,12 @@ def main():
|
|
|
149
154
|
|
|
150
155
|
if options.plot_alpha:
|
|
151
156
|
print ('library(scales)', file=ROUT)
|
|
152
|
-
print ('plot(PC1, PC2, col = alpha(Colors, %f), pch=%d, cex=1.5, main="
|
|
157
|
+
print ('plot(PC1, PC2, col = alpha(Colors, %f), pch=%d, cex=1.5, main="tSNE 2D map", xlab="tSNE1", ylab="tSNE2")'
|
|
158
|
+
% (options.plot_alpha, pch[options.plot_char]), file=ROUT)
|
|
153
159
|
else:
|
|
154
|
-
print ('plot(PC1, PC2, col = Colors, pch=%d, cex=1.2, main="
|
|
155
|
-
|
|
160
|
+
print ('plot(PC1, PC2, col = Colors, pch=%d, cex=1.2, main="tSNE 2D map", xlab="tSNE1", ylab="tSNE2")'
|
|
161
|
+
% (pch[options.plot_char]), file=ROUT)
|
|
162
|
+
|
|
156
163
|
if options.text_label:
|
|
157
164
|
print ('text(PC1, PC2, labels=Sample_ID, col = Colors, cex=0.5, pos=1)', file=ROUT)
|
|
158
165
|
print ('legend("%s", legend=c(%s), col=c(%s), pch=%d,cex=1)' % (legend_pos[options.legend_location], ','.join(['"' + str(i) + '"' for i in group_names]), ','.join(['"' + str(group_to_col[i]) + '"' for i in group_names]), pch[options.plot_char]), file=ROUT)
|
|
@@ -18,6 +18,7 @@ import collections
|
|
|
18
18
|
import subprocess
|
|
19
19
|
import numpy as np
|
|
20
20
|
from optparse import OptionParser
|
|
21
|
+
from cpgmodule._version import __version__
|
|
21
22
|
from cpgmodule import ireader
|
|
22
23
|
from cpgmodule.utils import *
|
|
23
24
|
from cpgmodule import BED
|
|
@@ -27,7 +28,6 @@ __author__ = "Liguo Wang"
|
|
|
27
28
|
__copyright__ = "Copyleft"
|
|
28
29
|
__credits__ = []
|
|
29
30
|
__license__ = "GPL"
|
|
30
|
-
__version__="2.0.0"
|
|
31
31
|
__maintainer__ = "Liguo Wang"
|
|
32
32
|
__email__ = "wang.liguo@mayo.edu"
|
|
33
33
|
__status__ = "Development"
|
|
@@ -18,6 +18,7 @@ import numpy as np
|
|
|
18
18
|
from optparse import OptionParser
|
|
19
19
|
from sklearn import mixture
|
|
20
20
|
from time import strftime
|
|
21
|
+
from cpgmodule._version import __version__
|
|
21
22
|
from cpgmodule import ireader
|
|
22
23
|
from cpgmodule.utils import *
|
|
23
24
|
import pandas as pd
|
|
@@ -26,7 +27,6 @@ __author__ = "Liguo Wang"
|
|
|
26
27
|
__copyright__ = "Copyleft"
|
|
27
28
|
__credits__ = []
|
|
28
29
|
__license__ = "GPL"
|
|
29
|
-
__version__="2.0.0"
|
|
30
30
|
__maintainer__ = "Liguo Wang"
|
|
31
31
|
__email__ = "wang.liguo@mayo.edu"
|
|
32
32
|
__status__ = "Development"
|
|
@@ -27,13 +27,13 @@ from cpgmodule import ireader
|
|
|
27
27
|
from cpgmodule.utils import *
|
|
28
28
|
from cpgmodule import BED
|
|
29
29
|
from cpgmodule import padjust
|
|
30
|
+
from cpgmodule._version import __version__
|
|
30
31
|
from multiprocessing import Process, Manager, current_process
|
|
31
32
|
|
|
32
33
|
__author__ = "Liguo Wang"
|
|
33
34
|
__copyright__ = "Copyleft"
|
|
34
35
|
__credits__ = []
|
|
35
36
|
__license__ = "GPL"
|
|
36
|
-
__version__="2.0.0"
|
|
37
37
|
__maintainer__ = "Liguo Wang"
|
|
38
38
|
__email__ = "wang.liguo@mayo.edu"
|
|
39
39
|
__status__ = "Development"
|
|
@@ -36,12 +36,12 @@ from cpgmodule import ireader
|
|
|
36
36
|
from cpgmodule.utils import *
|
|
37
37
|
from cpgmodule import BED
|
|
38
38
|
from cpgmodule import padjust
|
|
39
|
+
from cpgmodule._version import __version__
|
|
39
40
|
|
|
40
41
|
__author__ = "Liguo Wang"
|
|
41
42
|
__copyright__ = "Copyleft"
|
|
42
43
|
__credits__ = []
|
|
43
44
|
__license__ = "GPL"
|
|
44
|
-
__version__="2.0.0"
|
|
45
45
|
__maintainer__ = "Liguo Wang"
|
|
46
46
|
__email__ = "wang.liguo@mayo.edu"
|
|
47
47
|
__status__ = "Development"
|
|
@@ -34,12 +34,12 @@ from cpgmodule import ireader
|
|
|
34
34
|
from cpgmodule.utils import *
|
|
35
35
|
from cpgmodule import BED
|
|
36
36
|
from cpgmodule import padjust
|
|
37
|
+
from cpgmodule._version import __version__
|
|
37
38
|
|
|
38
39
|
__author__ = "Liguo Wang"
|
|
39
40
|
__copyright__ = "Copyleft"
|
|
40
41
|
__credits__ = []
|
|
41
42
|
__license__ = "GPL"
|
|
42
|
-
__version__="2.0.0"
|
|
43
43
|
__maintainer__ = "Liguo Wang"
|
|
44
44
|
__email__ = "wang.liguo@mayo.edu"
|
|
45
45
|
__status__ = "Development"
|
|
@@ -18,12 +18,12 @@ from cpgmodule import ireader
|
|
|
18
18
|
from cpgmodule.utils import *
|
|
19
19
|
from cpgmodule import BED
|
|
20
20
|
from cpgmodule import padjust
|
|
21
|
+
from cpgmodule._version import __version__
|
|
21
22
|
|
|
22
23
|
__author__ = "Liguo Wang"
|
|
23
24
|
__copyright__ = "Copyleft"
|
|
24
25
|
__credits__ = []
|
|
25
26
|
__license__ = "GPL"
|
|
26
|
-
__version__="2.0.0"
|
|
27
27
|
__maintainer__ = "Liguo Wang"
|
|
28
28
|
__email__ = "wang.liguo@mayo.edu"
|
|
29
29
|
__status__ = "Development"
|
|
@@ -31,12 +31,12 @@ from cpgmodule import ireader
|
|
|
31
31
|
from cpgmodule.utils import *
|
|
32
32
|
from cpgmodule import BED
|
|
33
33
|
from cpgmodule import padjust
|
|
34
|
+
from cpgmodule._version import __version__
|
|
34
35
|
|
|
35
36
|
__author__ = "Liguo Wang"
|
|
36
37
|
__copyright__ = "Copyleft"
|
|
37
38
|
__credits__ = []
|
|
38
39
|
__license__ = "GPL"
|
|
39
|
-
__version__="2.0.0"
|
|
40
40
|
__maintainer__ = "Liguo Wang"
|
|
41
41
|
__email__ = "wang.liguo@mayo.edu"
|
|
42
42
|
__status__ = "Development"
|
|
@@ -19,12 +19,12 @@ from cpgmodule import ireader
|
|
|
19
19
|
from cpgmodule.utils import *
|
|
20
20
|
from cpgmodule import BED
|
|
21
21
|
from cpgmodule import padjust
|
|
22
|
+
from cpgmodule._version import __version__
|
|
22
23
|
|
|
23
24
|
__author__ = "Liguo Wang"
|
|
24
25
|
__copyright__ = "Copyleft"
|
|
25
26
|
__credits__ = []
|
|
26
27
|
__license__ = "GPL"
|
|
27
|
-
__version__="2.0.0"
|
|
28
28
|
__maintainer__ = "Liguo Wang"
|
|
29
29
|
__email__ = "wang.liguo@mayo.edu"
|
|
30
30
|
__status__ = "Development"
|
|
@@ -18,12 +18,12 @@ from cpgmodule import ireader
|
|
|
18
18
|
from cpgmodule.utils import *
|
|
19
19
|
from cpgmodule import BED
|
|
20
20
|
from cpgmodule import padjust
|
|
21
|
+
from cpgmodule._version import __version__
|
|
21
22
|
|
|
22
23
|
__author__ = "Liguo Wang"
|
|
23
24
|
__copyright__ = "Copyleft"
|
|
24
25
|
__credits__ = []
|
|
25
26
|
__license__ = "GPL"
|
|
26
|
-
__version__="2.0.0"
|
|
27
27
|
__maintainer__ = "Liguo Wang"
|
|
28
28
|
__email__ = "wang.liguo@mayo.edu"
|
|
29
29
|
__status__ = "Development"
|
|
@@ -189,7 +189,7 @@ def main():
|
|
|
189
189
|
else:
|
|
190
190
|
continue
|
|
191
191
|
line_num += 1
|
|
192
|
-
|
|
192
|
+
|
|
193
193
|
printlog("Perfrom Benjamini-Hochberg (aka FDR) correction ...")
|
|
194
194
|
adjusted_p = {}
|
|
195
195
|
q_list = padjust.multiple_testing_correction(p_list)
|
|
@@ -204,6 +204,10 @@ def main():
|
|
|
204
204
|
else:
|
|
205
205
|
f = l.split()
|
|
206
206
|
probe_ID = f[0]
|
|
207
|
+
if probe_ID in delta_beta:
|
|
208
|
+
pass
|
|
209
|
+
else:
|
|
210
|
+
delta_beta[probe_ID] = 'n/a'
|
|
207
211
|
try:
|
|
208
212
|
print (l + '\t' + str(delta_beta[probe_ID]) + '\t' + adjusted_p[probe_ID], file=FOUT)
|
|
209
213
|
except:
|