cpgtools 2.0.0__py3-none-any.whl → 2.0.3__py3-none-any.whl

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.

Potentially problematic release.


This version of cpgtools might be problematic. Click here for more details.

Files changed (75)
  1. cpgmodule/_version.py +1 -0
  2. cpgmodule/utils.py +35 -0
  3. {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/CpG_aggregation.py +1 -1
  4. {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/CpG_anno_position.py +1 -1
  5. {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/CpG_anno_probe.py +1 -2
  6. {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/CpG_density_gene_centered.py +1 -1
  7. {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/CpG_distrb_chrom.py +1 -1
  8. {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/CpG_distrb_gene_centered.py +1 -1
  9. {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/CpG_distrb_region.py +1 -3
  10. {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/CpG_logo.py +1 -1
  11. {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/CpG_to_gene.py +1 -1
  12. {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/beta_PCA.py +31 -23
  13. {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/beta_UMAP.py +29 -22
  14. {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/beta_jitter_plot.py +1 -1
  15. {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/beta_m_conversion.py +1 -1
  16. {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/beta_profile_gene_centered.py +1 -1
  17. {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/beta_profile_region.py +1 -1
  18. {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/beta_selectNBest.py +9 -6
  19. {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/beta_stacked_barplot.py +1 -1
  20. {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/beta_stats.py +1 -1
  21. {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/beta_tSNE.py +31 -24
  22. {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/beta_topN.py +1 -1
  23. {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/beta_trichotmize.py +1 -1
  24. {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/dmc_Bayes.py +1 -1
  25. {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/dmc_bb.py +1 -1
  26. {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/dmc_fisher.py +1 -1
  27. {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/dmc_glm.py +1 -1
  28. {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/dmc_logit.py +1 -1
  29. {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/dmc_nonparametric.py +1 -1
  30. {cpgtools-2.0.0.data → cpgtools-2.0.3.data}/scripts/dmc_ttest.py +6 -2
  31. cpgtools-2.0.3.data/scripts/predict_missing.py +673 -0
  32. cpgtools-2.0.3.data/scripts/predict_sex.py +126 -0
  33. cpgtools-2.0.3.dist-info/LICENSE +19 -0
  34. cpgtools-2.0.3.dist-info/METADATA +76 -0
  35. cpgtools-2.0.3.dist-info/RECORD +101 -0
  36. {cpgtools-2.0.0.dist-info → cpgtools-2.0.3.dist-info}/WHEEL +1 -1
  37. cpgtools-2.0.3.dist-info/top_level.txt +3 -0
  38. impyute/__init__.py +3 -0
  39. impyute/contrib/__init__.py +7 -0
  40. impyute/contrib/compare.py +69 -0
  41. impyute/contrib/count_missing.py +30 -0
  42. impyute/contrib/describe.py +63 -0
  43. impyute/cs/__init__.py +11 -0
  44. impyute/cs/buck_iterative.py +82 -0
  45. impyute/cs/central_tendency.py +84 -0
  46. impyute/cs/em.py +52 -0
  47. impyute/cs/fast_knn.py +130 -0
  48. impyute/cs/random.py +27 -0
  49. impyute/dataset/__init__.py +6 -0
  50. impyute/dataset/base.py +137 -0
  51. impyute/dataset/corrupt.py +55 -0
  52. impyute/deletion/__init__.py +5 -0
  53. impyute/deletion/complete_case.py +21 -0
  54. impyute/ops/__init__.py +12 -0
  55. impyute/ops/error.py +9 -0
  56. impyute/ops/inverse_distance_weighting.py +31 -0
  57. impyute/ops/matrix.py +47 -0
  58. impyute/ops/testing.py +20 -0
  59. impyute/ops/util.py +96 -0
  60. impyute/ops/wrapper.py +179 -0
  61. impyute/ts/__init__.py +6 -0
  62. impyute/ts/locf.py +57 -0
  63. impyute/ts/moving_window.py +128 -0
  64. missingpy/__init__.py +4 -0
  65. missingpy/knnimpute.py +328 -0
  66. missingpy/missforest.py +556 -0
  67. missingpy/pairwise_external.py +315 -0
  68. missingpy/tests/__init__.py +0 -0
  69. missingpy/tests/test_knnimpute.py +605 -0
  70. missingpy/tests/test_missforest.py +409 -0
  71. missingpy/utils.py +124 -0
  72. cpgtools-2.0.0.dist-info/LICENSE.txt +0 -674
  73. cpgtools-2.0.0.dist-info/METADATA +0 -28
  74. cpgtools-2.0.0.dist-info/RECORD +0 -64
  75. cpgtools-2.0.0.dist-info/top_level.txt +0 -2
@@ -33,6 +33,7 @@ import sys
33
33
  import subprocess
34
34
  from optparse import OptionParser
35
35
  from cpgmodule.utils import *
36
+ from cpgmodule._version import __version__
36
37
  import pandas as pd
37
38
  from sklearn.preprocessing import StandardScaler
38
39
  from sklearn.manifold import TSNE
@@ -41,15 +42,15 @@ __author__ = "Liguo Wang"
41
42
  __copyright__ = "Copyleft"
42
43
  __credits__ = []
43
44
  __license__ = "GPL"
44
- __version__="2.0.0"
45
45
  __maintainer__ = "Liguo Wang"
46
46
  __email__ = "wang.liguo@mayo.edu"
47
47
  __status__ = "Development"
48
48
 
49
49
  def pick_colors(n):
50
- my_colors = ['#e6194B', '#3cb44b', '#4363d8', '#f58231', '#911eb4', '#42d4f4', '#f032e6', '#bfef45', '#fabebe', '#469990', '#e6beff', '#9A6324', '#fffac8', '#800000', '#aaffc3', '#808000', '#ffd8b1', '#000075', '#a9a9a9','#ffe119']
50
+ my_colors = [
51
+ "#F0A3FF", "#0075DC", "#993F00", "#4C005C", "#191919", "#005C31", "#2BCE48", "#FFCC99", "#808080", "#94FFB5", "#8F7C00", "#9DCC00", "#C20088", "#003380", "#FFA405", "#FFA8BB", "#426600", "#FF0010", "#5EF1F2", "#00998F", "#E0FF66", "#740AFF", "#990000", "#FFFF80", "#FFE100", "#FF5005"]
51
52
  if n > len(my_colors):
52
- print ("Only support 21 different colors", file = sys.stderr)
53
+ print ("Only support 26 different colors", file = sys.stderr)
53
54
  sys.exit()
54
55
  return my_colors[0:n]
55
56
 
@@ -99,27 +100,31 @@ def main():
99
100
  printlog("Perplexigty value is set to %d" % options.perplexity_value)
100
101
 
101
102
  #remove NA and transpose
102
- df2 = df1.dropna(axis=0, how='any')
103
- printlog("%d rows with missing values were removed." % (len(df1) - len(df2)))
103
+ df2 = df1.dropna(axis=0, how='any').T
104
+ printlog("%d rows with missing values were removed." % (len(df1.index) - len(df2.columns)))
104
105
  #print (df2.head())
105
-
106
- printlog("Transposing data frame ...")
107
- df2 = df2.T
108
- #print (df2.index)
109
-
110
- printlog("Standarizing values ...")
111
- x = df2.values
112
- x = StandardScaler().fit_transform(x)
113
- #print (x.shape)
114
-
106
+
115
107
  printlog("Reading group file: \"%s\" ..." % (options.group_file))
116
108
  group = pd.read_csv(options.group_file, index_col=0, header=0,names=['Sample_ID', 'Group_ID'])
117
- group.index = group.index.map(str)
118
-
119
109
  #check if sample IDs are unique
120
110
  if len(group.index) != len(group.index.unique()):
121
111
  print ("Sample IDs are not unique", file = sys.stderr)
122
- sys.exit()
112
+ sys.exit()
113
+ group.index = group.index.map(str)
114
+ printlog("Group file \"%s\" contains %d samples" % (options.group_file, len(group.index)))
115
+
116
+ printlog("Find common sample IDs between group file and data file ...")
117
+ common_samples = list(set(group.index) & set(df2.index))
118
+ used_df = df2.loc[common_samples]
119
+ (usable_sample, usable_cpg) = used_df.shape
120
+ printlog("Used CpGs: %d, Used samples: %d" % (usable_cpg, usable_sample))
121
+
122
+
123
+ printlog("Standarizing values ...")
124
+ x = used_df.to_numpy()
125
+ x = StandardScaler().fit_transform(x)
126
+
127
+
123
128
  group_names = group['Group_ID'].unique().tolist() # a list of unique group names
124
129
  color_names = pick_colors(len(group_names)) # a list of unique colors
125
130
  group_to_col = dict(zip(group_names, color_names))
@@ -127,13 +132,13 @@ def main():
127
132
  group['Colors'] = color_list
128
133
 
129
134
 
130
- tsne = TSNE(n_components = options.n_components, random_state = 0, perplexity = options.perplexity_value, learning_rate = options.learning_rate, n_iter = options.n_iterations)
135
+ tsne = TSNE(n_components = options.n_components, random_state = 0, perplexity = options.perplexity_value, learning_rate = options.learning_rate, max_iter = options.n_iterations)
131
136
  tsne_components = tsne.fit_transform(x)
132
137
  pc_names = [str(i)+str(j) for i,j in zip(['PC']*options.n_components,range(1,options.n_components+1))]
133
- principalDf = pd.DataFrame(data = tsne_components, columns = pc_names, index = df2.index)
138
+ principalDf = pd.DataFrame(data = tsne_components, columns = pc_names, index = used_df.index)
134
139
  principalDf.index.name = 'Sample_ID'
135
140
 
136
- finalDf = pd.concat([principalDf, group], axis=1,sort=False)
141
+ finalDf = pd.concat([principalDf, group], axis=1,sort=False, join='inner')
137
142
  finalDf.index.name = 'Sample_ID'
138
143
 
139
144
  printlog("Writing t-SNE results to file: \"%s\" ..." % (options.out_file + '.t-SNE.tsv'))
@@ -149,10 +154,12 @@ def main():
149
154
 
150
155
  if options.plot_alpha:
151
156
  print ('library(scales)', file=ROUT)
152
- print ('plot(PC1, PC2, col = alpha(Colors, %f), pch=%d, cex=1.5, main="t-SNE 2D map")' % (options.plot_alpha, pch[options.plot_char]), file=ROUT)
157
+ print ('plot(PC1, PC2, col = alpha(Colors, %f), pch=%d, cex=1.5, main="tSNE 2D map", xlab="tSNE1", ylab="tSNE2")'
158
+ % (options.plot_alpha, pch[options.plot_char]), file=ROUT)
153
159
  else:
154
- print ('plot(PC1, PC2, col = Colors, pch=%d, cex=1.2, main="t-SNE 2D map")' % pch[options.plot_char], file=ROUT)
155
- #print ('plot(PC1, PC2, col = Colors, pch=%d, cex=1, main="t-SNE 2D map")' % pch[options.plot_char], file=ROUT)
160
+ print ('plot(PC1, PC2, col = Colors, pch=%d, cex=1.2, main="tSNE 2D map", xlab="tSNE1", ylab="tSNE2")'
161
+ % (pch[options.plot_char]), file=ROUT)
162
+
156
163
  if options.text_label:
157
164
  print ('text(PC1, PC2, labels=Sample_ID, col = Colors, cex=0.5, pos=1)', file=ROUT)
158
165
  print ('legend("%s", legend=c(%s), col=c(%s), pch=%d,cex=1)' % (legend_pos[options.legend_location], ','.join(['"' + str(i) + '"' for i in group_names]), ','.join(['"' + str(group_to_col[i]) + '"' for i in group_names]), pch[options.plot_char]), file=ROUT)
@@ -18,6 +18,7 @@ import collections
18
18
  import subprocess
19
19
  import numpy as np
20
20
  from optparse import OptionParser
21
+ from cpgmodule._version import __version__
21
22
  from cpgmodule import ireader
22
23
  from cpgmodule.utils import *
23
24
  from cpgmodule import BED
@@ -27,7 +28,6 @@ __author__ = "Liguo Wang"
27
28
  __copyright__ = "Copyleft"
28
29
  __credits__ = []
29
30
  __license__ = "GPL"
30
- __version__="2.0.0"
31
31
  __maintainer__ = "Liguo Wang"
32
32
  __email__ = "wang.liguo@mayo.edu"
33
33
  __status__ = "Development"
@@ -18,6 +18,7 @@ import numpy as np
18
18
  from optparse import OptionParser
19
19
  from sklearn import mixture
20
20
  from time import strftime
21
+ from cpgmodule._version import __version__
21
22
  from cpgmodule import ireader
22
23
  from cpgmodule.utils import *
23
24
  import pandas as pd
@@ -26,7 +27,6 @@ __author__ = "Liguo Wang"
26
27
  __copyright__ = "Copyleft"
27
28
  __credits__ = []
28
29
  __license__ = "GPL"
29
- __version__="2.0.0"
30
30
  __maintainer__ = "Liguo Wang"
31
31
  __email__ = "wang.liguo@mayo.edu"
32
32
  __status__ = "Development"
@@ -27,13 +27,13 @@ from cpgmodule import ireader
27
27
  from cpgmodule.utils import *
28
28
  from cpgmodule import BED
29
29
  from cpgmodule import padjust
30
+ from cpgmodule._version import __version__
30
31
  from multiprocessing import Process, Manager, current_process
31
32
 
32
33
  __author__ = "Liguo Wang"
33
34
  __copyright__ = "Copyleft"
34
35
  __credits__ = []
35
36
  __license__ = "GPL"
36
- __version__="2.0.0"
37
37
  __maintainer__ = "Liguo Wang"
38
38
  __email__ = "wang.liguo@mayo.edu"
39
39
  __status__ = "Development"
@@ -36,12 +36,12 @@ from cpgmodule import ireader
36
36
  from cpgmodule.utils import *
37
37
  from cpgmodule import BED
38
38
  from cpgmodule import padjust
39
+ from cpgmodule._version import __version__
39
40
 
40
41
  __author__ = "Liguo Wang"
41
42
  __copyright__ = "Copyleft"
42
43
  __credits__ = []
43
44
  __license__ = "GPL"
44
- __version__="2.0.0"
45
45
  __maintainer__ = "Liguo Wang"
46
46
  __email__ = "wang.liguo@mayo.edu"
47
47
  __status__ = "Development"
@@ -34,12 +34,12 @@ from cpgmodule import ireader
34
34
  from cpgmodule.utils import *
35
35
  from cpgmodule import BED
36
36
  from cpgmodule import padjust
37
+ from cpgmodule._version import __version__
37
38
 
38
39
  __author__ = "Liguo Wang"
39
40
  __copyright__ = "Copyleft"
40
41
  __credits__ = []
41
42
  __license__ = "GPL"
42
- __version__="2.0.0"
43
43
  __maintainer__ = "Liguo Wang"
44
44
  __email__ = "wang.liguo@mayo.edu"
45
45
  __status__ = "Development"
@@ -18,12 +18,12 @@ from cpgmodule import ireader
18
18
  from cpgmodule.utils import *
19
19
  from cpgmodule import BED
20
20
  from cpgmodule import padjust
21
+ from cpgmodule._version import __version__
21
22
 
22
23
  __author__ = "Liguo Wang"
23
24
  __copyright__ = "Copyleft"
24
25
  __credits__ = []
25
26
  __license__ = "GPL"
26
- __version__="2.0.0"
27
27
  __maintainer__ = "Liguo Wang"
28
28
  __email__ = "wang.liguo@mayo.edu"
29
29
  __status__ = "Development"
@@ -31,12 +31,12 @@ from cpgmodule import ireader
31
31
  from cpgmodule.utils import *
32
32
  from cpgmodule import BED
33
33
  from cpgmodule import padjust
34
+ from cpgmodule._version import __version__
34
35
 
35
36
  __author__ = "Liguo Wang"
36
37
  __copyright__ = "Copyleft"
37
38
  __credits__ = []
38
39
  __license__ = "GPL"
39
- __version__="2.0.0"
40
40
  __maintainer__ = "Liguo Wang"
41
41
  __email__ = "wang.liguo@mayo.edu"
42
42
  __status__ = "Development"
@@ -19,12 +19,12 @@ from cpgmodule import ireader
19
19
  from cpgmodule.utils import *
20
20
  from cpgmodule import BED
21
21
  from cpgmodule import padjust
22
+ from cpgmodule._version import __version__
22
23
 
23
24
  __author__ = "Liguo Wang"
24
25
  __copyright__ = "Copyleft"
25
26
  __credits__ = []
26
27
  __license__ = "GPL"
27
- __version__="2.0.0"
28
28
  __maintainer__ = "Liguo Wang"
29
29
  __email__ = "wang.liguo@mayo.edu"
30
30
  __status__ = "Development"
@@ -18,12 +18,12 @@ from cpgmodule import ireader
18
18
  from cpgmodule.utils import *
19
19
  from cpgmodule import BED
20
20
  from cpgmodule import padjust
21
+ from cpgmodule._version import __version__
21
22
 
22
23
  __author__ = "Liguo Wang"
23
24
  __copyright__ = "Copyleft"
24
25
  __credits__ = []
25
26
  __license__ = "GPL"
26
- __version__="2.0.0"
27
27
  __maintainer__ = "Liguo Wang"
28
28
  __email__ = "wang.liguo@mayo.edu"
29
29
  __status__ = "Development"
@@ -189,7 +189,7 @@ def main():
189
189
  else:
190
190
  continue
191
191
  line_num += 1
192
-
192
+
193
193
  printlog("Perfrom Benjamini-Hochberg (aka FDR) correction ...")
194
194
  adjusted_p = {}
195
195
  q_list = padjust.multiple_testing_correction(p_list)
@@ -204,6 +204,10 @@ def main():
204
204
  else:
205
205
  f = l.split()
206
206
  probe_ID = f[0]
207
+ if probe_ID in delta_beta:
208
+ pass
209
+ else:
210
+ delta_beta[probe_ID] = 'n/a'
207
211
  try:
208
212
  print (l + '\t' + str(delta_beta[probe_ID]) + '\t' + adjusted_p[probe_ID], file=FOUT)
209
213
  except: