cpgtools 2.0.2__tar.gz → 2.0.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cpgtools might be problematic. Click here for more details.

Files changed (110):
  1. {cpgtools-2.0.2 → cpgtools-2.0.3}/MANIFEST.in +4 -4
  2. {cpgtools-2.0.2/src/cpgtools.egg-info → cpgtools-2.0.3}/PKG-INFO +1 -1
  3. {cpgtools-2.0.2 → cpgtools-2.0.3}/pyproject.toml +1 -1
  4. cpgtools-2.0.3/scripts/.DS_Store +0 -0
  5. {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/dmc_ttest.py +5 -1
  6. cpgtools-2.0.2/scripts/beta_imputation.py → cpgtools-2.0.3/scripts/predict_missing.py +88 -19
  7. {cpgtools-2.0.2 → cpgtools-2.0.3}/setup.py +1 -1
  8. cpgtools-2.0.3/src/cpgmodule/data/AltumAge_cpg.pkl +0 -0
  9. cpgtools-2.0.3/src/cpgmodule/data/AltumAge_multi_platform_cpgs.pkl +0 -0
  10. cpgtools-2.0.3/src/cpgmodule/data/AltumAge_scaler.pkl +0 -0
  11. cpgtools-2.0.3/src/cpgmodule/data/GA_Bohlin.pkl +0 -0
  12. cpgtools-2.0.3/src/cpgmodule/data/GA_Haftorn.pkl +0 -0
  13. cpgtools-2.0.3/src/cpgmodule/data/GA_Knight.pkl +0 -0
  14. cpgtools-2.0.3/src/cpgmodule/data/GA_Lee_CPC.pkl +0 -0
  15. cpgtools-2.0.3/src/cpgmodule/data/GA_Lee_RPC.pkl +0 -0
  16. cpgtools-2.0.3/src/cpgmodule/data/GA_Lee_refined_RPC.pkl +0 -0
  17. cpgtools-2.0.3/src/cpgmodule/data/GA_Mayne.pkl +0 -0
  18. cpgtools-2.0.3/src/cpgmodule/data/Hannum.pkl +0 -0
  19. cpgtools-2.0.3/src/cpgmodule/data/Horvath_2013.pkl +0 -0
  20. cpgtools-2.0.3/src/cpgmodule/data/Horvath_2018.pkl +0 -0
  21. cpgtools-2.0.3/src/cpgmodule/data/Levine.pkl +0 -0
  22. cpgtools-2.0.3/src/cpgmodule/data/Lu_DNAmTL.pkl +0 -0
  23. cpgtools-2.0.3/src/cpgmodule/data/Ped_McEwen.pkl +0 -0
  24. cpgtools-2.0.3/src/cpgmodule/data/Ped_Wu.pkl +0 -0
  25. cpgtools-2.0.3/src/cpgmodule/data/Zhang_BLUP.pkl +0 -0
  26. cpgtools-2.0.3/src/cpgmodule/data/Zhang_EN.pkl +0 -0
  27. {cpgtools-2.0.2 → cpgtools-2.0.3/src/cpgtools.egg-info}/PKG-INFO +1 -1
  28. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/cpgtools.egg-info/SOURCES.txt +21 -1
  29. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/ops/util.py +34 -14
  30. {cpgtools-2.0.2 → cpgtools-2.0.3}/LICENSE +0 -0
  31. {cpgtools-2.0.2 → cpgtools-2.0.3}/README.md +0 -0
  32. {cpgtools-2.0.2 → cpgtools-2.0.3}/distribute_setup.py +0 -0
  33. {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/CpG_aggregation.py +0 -0
  34. {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/CpG_anno_position.py +0 -0
  35. {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/CpG_anno_probe.py +0 -0
  36. {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/CpG_density_gene_centered.py +0 -0
  37. {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/CpG_distrb_chrom.py +0 -0
  38. {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/CpG_distrb_gene_centered.py +0 -0
  39. {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/CpG_distrb_region.py +0 -0
  40. {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/CpG_logo.py +0 -0
  41. {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/CpG_to_gene.py +0 -0
  42. {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/beta_PCA.py +0 -0
  43. {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/beta_UMAP.py +0 -0
  44. {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/beta_jitter_plot.py +0 -0
  45. {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/beta_m_conversion.py +0 -0
  46. {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/beta_profile_gene_centered.py +0 -0
  47. {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/beta_profile_region.py +0 -0
  48. {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/beta_selectNBest.py +0 -0
  49. {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/beta_stacked_barplot.py +0 -0
  50. {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/beta_stats.py +0 -0
  51. {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/beta_tSNE.py +0 -0
  52. {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/beta_topN.py +0 -0
  53. {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/beta_trichotmize.py +0 -0
  54. {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/dmc_Bayes.py +0 -0
  55. {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/dmc_bb.py +0 -0
  56. {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/dmc_fisher.py +0 -0
  57. {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/dmc_glm.py +0 -0
  58. {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/dmc_logit.py +0 -0
  59. {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/dmc_nonparametric.py +0 -0
  60. {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/predict_sex.py +0 -0
  61. {cpgtools-2.0.2 → cpgtools-2.0.3}/setup.cfg +0 -0
  62. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/cpgmodule/BED.py +0 -0
  63. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/cpgmodule/MI.py +0 -0
  64. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/cpgmodule/__init__.py +0 -0
  65. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/cpgmodule/_version.py +0 -0
  66. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/cpgmodule/cgID.py +0 -0
  67. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/cpgmodule/data/__init__.py +0 -0
  68. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/cpgmodule/extend_bed.py +0 -0
  69. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/cpgmodule/imotif.py +0 -0
  70. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/cpgmodule/ireader.py +0 -0
  71. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/cpgmodule/methylClock.py +0 -0
  72. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/cpgmodule/padjust.py +0 -0
  73. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/cpgmodule/region2gene.py +0 -0
  74. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/cpgmodule/utils.py +0 -0
  75. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/cpgtools.egg-info/dependency_links.txt +0 -0
  76. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/cpgtools.egg-info/requires.txt +0 -0
  77. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/cpgtools.egg-info/top_level.txt +0 -0
  78. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/__init__.py +0 -0
  79. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/contrib/__init__.py +0 -0
  80. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/contrib/compare.py +0 -0
  81. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/contrib/count_missing.py +0 -0
  82. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/contrib/describe.py +0 -0
  83. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/cs/__init__.py +0 -0
  84. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/cs/buck_iterative.py +0 -0
  85. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/cs/central_tendency.py +0 -0
  86. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/cs/em.py +0 -0
  87. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/cs/fast_knn.py +0 -0
  88. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/cs/random.py +0 -0
  89. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/dataset/__init__.py +0 -0
  90. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/dataset/base.py +0 -0
  91. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/dataset/corrupt.py +0 -0
  92. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/deletion/__init__.py +0 -0
  93. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/deletion/complete_case.py +0 -0
  94. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/ops/__init__.py +0 -0
  95. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/ops/error.py +0 -0
  96. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/ops/inverse_distance_weighting.py +0 -0
  97. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/ops/matrix.py +0 -0
  98. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/ops/testing.py +0 -0
  99. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/ops/wrapper.py +0 -0
  100. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/ts/__init__.py +0 -0
  101. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/ts/locf.py +0 -0
  102. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/ts/moving_window.py +0 -0
  103. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/missingpy/__init__.py +0 -0
  104. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/missingpy/knnimpute.py +0 -0
  105. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/missingpy/missforest.py +0 -0
  106. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/missingpy/pairwise_external.py +0 -0
  107. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/missingpy/tests/__init__.py +0 -0
  108. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/missingpy/tests/test_knnimpute.py +0 -0
  109. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/missingpy/tests/test_missforest.py +0 -0
  110. {cpgtools-2.0.2 → cpgtools-2.0.3}/src/missingpy/utils.py +0 -0
@@ -4,8 +4,8 @@ include PKG-INFO
4
4
  include LICENSE
5
5
 
6
6
  include distribute_setup.py
7
- recursive-include lib *.pyx
8
- recursive-include lib *.py
9
- recursive-include lib *.pkl
10
- recursive-include bin *
7
+ recursive-include src *.pyx
8
+ recursive-include src *.py
9
+ recursive-include src *.pkl
10
+ recursive-include scripts *
11
11
  recursive-include doc *
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cpgtools
3
- Version: 2.0.2
3
+ Version: 2.0.3
4
4
  Summary: Tools to analyze and visualize DNA methylation data
5
5
  Author-email: Liguo Wang <wangliguo78@gmail.com>
6
6
  Maintainer-email: Liguo Wang <wangliguo78@gmail.com>
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
5
5
 
6
6
  #Project's meta data
7
7
  [project]
8
- version = "2.0.2"
8
+ version = "2.0.3"
9
9
  name = "cpgtools"
10
10
  authors = [
11
11
  {name="Liguo Wang", email="wangliguo78@gmail.com"},
Binary file
@@ -189,7 +189,7 @@ def main():
189
189
  else:
190
190
  continue
191
191
  line_num += 1
192
-
192
+
193
193
  printlog("Perfrom Benjamini-Hochberg (aka FDR) correction ...")
194
194
  adjusted_p = {}
195
195
  q_list = padjust.multiple_testing_correction(p_list)
@@ -204,6 +204,10 @@ def main():
204
204
  else:
205
205
  f = l.split()
206
206
  probe_ID = f[0]
207
+ if probe_ID in delta_beta:
208
+ pass
209
+ else:
210
+ delta_beta[probe_ID] = 'n/a'
207
211
  try:
208
212
  print (l + '\t' + str(delta_beta[probe_ID]) + '\t' + adjusted_p[probe_ID], file=FOUT)
209
213
  except:
@@ -20,6 +20,7 @@ from impyute.cs.em import em
20
20
  from impyute.ops.util import toy_df,insert_na
21
21
  from impyute.cs.random import random_impute
22
22
  from impyute.cs.buck_iterative import buck_iterative
23
+ from impyute.ts.moving_window import moving_window
23
24
  from missingpy import MissForest
24
25
 
25
26
  #use pip to install fancyimpute
@@ -64,6 +65,7 @@ def nafiller():
64
65
  the same row or column.",
65
66
  'FillRef': "Impute missing values using values from an external \
66
67
  reference dataset.",
68
+ 'MW': "Interpolate the missing values with moving window.",
67
69
  'KNN': "Impute missing values using scikit-learn's KNNImputer function. \
68
70
  Note: slow for large datasets.",
69
71
  'KNN2': "Impute missing values using KNN2",
@@ -98,6 +100,7 @@ def nafiller():
98
100
  FillMax_parser = sub_parsers.add_parser('FillMax', help=commands['FillMax'])
99
101
  FillRand_parser = sub_parsers.add_parser('FillRand', help=commands['FillRand'])
100
102
  FillRef_parser = sub_parsers.add_parser('FillRef', help=commands['FillRef'])
103
+ MW_parser = sub_parsers.add_parser('MW', help=commands['MW'])
101
104
  KNN_parser = sub_parsers.add_parser('KNN', help=commands['KNN'])
102
105
  fKNN_parser = sub_parsers.add_parser('fKNN', help=commands['fKNN'])
103
106
  EM_parser = sub_parsers.add_parser('EM', help=commands['EM'])
@@ -116,7 +119,7 @@ def nafiller():
116
119
  'output', type=str, metavar='out_df',
117
120
  help="Output data frame.")
118
121
  DropNA_parser.add_argument(
119
- '-a', '--axis', type=int, choices=range(2), default=0,
122
+ '--axis', type=int, choices=range(2), default=0,
120
123
  help="0 : drop rows with any missing values, 1 : drop columns with \
121
124
  missing values. Default: 0")
122
125
  DropNA_parser.add_argument(
@@ -130,7 +133,7 @@ def nafiller():
130
133
  'output', type=str, metavar='out_df',
131
134
  help="Output data frame.")
132
135
  FillValue_parser.add_argument(
133
- '-s', '--score', type=float, default=0.0,
136
+ '--score', type=float, default=0.0,
134
137
  help="The value uesd to fill all NAs.")
135
138
  FillValue_parser.add_argument(
136
139
  '--decimal', type=int, default=5,
@@ -143,7 +146,7 @@ def nafiller():
143
146
  'output', type=str, metavar='out_df',
144
147
  help="Output data frame.")
145
148
  FillMean_parser.add_argument(
146
- '-a', '--axis', type=int, choices=range(2), default=1,
149
+ '--axis', type=int, choices=range(2), default=1,
147
150
  help="0 means column, 1 means row. Default: fill NAs with row means")
148
151
  FillMean_parser.add_argument(
149
152
  '--decimal', type=int, default=5,
@@ -156,7 +159,7 @@ def nafiller():
156
159
  'output', type=str, metavar='out_df',
157
160
  help="Output data frame.")
158
161
  FillMedian_parser.add_argument(
159
- '-a', '--axis', type=int, choices=range(2), default=1,
162
+ '--axis', type=int, choices=range(2), default=1,
160
163
  help="0 means column, 1 means row. Default: fill NAs with row medians")
161
164
  FillMedian_parser.add_argument(
162
165
  '--decimal', type=int, default=5,
@@ -169,7 +172,7 @@ def nafiller():
169
172
  'output', type=str, metavar='out_df',
170
173
  help="Output data frame.")
171
174
  FillMin_parser.add_argument(
172
- '-a', '--axis', type=int, choices=range(2), default=1,
175
+ '--axis', type=int, choices=range(2), default=1,
173
176
  help="0 means column, 1 means row. Default: fill NAs with the minimum value of the rows.")
174
177
  FillMin_parser.add_argument(
175
178
  '--decimal', type=int, default=5,
@@ -182,7 +185,7 @@ def nafiller():
182
185
  'output', type=str, metavar='out_df',
183
186
  help="Output data frame.")
184
187
  FillMax_parser.add_argument(
185
- '-a', '--axis', type=int, choices=range(2), default=1,
188
+ '--axis', type=int, choices=range(2), default=1,
186
189
  help="0 means column, 1 means row. Default: fill NAs with the maximum value of the rows.")
187
190
  FillMax_parser.add_argument(
188
191
  '--decimal', type=int, default=5,
@@ -195,7 +198,7 @@ def nafiller():
195
198
  'output', type=str, metavar='out_df',
196
199
  help="Output data frame.")
197
200
  FillRand_parser.add_argument(
198
- '-a', '--axis', type=int, choices=range(2), default=1,
201
+ '--axis', type=int, choices=range(2), default=1,
199
202
  help="0 means column, 1 means row. Default: fill NAs with values randomly selected from rows.")
200
203
  FillRand_parser.add_argument(
201
204
  '--decimal', type=int, default=5,
@@ -208,12 +211,50 @@ def nafiller():
208
211
  'output', type=str, metavar='out_df',
209
212
  help="Output data frame.")
210
213
  FillRef_parser.add_argument(
211
- '-r', '--ref', type=str,
214
+ '--ref', type=str,
212
215
  help="File name of the external reference.")
213
216
  FillRef_parser.add_argument(
214
217
  '--decimal', type=int, default=5,
215
218
  help="Number of decimal places to round each column to. default: %(default)s")
216
219
 
220
+ MW_parser.add_argument(
221
+ 'input', type=str, metavar='input_df',
222
+ help="Input data frame.")
223
+ MW_parser.add_argument(
224
+ 'output', type=str, metavar='out_df',
225
+ help="Output data frame.")
226
+ MW_parser.add_argument(
227
+ '--nindex', type=int, choices=[0, -1, None],
228
+ default=None,
229
+ help="Null index. Index of the null value inside the moving average window. \
230
+ See impyute documentation for details. default: %(default)s")
231
+ MW_parser.add_argument(
232
+ '--wsize', type=int, default=5,
233
+ help="Size of the moving average window/area of values being used \
234
+ for each local imputation. This number includes the missing value. \
235
+ default: %(default)s")
236
+ MW_parser.add_argument(
237
+ '--errors', type=str, choices=["raise", "coerce", "ignore"],
238
+ default='coerce',
239
+ help="Errors will occur with the indexing of the windows - for \
240
+ example if there is a nan at data[x][0] and `nindex` is set to \
241
+ -1 or there is a nan at data[x][-1] and `nindex` is set to 0. `\
242
+ 'raise' will raise an error, `coerce` will try again using an \
243
+ nindex set to the middle and `ignore` will just leave it as a \
244
+ nan default: %(default)s")
245
+ MW_parser.add_argument(
246
+ '--func', type=str, choices=["mean", "median"],
247
+ default='mean',
248
+ help="Function to summerzie values within the moving window. \
249
+ default: %(default)s")
250
+ MW_parser.add_argument(
251
+ '--axis', type=int, choices=range(2), default=1,
252
+ help="0 means column, 1 means row. Default: fill missing value \
253
+ with windows moving on rows.")
254
+ MW_parser.add_argument(
255
+ '--decimal', type=int, default=5,
256
+ help="Number of decimal places to round each column to. default: %(default)s")
257
+
217
258
  KNN_parser.add_argument(
218
259
  'input', type=str, metavar='input_df',
219
260
  help="Input data frame.")
@@ -306,20 +347,28 @@ def nafiller():
306
347
  'output', type=str, metavar='out_df',
307
348
  help="Name of the output data frame.")
308
349
  ToyDf_parser.add_argument(
309
- '-r', '--nrow', type=int, default=10,
350
+ '--nrow', type=int, default=10,
310
351
  help="Number of rows. default: %(default)s")
311
352
  ToyDf_parser.add_argument(
312
- '-c', '--ncol', type=int, default=10,
353
+ '--ncol', type=int, default=10,
313
354
  help="Number of columns. default: %(default)s")
314
355
  ToyDf_parser.add_argument(
315
- '--na', type=int, default=5,
356
+ '--nmiss', type=float, default=5,
316
357
  help="Number of missing values ingested into the dataframe. default: %(default)s")
317
358
  ToyDf_parser.add_argument(
318
- '-s', '--seed', type=int, default=123,
359
+ '--seed', type=int, default=123,
319
360
  help="Seed used to initialize a pseudorandom number generator. default: %(default)s")
320
361
  ToyDf_parser.add_argument(
321
362
  '--prefix', type=str, default='s',
322
- help="Prefix of the column names, a series numbers will be appended to the prefix. default: %(default)s")
363
+ help="Prefix of the column names, a series numbers will be appended to \
364
+ the prefix. If this is set to None, as np.ndarray rather than pd.dataframe \
365
+ will be returned. default: %(default)s")
366
+ ToyDf_parser.add_argument(
367
+ '--min', type=float, default=0.0,
368
+ help="The minimum value. default: %(default)s")
369
+ ToyDf_parser.add_argument(
370
+ '--max', type=float, default=1.0,
371
+ help="The maximum value. default: %(default)s")
323
372
  ToyDf_parser.add_argument(
324
373
  '--decimal', type=int, default=5,
325
374
  help="Number of decimal places to round each column to. default: %(default)s")
@@ -331,10 +380,10 @@ def nafiller():
331
380
  'output', type=str, metavar='out_df',
332
381
  help="Output data frame.")
333
382
  InsertNA_parser.add_argument(
334
- '--na', type=int,
383
+ '--nmiss', type=int,
335
384
  help="Number of missing values ingested into the dataframe.")
336
385
  InsertNA_parser.add_argument(
337
- '-s', '--seed', type=int, default=123,
386
+ '--seed', type=int, default=123,
338
387
  help="Seed used to initialize a pseudorandom number generator. default: %(default)s")
339
388
  InsertNA_parser.add_argument(
340
389
  '--decimal', type=int, default=5,
@@ -483,6 +532,25 @@ def nafiller():
483
532
  continue
484
533
  output_df = input_df
485
534
  output_df = output_df.round(args.decimal)
535
+ output_df.to_csv(args.output, sep="\t", na_rep="NaN")
536
+ logging.info("File \"%s\" contains %d missing values ..." %
537
+ (args.output, output_df.isna().sum().sum()))
538
+ elif command.lower() == 'mw':
539
+ input_df = read_df(args.input)
540
+ logging.info("File \"%s\" contains %d missing values ..." %
541
+ (args.input, input_df.isna().sum().sum()))
542
+ logging.info("Replace missing values using moving window on %s ..." % axis_name[args.axis])
543
+ if args.axis == 1:
544
+ output_df = moving_window(
545
+ input_df, nindex = args.nindex, wsize=args.wsize,
546
+ errors=args.errors, func=getattr(np, args.func))
547
+ output_df = output_df.round(args.decimal)
548
+ elif args.axis == 0:
549
+ output_df = moving_window(
550
+ input_df.T, nindex = args.nindex, wsize=args.wsize,
551
+ errors=args.errors, func=getattr(np, args.func))
552
+ output_df = output_df.round(args.decimal).T
553
+
486
554
  output_df.to_csv(args.output, sep="\t", na_rep="NaN")
487
555
  logging.info("File \"%s\" contains %d missing values ..." %
488
556
  (args.output, output_df.isna().sum().sum()))
@@ -576,9 +644,10 @@ def nafiller():
576
644
 
577
645
  elif command.lower() == 'toydf':
578
646
  logging.info("Generate toy dataframe ...")
579
- output_df = toy_df(nrow = args.nrow, ncol = args.ncol,
580
- n_miss = args.na, sample_prefix=args.prefix,
581
- seed=args.seed)
647
+ output_df = toy_df(n_rows = args.nrow, n_cols = args.ncol,
648
+ missingness = args.nmiss, sample_prefix=args.prefix,
649
+ min_val = args.min, max_val = args.max,
650
+ rand_seed=args.seed)
582
651
  #print(output_df)
583
652
  output_df = output_df.round(args.decimal)
584
653
  output_df.to_csv(args.output, sep="\t", na_rep="NaN")
@@ -589,7 +658,7 @@ def nafiller():
589
658
  logging.info("File \"%s\" contains %d missing values ..." %
590
659
  (args.input, input_df.isna().sum().sum()))
591
660
  logging.info("Insert %d NAs into dataframe ..." % args.na)
592
- output_df = insert_na(df=input_df, n_miss=args.na, seed=args.seed)
661
+ output_df = insert_na(df=input_df, n_miss=args.nmiss, seed=args.seed)
593
662
  output_df = output_df.round(args.decimal)
594
663
  output_df.to_csv(args.output, sep="\t", na_rep="NaN")
595
664
  logging.info("File \"%s\" contains %d missing values ..." %
@@ -11,7 +11,6 @@ script_files = [
11
11
  "scripts/CpG_to_gene.py",
12
12
  "scripts/beta_PCA.py",
13
13
  "scripts/beta_UMAP.py",
14
- "scripts/beta_imputation.py",
15
14
  "scripts/beta_jitter_plot.py",
16
15
  "scripts/beta_m_conversion.py",
17
16
  "scripts/beta_profile_gene_centered.py",
@@ -30,6 +29,7 @@ script_files = [
30
29
  "scripts/dmc_nonparametric.py",
31
30
  "scripts/dmc_ttest.py",
32
31
  "scripts/predict_sex.py",
32
+ "scripts/predict_missing.py",
33
33
  ]
34
34
 
35
35
  if __name__ == "__main__":
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cpgtools
3
- Version: 2.0.2
3
+ Version: 2.0.3
4
4
  Summary: Tools to analyze and visualize DNA methylation data
5
5
  Author-email: Liguo Wang <wangliguo78@gmail.com>
6
6
  Maintainer-email: Liguo Wang <wangliguo78@gmail.com>
@@ -4,6 +4,7 @@ README.md
4
4
  distribute_setup.py
5
5
  pyproject.toml
6
6
  setup.py
7
+ scripts/.DS_Store
7
8
  scripts/CpG_aggregation.py
8
9
  scripts/CpG_anno_position.py
9
10
  scripts/CpG_anno_probe.py
@@ -15,7 +16,6 @@ scripts/CpG_logo.py
15
16
  scripts/CpG_to_gene.py
16
17
  scripts/beta_PCA.py
17
18
  scripts/beta_UMAP.py
18
- scripts/beta_imputation.py
19
19
  scripts/beta_jitter_plot.py
20
20
  scripts/beta_m_conversion.py
21
21
  scripts/beta_profile_gene_centered.py
@@ -33,6 +33,7 @@ scripts/dmc_glm.py
33
33
  scripts/dmc_logit.py
34
34
  scripts/dmc_nonparametric.py
35
35
  scripts/dmc_ttest.py
36
+ scripts/predict_missing.py
36
37
  scripts/predict_sex.py
37
38
  src/cpgmodule/BED.py
38
39
  src/cpgmodule/MI.py
@@ -46,6 +47,25 @@ src/cpgmodule/methylClock.py
46
47
  src/cpgmodule/padjust.py
47
48
  src/cpgmodule/region2gene.py
48
49
  src/cpgmodule/utils.py
50
+ src/cpgmodule/data/AltumAge_cpg.pkl
51
+ src/cpgmodule/data/AltumAge_multi_platform_cpgs.pkl
52
+ src/cpgmodule/data/AltumAge_scaler.pkl
53
+ src/cpgmodule/data/GA_Bohlin.pkl
54
+ src/cpgmodule/data/GA_Haftorn.pkl
55
+ src/cpgmodule/data/GA_Knight.pkl
56
+ src/cpgmodule/data/GA_Lee_CPC.pkl
57
+ src/cpgmodule/data/GA_Lee_RPC.pkl
58
+ src/cpgmodule/data/GA_Lee_refined_RPC.pkl
59
+ src/cpgmodule/data/GA_Mayne.pkl
60
+ src/cpgmodule/data/Hannum.pkl
61
+ src/cpgmodule/data/Horvath_2013.pkl
62
+ src/cpgmodule/data/Horvath_2018.pkl
63
+ src/cpgmodule/data/Levine.pkl
64
+ src/cpgmodule/data/Lu_DNAmTL.pkl
65
+ src/cpgmodule/data/Ped_McEwen.pkl
66
+ src/cpgmodule/data/Ped_Wu.pkl
67
+ src/cpgmodule/data/Zhang_BLUP.pkl
68
+ src/cpgmodule/data/Zhang_EN.pkl
49
69
  src/cpgmodule/data/__init__.py
50
70
  src/cpgtools.egg-info/PKG-INFO
51
71
  src/cpgtools.egg-info/SOURCES.txt
@@ -41,20 +41,40 @@ def execute_fn_with_args_and_or_kwargs(fn, args, kwargs):
41
41
  except TypeError:
42
42
  return fn(*args)
43
43
 
44
- def toy_df(nrow, ncol, n_miss, sample_prefix, seed):
45
- """
46
- Make a dataFrame (nrow x ncol) with random values between 0 and 1, add
47
- some missing values (n_miss). Generate a toy dataframe for testing purposes.
48
- """
49
- np.random.seed(seed)
50
- data = np.random.rand(nrow*ncol).reshape((nrow, ncol)).astype(float)
51
- x_ind = np.random.choice(nrow, n_miss)
52
- y_ind = np.random.choice(ncol, n_miss)
53
- for x,y in zip(x_ind, y_ind):
54
- data[x][y] = np.nan
55
- colNames = [sample_prefix + '_' + str(i) for i in range(0,ncol)]
56
- df = pd.DataFrame(data, columns=colNames)
57
- return df
44
+ def toy_df(n_rows=20, n_cols=5, missingness=0.2, min_val=0, max_val=1,
45
+ missing_value=np.nan, rand_seed=1234, sample_prefix=None):
46
+ """Generate an array or DataFrame with NaNs"""
47
+ np.random.seed(rand_seed)
48
+ X = np.random.uniform(
49
+ low = min_val, high = max_val, size = n_rows * n_cols).reshape(n_rows, n_cols).astype(
50
+ float)
51
+ # check missingness
52
+ if missingness > 0:
53
+ # If missingness >= 1 then use it as approximate (see below) count
54
+ if missingness >= 1:
55
+ n_missing = int(missingness)
56
+ else:
57
+ n_missing = int(missingness * n_rows * n_cols)
58
+ print(n_missing)
59
+
60
+ # Introduce NaNs until n_miss "NAs" are inserted.
61
+ missing_count = 0
62
+ for i,j in zip(np.random.choice(n_rows, n_missing), np.random.choice(n_cols, n_missing)):
63
+ if np.isnan(X[i][j]):
64
+ continue
65
+ else:
66
+ X[i][j] = missing_value
67
+ missing_count += 1
68
+ if missing_count >= n_missing:
69
+ break
70
+
71
+ # check sample_prefix
72
+ if sample_prefix is None:
73
+ return X
74
+ else:
75
+ colNames = [sample_prefix + '_' + str(i) for i in range(0, n_cols)]
76
+ return pd.DataFrame(X, columns=colNames)
77
+
58
78
 
59
79
  def insert_na(df, n_miss, seed):
60
80
  np.random.seed(seed)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes