cpgtools 2.0.2__py3-none-any.whl → 2.0.3__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of cpgtools might be problematic. Click here for more details.

Files changed (55)
  1. cpgmodule/data/AltumAge_cpg.pkl +0 -0
  2. cpgmodule/data/AltumAge_multi_platform_cpgs.pkl +0 -0
  3. cpgmodule/data/AltumAge_scaler.pkl +0 -0
  4. cpgmodule/data/GA_Bohlin.pkl +0 -0
  5. cpgmodule/data/GA_Haftorn.pkl +0 -0
  6. cpgmodule/data/GA_Knight.pkl +0 -0
  7. cpgmodule/data/GA_Lee_CPC.pkl +0 -0
  8. cpgmodule/data/GA_Lee_RPC.pkl +0 -0
  9. cpgmodule/data/GA_Lee_refined_RPC.pkl +0 -0
  10. cpgmodule/data/GA_Mayne.pkl +0 -0
  11. cpgmodule/data/Hannum.pkl +0 -0
  12. cpgmodule/data/Horvath_2013.pkl +0 -0
  13. cpgmodule/data/Horvath_2018.pkl +0 -0
  14. cpgmodule/data/Levine.pkl +0 -0
  15. cpgmodule/data/Lu_DNAmTL.pkl +0 -0
  16. cpgmodule/data/Ped_McEwen.pkl +0 -0
  17. cpgmodule/data/Ped_Wu.pkl +0 -0
  18. cpgmodule/data/Zhang_BLUP.pkl +0 -0
  19. cpgmodule/data/Zhang_EN.pkl +0 -0
  20. {cpgtools-2.0.2.data → cpgtools-2.0.3.data}/scripts/dmc_ttest.py +5 -1
  21. cpgtools-2.0.2.data/scripts/beta_imputation.py → cpgtools-2.0.3.data/scripts/predict_missing.py +88 -19
  22. {cpgtools-2.0.2.dist-info → cpgtools-2.0.3.dist-info}/METADATA +1 -1
  23. {cpgtools-2.0.2.dist-info → cpgtools-2.0.3.dist-info}/RECORD +55 -36
  24. {cpgtools-2.0.2.dist-info → cpgtools-2.0.3.dist-info}/WHEEL +1 -1
  25. impyute/ops/util.py +34 -14
  26. {cpgtools-2.0.2.data → cpgtools-2.0.3.data}/scripts/CpG_aggregation.py +0 -0
  27. {cpgtools-2.0.2.data → cpgtools-2.0.3.data}/scripts/CpG_anno_position.py +0 -0
  28. {cpgtools-2.0.2.data → cpgtools-2.0.3.data}/scripts/CpG_anno_probe.py +0 -0
  29. {cpgtools-2.0.2.data → cpgtools-2.0.3.data}/scripts/CpG_density_gene_centered.py +0 -0
  30. {cpgtools-2.0.2.data → cpgtools-2.0.3.data}/scripts/CpG_distrb_chrom.py +0 -0
  31. {cpgtools-2.0.2.data → cpgtools-2.0.3.data}/scripts/CpG_distrb_gene_centered.py +0 -0
  32. {cpgtools-2.0.2.data → cpgtools-2.0.3.data}/scripts/CpG_distrb_region.py +0 -0
  33. {cpgtools-2.0.2.data → cpgtools-2.0.3.data}/scripts/CpG_logo.py +0 -0
  34. {cpgtools-2.0.2.data → cpgtools-2.0.3.data}/scripts/CpG_to_gene.py +0 -0
  35. {cpgtools-2.0.2.data → cpgtools-2.0.3.data}/scripts/beta_PCA.py +0 -0
  36. {cpgtools-2.0.2.data → cpgtools-2.0.3.data}/scripts/beta_UMAP.py +0 -0
  37. {cpgtools-2.0.2.data → cpgtools-2.0.3.data}/scripts/beta_jitter_plot.py +0 -0
  38. {cpgtools-2.0.2.data → cpgtools-2.0.3.data}/scripts/beta_m_conversion.py +0 -0
  39. {cpgtools-2.0.2.data → cpgtools-2.0.3.data}/scripts/beta_profile_gene_centered.py +0 -0
  40. {cpgtools-2.0.2.data → cpgtools-2.0.3.data}/scripts/beta_profile_region.py +0 -0
  41. {cpgtools-2.0.2.data → cpgtools-2.0.3.data}/scripts/beta_selectNBest.py +0 -0
  42. {cpgtools-2.0.2.data → cpgtools-2.0.3.data}/scripts/beta_stacked_barplot.py +0 -0
  43. {cpgtools-2.0.2.data → cpgtools-2.0.3.data}/scripts/beta_stats.py +0 -0
  44. {cpgtools-2.0.2.data → cpgtools-2.0.3.data}/scripts/beta_tSNE.py +0 -0
  45. {cpgtools-2.0.2.data → cpgtools-2.0.3.data}/scripts/beta_topN.py +0 -0
  46. {cpgtools-2.0.2.data → cpgtools-2.0.3.data}/scripts/beta_trichotmize.py +0 -0
  47. {cpgtools-2.0.2.data → cpgtools-2.0.3.data}/scripts/dmc_Bayes.py +0 -0
  48. {cpgtools-2.0.2.data → cpgtools-2.0.3.data}/scripts/dmc_bb.py +0 -0
  49. {cpgtools-2.0.2.data → cpgtools-2.0.3.data}/scripts/dmc_fisher.py +0 -0
  50. {cpgtools-2.0.2.data → cpgtools-2.0.3.data}/scripts/dmc_glm.py +0 -0
  51. {cpgtools-2.0.2.data → cpgtools-2.0.3.data}/scripts/dmc_logit.py +0 -0
  52. {cpgtools-2.0.2.data → cpgtools-2.0.3.data}/scripts/dmc_nonparametric.py +0 -0
  53. {cpgtools-2.0.2.data → cpgtools-2.0.3.data}/scripts/predict_sex.py +0 -0
  54. {cpgtools-2.0.2.dist-info → cpgtools-2.0.3.dist-info}/LICENSE +0 -0
  55. {cpgtools-2.0.2.dist-info → cpgtools-2.0.3.dist-info}/top_level.txt +0 -0
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -189,7 +189,7 @@ def main():
189
189
  else:
190
190
  continue
191
191
  line_num += 1
192
-
192
+
193
193
  printlog("Perfrom Benjamini-Hochberg (aka FDR) correction ...")
194
194
  adjusted_p = {}
195
195
  q_list = padjust.multiple_testing_correction(p_list)
@@ -204,6 +204,10 @@ def main():
204
204
  else:
205
205
  f = l.split()
206
206
  probe_ID = f[0]
207
+ if probe_ID in delta_beta:
208
+ pass
209
+ else:
210
+ delta_beta[probe_ID] = 'n/a'
207
211
  try:
208
212
  print (l + '\t' + str(delta_beta[probe_ID]) + '\t' + adjusted_p[probe_ID], file=FOUT)
209
213
  except:
@@ -20,6 +20,7 @@ from impyute.cs.em import em
20
20
  from impyute.ops.util import toy_df,insert_na
21
21
  from impyute.cs.random import random_impute
22
22
  from impyute.cs.buck_iterative import buck_iterative
23
+ from impyute.ts.moving_window import moving_window
23
24
  from missingpy import MissForest
24
25
 
25
26
  #use pip to install fancyimpute
@@ -64,6 +65,7 @@ def nafiller():
64
65
  the same row or column.",
65
66
  'FillRef': "Impute missing values using values from an external \
66
67
  reference dataset.",
68
+ 'MW': "Interpolate the missing values with moving window.",
67
69
  'KNN': "Impute missing values using scikit-learn's KNNImputer function. \
68
70
  Note: slow for large datasets.",
69
71
  'KNN2': "Impute missing values using KNN2",
@@ -98,6 +100,7 @@ def nafiller():
98
100
  FillMax_parser = sub_parsers.add_parser('FillMax', help=commands['FillMax'])
99
101
  FillRand_parser = sub_parsers.add_parser('FillRand', help=commands['FillRand'])
100
102
  FillRef_parser = sub_parsers.add_parser('FillRef', help=commands['FillRef'])
103
+ MW_parser = sub_parsers.add_parser('MW', help=commands['MW'])
101
104
  KNN_parser = sub_parsers.add_parser('KNN', help=commands['KNN'])
102
105
  fKNN_parser = sub_parsers.add_parser('fKNN', help=commands['fKNN'])
103
106
  EM_parser = sub_parsers.add_parser('EM', help=commands['EM'])
@@ -116,7 +119,7 @@ def nafiller():
116
119
  'output', type=str, metavar='out_df',
117
120
  help="Output data frame.")
118
121
  DropNA_parser.add_argument(
119
- '-a', '--axis', type=int, choices=range(2), default=0,
122
+ '--axis', type=int, choices=range(2), default=0,
120
123
  help="0 : drop rows with any missing values, 1 : drop columns with \
121
124
  missing values. Default: 0")
122
125
  DropNA_parser.add_argument(
@@ -130,7 +133,7 @@ def nafiller():
130
133
  'output', type=str, metavar='out_df',
131
134
  help="Output data frame.")
132
135
  FillValue_parser.add_argument(
133
- '-s', '--score', type=float, default=0.0,
136
+ '--score', type=float, default=0.0,
134
137
  help="The value uesd to fill all NAs.")
135
138
  FillValue_parser.add_argument(
136
139
  '--decimal', type=int, default=5,
@@ -143,7 +146,7 @@ def nafiller():
143
146
  'output', type=str, metavar='out_df',
144
147
  help="Output data frame.")
145
148
  FillMean_parser.add_argument(
146
- '-a', '--axis', type=int, choices=range(2), default=1,
149
+ '--axis', type=int, choices=range(2), default=1,
147
150
  help="0 means column, 1 means row. Default: fill NAs with row means")
148
151
  FillMean_parser.add_argument(
149
152
  '--decimal', type=int, default=5,
@@ -156,7 +159,7 @@ def nafiller():
156
159
  'output', type=str, metavar='out_df',
157
160
  help="Output data frame.")
158
161
  FillMedian_parser.add_argument(
159
- '-a', '--axis', type=int, choices=range(2), default=1,
162
+ '--axis', type=int, choices=range(2), default=1,
160
163
  help="0 means column, 1 means row. Default: fill NAs with row medians")
161
164
  FillMedian_parser.add_argument(
162
165
  '--decimal', type=int, default=5,
@@ -169,7 +172,7 @@ def nafiller():
169
172
  'output', type=str, metavar='out_df',
170
173
  help="Output data frame.")
171
174
  FillMin_parser.add_argument(
172
- '-a', '--axis', type=int, choices=range(2), default=1,
175
+ '--axis', type=int, choices=range(2), default=1,
173
176
  help="0 means column, 1 means row. Default: fill NAs with the minimum value of the rows.")
174
177
  FillMin_parser.add_argument(
175
178
  '--decimal', type=int, default=5,
@@ -182,7 +185,7 @@ def nafiller():
182
185
  'output', type=str, metavar='out_df',
183
186
  help="Output data frame.")
184
187
  FillMax_parser.add_argument(
185
- '-a', '--axis', type=int, choices=range(2), default=1,
188
+ '--axis', type=int, choices=range(2), default=1,
186
189
  help="0 means column, 1 means row. Default: fill NAs with the maximum value of the rows.")
187
190
  FillMax_parser.add_argument(
188
191
  '--decimal', type=int, default=5,
@@ -195,7 +198,7 @@ def nafiller():
195
198
  'output', type=str, metavar='out_df',
196
199
  help="Output data frame.")
197
200
  FillRand_parser.add_argument(
198
- '-a', '--axis', type=int, choices=range(2), default=1,
201
+ '--axis', type=int, choices=range(2), default=1,
199
202
  help="0 means column, 1 means row. Default: fill NAs with values randomly selected from rows.")
200
203
  FillRand_parser.add_argument(
201
204
  '--decimal', type=int, default=5,
@@ -208,12 +211,50 @@ def nafiller():
208
211
  'output', type=str, metavar='out_df',
209
212
  help="Output data frame.")
210
213
  FillRef_parser.add_argument(
211
- '-r', '--ref', type=str,
214
+ '--ref', type=str,
212
215
  help="File name of the external reference.")
213
216
  FillRef_parser.add_argument(
214
217
  '--decimal', type=int, default=5,
215
218
  help="Number of decimal places to round each column to. default: %(default)s")
216
219
 
220
+ MW_parser.add_argument(
221
+ 'input', type=str, metavar='input_df',
222
+ help="Input data frame.")
223
+ MW_parser.add_argument(
224
+ 'output', type=str, metavar='out_df',
225
+ help="Output data frame.")
226
+ MW_parser.add_argument(
227
+ '--nindex', type=int, choices=[0, -1, None],
228
+ default=None,
229
+ help="Null index. Index of the null value inside the moving average window. \
230
+ See impyute documentation for details. default: %(default)s")
231
+ MW_parser.add_argument(
232
+ '--wsize', type=int, default=5,
233
+ help="Size of the moving average window/area of values being used \
234
+ for each local imputation. This number includes the missing value. \
235
+ default: %(default)s")
236
+ MW_parser.add_argument(
237
+ '--errors', type=str, choices=["raise", "coerce", "ignore"],
238
+ default='coerce',
239
+ help="Errors will occur with the indexing of the windows - for \
240
+ example if there is a nan at data[x][0] and `nindex` is set to \
241
+ -1 or there is a nan at data[x][-1] and `nindex` is set to 0. `\
242
+ 'raise' will raise an error, `coerce` will try again using an \
243
+ nindex set to the middle and `ignore` will just leave it as a \
244
+ nan default: %(default)s")
245
+ MW_parser.add_argument(
246
+ '--func', type=str, choices=["mean", "median"],
247
+ default='mean',
248
+ help="Function to summerzie values within the moving window. \
249
+ default: %(default)s")
250
+ MW_parser.add_argument(
251
+ '--axis', type=int, choices=range(2), default=1,
252
+ help="0 means column, 1 means row. Default: fill missing value \
253
+ with windows moving on rows.")
254
+ MW_parser.add_argument(
255
+ '--decimal', type=int, default=5,
256
+ help="Number of decimal places to round each column to. default: %(default)s")
257
+
217
258
  KNN_parser.add_argument(
218
259
  'input', type=str, metavar='input_df',
219
260
  help="Input data frame.")
@@ -306,20 +347,28 @@ def nafiller():
306
347
  'output', type=str, metavar='out_df',
307
348
  help="Name of the output data frame.")
308
349
  ToyDf_parser.add_argument(
309
- '-r', '--nrow', type=int, default=10,
350
+ '--nrow', type=int, default=10,
310
351
  help="Number of rows. default: %(default)s")
311
352
  ToyDf_parser.add_argument(
312
- '-c', '--ncol', type=int, default=10,
353
+ '--ncol', type=int, default=10,
313
354
  help="Number of columns. default: %(default)s")
314
355
  ToyDf_parser.add_argument(
315
- '--na', type=int, default=5,
356
+ '--nmiss', type=float, default=5,
316
357
  help="Number of missing values ingested into the dataframe. default: %(default)s")
317
358
  ToyDf_parser.add_argument(
318
- '-s', '--seed', type=int, default=123,
359
+ '--seed', type=int, default=123,
319
360
  help="Seed used to initialize a pseudorandom number generator. default: %(default)s")
320
361
  ToyDf_parser.add_argument(
321
362
  '--prefix', type=str, default='s',
322
- help="Prefix of the column names, a series numbers will be appended to the prefix. default: %(default)s")
363
+ help="Prefix of the column names, a series numbers will be appended to \
364
+ the prefix. If this is set to None, as np.ndarray rather than pd.dataframe \
365
+ will be returned. default: %(default)s")
366
+ ToyDf_parser.add_argument(
367
+ '--min', type=float, default=0.0,
368
+ help="The minimum value. default: %(default)s")
369
+ ToyDf_parser.add_argument(
370
+ '--max', type=float, default=1.0,
371
+ help="The maximum value. default: %(default)s")
323
372
  ToyDf_parser.add_argument(
324
373
  '--decimal', type=int, default=5,
325
374
  help="Number of decimal places to round each column to. default: %(default)s")
@@ -331,10 +380,10 @@ def nafiller():
331
380
  'output', type=str, metavar='out_df',
332
381
  help="Output data frame.")
333
382
  InsertNA_parser.add_argument(
334
- '--na', type=int,
383
+ '--nmiss', type=int,
335
384
  help="Number of missing values ingested into the dataframe.")
336
385
  InsertNA_parser.add_argument(
337
- '-s', '--seed', type=int, default=123,
386
+ '--seed', type=int, default=123,
338
387
  help="Seed used to initialize a pseudorandom number generator. default: %(default)s")
339
388
  InsertNA_parser.add_argument(
340
389
  '--decimal', type=int, default=5,
@@ -483,6 +532,25 @@ def nafiller():
483
532
  continue
484
533
  output_df = input_df
485
534
  output_df = output_df.round(args.decimal)
535
+ output_df.to_csv(args.output, sep="\t", na_rep="NaN")
536
+ logging.info("File \"%s\" contains %d missing values ..." %
537
+ (args.output, output_df.isna().sum().sum()))
538
+ elif command.lower() == 'mw':
539
+ input_df = read_df(args.input)
540
+ logging.info("File \"%s\" contains %d missing values ..." %
541
+ (args.input, input_df.isna().sum().sum()))
542
+ logging.info("Replace missing values using moving window on %s ..." % axis_name[args.axis])
543
+ if args.axis == 1:
544
+ output_df = moving_window(
545
+ input_df, nindex = args.nindex, wsize=args.wsize,
546
+ errors=args.errors, func=getattr(np, args.func))
547
+ output_df = output_df.round(args.decimal)
548
+ elif args.axis == 0:
549
+ output_df = moving_window(
550
+ input_df.T, nindex = args.nindex, wsize=args.wsize,
551
+ errors=args.errors, func=getattr(np, args.func))
552
+ output_df = output_df.round(args.decimal).T
553
+
486
554
  output_df.to_csv(args.output, sep="\t", na_rep="NaN")
487
555
  logging.info("File \"%s\" contains %d missing values ..." %
488
556
  (args.output, output_df.isna().sum().sum()))
@@ -576,9 +644,10 @@ def nafiller():
576
644
 
577
645
  elif command.lower() == 'toydf':
578
646
  logging.info("Generate toy dataframe ...")
579
- output_df = toy_df(nrow = args.nrow, ncol = args.ncol,
580
- n_miss = args.na, sample_prefix=args.prefix,
581
- seed=args.seed)
647
+ output_df = toy_df(n_rows = args.nrow, n_cols = args.ncol,
648
+ missingness = args.nmiss, sample_prefix=args.prefix,
649
+ min_val = args.min, max_val = args.max,
650
+ rand_seed=args.seed)
582
651
  #print(output_df)
583
652
  output_df = output_df.round(args.decimal)
584
653
  output_df.to_csv(args.output, sep="\t", na_rep="NaN")
@@ -589,7 +658,7 @@ def nafiller():
589
658
  logging.info("File \"%s\" contains %d missing values ..." %
590
659
  (args.input, input_df.isna().sum().sum()))
591
660
  logging.info("Insert %d NAs into dataframe ..." % args.na)
592
- output_df = insert_na(df=input_df, n_miss=args.na, seed=args.seed)
661
+ output_df = insert_na(df=input_df, n_miss=args.nmiss, seed=args.seed)
593
662
  output_df = output_df.round(args.decimal)
594
663
  output_df.to_csv(args.output, sep="\t", na_rep="NaN")
595
664
  logging.info("File \"%s\" contains %d missing values ..." %
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cpgtools
3
- Version: 2.0.2
3
+ Version: 2.0.3
4
4
  Summary: Tools to analyze and visualize DNA methylation data
5
5
  Author-email: Liguo Wang <wangliguo78@gmail.com>
6
6
  Maintainer-email: Liguo Wang <wangliguo78@gmail.com>
@@ -10,37 +10,56 @@ cpgmodule/methylClock.py,sha256=HyMk3vpVwR3yrkLCPv9fVJs6JCeSchbJEBe7i69jdSs,1202
10
10
  cpgmodule/padjust.py,sha256=mvbQ9_crn_S39yvWrv_JPoU5OxZGHaw7cV3deBxTPdk,2389
11
11
  cpgmodule/region2gene.py,sha256=iuSYa2-ki-qbL1TqqAlTXGT7g-j1uNIA1y7hzMwVM2U,5347
12
12
  cpgmodule/utils.py,sha256=NgKT3aJlRT5pQKKO3_e0WB2_u93pY13F-k-r0pvgKno,16095
13
+ cpgmodule/data/AltumAge_cpg.pkl,sha256=T0pfBWrzQO00-z85QNE2CP0ntU0mtF2WkZeEPiArqTw,264691
14
+ cpgmodule/data/AltumAge_multi_platform_cpgs.pkl,sha256=Bo7ZG6AuxXUmLH6dCFfrlYkiTkeLjyqRgcD7yw4mNZ0,264816
15
+ cpgmodule/data/AltumAge_scaler.pkl,sha256=aDMcC4l0wZJGDi6_MeSpdNoykndfgsUtVKEd6mKf9T0,325509
16
+ cpgmodule/data/GA_Bohlin.pkl,sha256=W72ra6APUZDNPmVJ-kRuaVzpiKGgI_K7nDqSFx3nf3w,2667
17
+ cpgmodule/data/GA_Haftorn.pkl,sha256=ZFCJ2h0DWEaa-pHKdsSuWbQGL1JMwDXBtDyhx9DlZLc,4622
18
+ cpgmodule/data/GA_Knight.pkl,sha256=dsKeIt7PxCcw_tcVObtVKB3KqtF7bPH-vQqbClWI5Tc,25322
19
+ cpgmodule/data/GA_Lee_CPC.pkl,sha256=WfeI1bGnIS5_AIX7qnspyGYUgzGOAJ9JJ9CGQjLgkFo,25373
20
+ cpgmodule/data/GA_Lee_RPC.pkl,sha256=rLhr-sboWqIZ6Y08zODh7bgwXBa4SNkE4ZZaIyQTaYc,25382
21
+ cpgmodule/data/GA_Lee_refined_RPC.pkl,sha256=bXiZIgFSoX7Sr0ai1zHzGXv-EMGBglvx8z5vr3IpccA,25348
22
+ cpgmodule/data/GA_Mayne.pkl,sha256=CUfwiVIywJZwTb0PhRuW-6TQ5H1kO7tR6KHepyzBULg,1899
23
+ cpgmodule/data/Hannum.pkl,sha256=gXJxy6S8lBCUdr_T7ZM3R6hqKI22yev5-2giCPCCdLc,2049
24
+ cpgmodule/data/Horvath_2013.pkl,sha256=qwP614VYmmaESKanQ680sYgaHSWJrP73im_f5duzKgU,8313
25
+ cpgmodule/data/Horvath_2018.pkl,sha256=IQcFNIg-z-Mrq8vEoHI509c64Dt1wtyHInU9YwsYP-w,9225
26
+ cpgmodule/data/Levine.pkl,sha256=OhasOuEAvbDblZfGGFmu82kDqdo5CQ2DjZ8hfCofhvI,11751
27
+ cpgmodule/data/Lu_DNAmTL.pkl,sha256=SdjT9x8A0GbnKoMlDhgslmmINJ8e5Asyk9KQlw_EtJo,3548
28
+ cpgmodule/data/Ped_McEwen.pkl,sha256=-hxOAqFUxORmPLV_FKYZxjOFtabxPzbbzfY6JTitq8k,2654
29
+ cpgmodule/data/Ped_Wu.pkl,sha256=gjXtpdpQ8QVJs1I6UM-VPAvQjNd-JDoy5DzDL-gP-Cg,2924
30
+ cpgmodule/data/Zhang_BLUP.pkl,sha256=dpH0L3qTxMAJ50XtTeNMmqx1khZkZyrH204E37fE37E,7038226
31
+ cpgmodule/data/Zhang_EN.pkl,sha256=RFEOmBQGXZvIwQ7tOYT8itOPU87V2Y02vJA2PfY6TnI,11844
13
32
  cpgmodule/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
- cpgtools-2.0.2.data/scripts/CpG_aggregation.py,sha256=iSSUanCPlTkT3f6Df_f_b6zEP2Ixzse9zFwJLL5z2Qw,7278
15
- cpgtools-2.0.2.data/scripts/CpG_anno_position.py,sha256=j8l4v7SpGFaJ3pBVE5qK8coMkxe5h4p1bwmhcPnhxMk,4643
16
- cpgtools-2.0.2.data/scripts/CpG_anno_probe.py,sha256=bACac7x9zX2E1QJnUUmNKS6YNKg_f4K-jxwl_v93--4,3171
17
- cpgtools-2.0.2.data/scripts/CpG_density_gene_centered.py,sha256=JM2PrvHCnGAvymBbbIBr30AfXwu69WgJkJNy6PCmCrg,5266
18
- cpgtools-2.0.2.data/scripts/CpG_distrb_chrom.py,sha256=q3xZ1AH4JEif1DQRG6UXiiUb4jZyt2khzTZld1jaXnA,6207
19
- cpgtools-2.0.2.data/scripts/CpG_distrb_gene_centered.py,sha256=tLfalDRzklBcNfZxhABjhprcBlk4HgUyxXxznOGVTN8,7736
20
- cpgtools-2.0.2.data/scripts/CpG_distrb_region.py,sha256=nJGgZLhZe2kYCPnxuW--EemxOcoReu2hdkIAgl-7UAE,5391
21
- cpgtools-2.0.2.data/scripts/CpG_logo.py,sha256=U8RxYPmakKii1xmEgG22tKfdZBPpjhZbAfKxgF_Z-O8,4597
22
- cpgtools-2.0.2.data/scripts/CpG_to_gene.py,sha256=fciNtI5N6fO-jZX2eyKgiAhDIsus4SljYSCbEQMeTHI,6114
23
- cpgtools-2.0.2.data/scripts/beta_PCA.py,sha256=cRlqGC5CQlpsmzhAwy2AoI4_lLwjFh_rvn7ObNbHgYg,7155
24
- cpgtools-2.0.2.data/scripts/beta_UMAP.py,sha256=SJfxtCsM1NTK5rYnZjtXiDj_x1kJiYZvR1NGHKo0IRI,8079
25
- cpgtools-2.0.2.data/scripts/beta_imputation.py,sha256=o7wM_iHFwhxjRAMQ5_fCxgAfO9VYe48O9dY4BeZF2wA,28928
26
- cpgtools-2.0.2.data/scripts/beta_jitter_plot.py,sha256=Knja1n1rpD0qe3FHkNPax0p6BYsmAtEQBFB1wBdpHyY,4389
27
- cpgtools-2.0.2.data/scripts/beta_m_conversion.py,sha256=992tJI0IzkphUaEr_9_CveSRPjKdnyYkCTO10D_xkbg,2848
28
- cpgtools-2.0.2.data/scripts/beta_profile_gene_centered.py,sha256=Jb9mm8y-1cKH_EepRFhUH5mfBdoAt0zpN_VXETRnQek,7312
29
- cpgtools-2.0.2.data/scripts/beta_profile_region.py,sha256=GHDPyUFR9XJm2CK0_9wCKossMTWWvn4VwYCCSA8kn2E,5971
30
- cpgtools-2.0.2.data/scripts/beta_selectNBest.py,sha256=Uu_MvQUm6Zc8MGKuGUEooJ-IL7C1eg_LNRqQsNdLaWs,4638
31
- cpgtools-2.0.2.data/scripts/beta_stacked_barplot.py,sha256=2fcypD_BE4XqK-vl8hHsgyWqvp3I9oLvg8tF2dd5QZ0,3415
32
- cpgtools-2.0.2.data/scripts/beta_stats.py,sha256=WBh3Aquk_AHeUcteLPtt2Q2yKpqu5aBA38zaFz_7mVI,3021
33
- cpgtools-2.0.2.data/scripts/beta_tSNE.py,sha256=0UTxNxyaDb-iNqbeIfFKj9DRURTQcHeJdltr-iY72XM,8450
34
- cpgtools-2.0.2.data/scripts/beta_topN.py,sha256=gmbmJZoGJt3QljlRHUA8LaKSIurdCNgtn2J9LAIQNCo,3914
35
- cpgtools-2.0.2.data/scripts/beta_trichotmize.py,sha256=T594UfSCDJHRFyGvov7qetLQs10WU73PGVVCDNrwNPg,7143
36
- cpgtools-2.0.2.data/scripts/dmc_Bayes.py,sha256=imqKv5x8s_plXQPL3_d9OpqSI-imrSfUj-k39Zf1XqA,13735
37
- cpgtools-2.0.2.data/scripts/dmc_bb.py,sha256=pOEEHT-hT--r6IswZwQFyGri77OJqbW1y9VVG8pzzpg,8407
38
- cpgtools-2.0.2.data/scripts/dmc_fisher.py,sha256=t18smNJAtipmDm3xgUg62ccKsFQWcdKBo4Tm2cxx43s,5163
39
- cpgtools-2.0.2.data/scripts/dmc_glm.py,sha256=_vJiZhbGA-Rv5v5oBU9N9lGND8MTEVAFHHFqW5hgfSw,6516
40
- cpgtools-2.0.2.data/scripts/dmc_logit.py,sha256=V35wyBgcdiUbsR_NDnEDfs6F2yPyz11ryZvYkkV6Ess,8488
41
- cpgtools-2.0.2.data/scripts/dmc_nonparametric.py,sha256=eFyUy7jXl8bPyySKZLEi3LVxYkDOKp9S4XBXevYvbVw,4945
42
- cpgtools-2.0.2.data/scripts/dmc_ttest.py,sha256=jOW0el_NeJlBuF91wKN2UHnu7phoP0OPrKCNCaOAQBs,6704
43
- cpgtools-2.0.2.data/scripts/predict_sex.py,sha256=ojoMJ6XwBsE1kGpxzKGZ4TZPs5JcUwI_C_9ieEQbB9c,4755
33
+ cpgtools-2.0.3.data/scripts/CpG_aggregation.py,sha256=iSSUanCPlTkT3f6Df_f_b6zEP2Ixzse9zFwJLL5z2Qw,7278
34
+ cpgtools-2.0.3.data/scripts/CpG_anno_position.py,sha256=j8l4v7SpGFaJ3pBVE5qK8coMkxe5h4p1bwmhcPnhxMk,4643
35
+ cpgtools-2.0.3.data/scripts/CpG_anno_probe.py,sha256=bACac7x9zX2E1QJnUUmNKS6YNKg_f4K-jxwl_v93--4,3171
36
+ cpgtools-2.0.3.data/scripts/CpG_density_gene_centered.py,sha256=JM2PrvHCnGAvymBbbIBr30AfXwu69WgJkJNy6PCmCrg,5266
37
+ cpgtools-2.0.3.data/scripts/CpG_distrb_chrom.py,sha256=q3xZ1AH4JEif1DQRG6UXiiUb4jZyt2khzTZld1jaXnA,6207
38
+ cpgtools-2.0.3.data/scripts/CpG_distrb_gene_centered.py,sha256=tLfalDRzklBcNfZxhABjhprcBlk4HgUyxXxznOGVTN8,7736
39
+ cpgtools-2.0.3.data/scripts/CpG_distrb_region.py,sha256=nJGgZLhZe2kYCPnxuW--EemxOcoReu2hdkIAgl-7UAE,5391
40
+ cpgtools-2.0.3.data/scripts/CpG_logo.py,sha256=U8RxYPmakKii1xmEgG22tKfdZBPpjhZbAfKxgF_Z-O8,4597
41
+ cpgtools-2.0.3.data/scripts/CpG_to_gene.py,sha256=fciNtI5N6fO-jZX2eyKgiAhDIsus4SljYSCbEQMeTHI,6114
42
+ cpgtools-2.0.3.data/scripts/beta_PCA.py,sha256=cRlqGC5CQlpsmzhAwy2AoI4_lLwjFh_rvn7ObNbHgYg,7155
43
+ cpgtools-2.0.3.data/scripts/beta_UMAP.py,sha256=SJfxtCsM1NTK5rYnZjtXiDj_x1kJiYZvR1NGHKo0IRI,8079
44
+ cpgtools-2.0.3.data/scripts/beta_jitter_plot.py,sha256=Knja1n1rpD0qe3FHkNPax0p6BYsmAtEQBFB1wBdpHyY,4389
45
+ cpgtools-2.0.3.data/scripts/beta_m_conversion.py,sha256=992tJI0IzkphUaEr_9_CveSRPjKdnyYkCTO10D_xkbg,2848
46
+ cpgtools-2.0.3.data/scripts/beta_profile_gene_centered.py,sha256=Jb9mm8y-1cKH_EepRFhUH5mfBdoAt0zpN_VXETRnQek,7312
47
+ cpgtools-2.0.3.data/scripts/beta_profile_region.py,sha256=GHDPyUFR9XJm2CK0_9wCKossMTWWvn4VwYCCSA8kn2E,5971
48
+ cpgtools-2.0.3.data/scripts/beta_selectNBest.py,sha256=Uu_MvQUm6Zc8MGKuGUEooJ-IL7C1eg_LNRqQsNdLaWs,4638
49
+ cpgtools-2.0.3.data/scripts/beta_stacked_barplot.py,sha256=2fcypD_BE4XqK-vl8hHsgyWqvp3I9oLvg8tF2dd5QZ0,3415
50
+ cpgtools-2.0.3.data/scripts/beta_stats.py,sha256=WBh3Aquk_AHeUcteLPtt2Q2yKpqu5aBA38zaFz_7mVI,3021
51
+ cpgtools-2.0.3.data/scripts/beta_tSNE.py,sha256=0UTxNxyaDb-iNqbeIfFKj9DRURTQcHeJdltr-iY72XM,8450
52
+ cpgtools-2.0.3.data/scripts/beta_topN.py,sha256=gmbmJZoGJt3QljlRHUA8LaKSIurdCNgtn2J9LAIQNCo,3914
53
+ cpgtools-2.0.3.data/scripts/beta_trichotmize.py,sha256=T594UfSCDJHRFyGvov7qetLQs10WU73PGVVCDNrwNPg,7143
54
+ cpgtools-2.0.3.data/scripts/dmc_Bayes.py,sha256=imqKv5x8s_plXQPL3_d9OpqSI-imrSfUj-k39Zf1XqA,13735
55
+ cpgtools-2.0.3.data/scripts/dmc_bb.py,sha256=pOEEHT-hT--r6IswZwQFyGri77OJqbW1y9VVG8pzzpg,8407
56
+ cpgtools-2.0.3.data/scripts/dmc_fisher.py,sha256=t18smNJAtipmDm3xgUg62ccKsFQWcdKBo4Tm2cxx43s,5163
57
+ cpgtools-2.0.3.data/scripts/dmc_glm.py,sha256=_vJiZhbGA-Rv5v5oBU9N9lGND8MTEVAFHHFqW5hgfSw,6516
58
+ cpgtools-2.0.3.data/scripts/dmc_logit.py,sha256=V35wyBgcdiUbsR_NDnEDfs6F2yPyz11ryZvYkkV6Ess,8488
59
+ cpgtools-2.0.3.data/scripts/dmc_nonparametric.py,sha256=eFyUy7jXl8bPyySKZLEi3LVxYkDOKp9S4XBXevYvbVw,4945
60
+ cpgtools-2.0.3.data/scripts/dmc_ttest.py,sha256=DDmWi1udo1gqOAQvFTeRxhIJuAgEmakSNU7P4LKTT7U,6786
61
+ cpgtools-2.0.3.data/scripts/predict_missing.py,sha256=pNSfOD9i7LNLG9BGa80W2-bsCi6qcUlIwoiLnBLbPlo,32465
62
+ cpgtools-2.0.3.data/scripts/predict_sex.py,sha256=ojoMJ6XwBsE1kGpxzKGZ4TZPs5JcUwI_C_9ieEQbB9c,4755
44
63
  impyute/__init__.py,sha256=Q07nw2fDjLm_c3EQWMpSo8nZCfC-fvs2lpRG5uha-gg,87
45
64
  impyute/contrib/__init__.py,sha256=DgGgN0iBoMfHjzr9edz-ZgtJ2KeyIDlyV1t8hxhcM2c,209
46
65
  impyute/contrib/compare.py,sha256=psWygE7PD4Cky4MoTJLAgjaEkTXwqemv3NTKkKVaapo,2852
@@ -62,7 +81,7 @@ impyute/ops/error.py,sha256=lyXJcIDyfdrj_JJbsEI18z-t4UKi1mpLTyt3iS2YvTs,226
62
81
  impyute/ops/inverse_distance_weighting.py,sha256=88hmyc-dWltP66MbjrW-VHBcVgPOI0V0BkrV30B-Lyc,1017
63
82
  impyute/ops/matrix.py,sha256=-eniwqvbvwXNrYw8PmIMD95uMcPMq6HE6OBOyjzR-Vk,837
64
83
  impyute/ops/testing.py,sha256=3ZRVL1sc5IIQLPW2GrD6-lF4_nVnSLo7SBW4rfaOMLA,415
65
- impyute/ops/util.py,sha256=Tcm8reCrWQf8btKRP5StawBL1UV_GOBWmzy1NYDcPbY,2204
84
+ impyute/ops/util.py,sha256=167Xpwib86kT09-pZnJg-VhyVD9r-z7Gkd_L42A2-EY,2838
66
85
  impyute/ops/wrapper.py,sha256=bQj3r9WsZxqNw51y3OobyyXPPxCeISUv2jb-Ga2C_-M,6022
67
86
  impyute/ts/__init__.py,sha256=veePCYSBfo1t5Ahh3wrVCNveizcniXZYhjyS7ahkIhI,145
68
87
  impyute/ts/locf.py,sha256=qusLWfBrepKzFIkeGxIS0S5KnqRgZde2CV6mcF5nf4c,1876
@@ -75,8 +94,8 @@ missingpy/utils.py,sha256=sMU4OGHPvRgReT8e_HqGCYvjAJZkE_qlAPDzLo-_M6U,4360
75
94
  missingpy/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
76
95
  missingpy/tests/test_knnimpute.py,sha256=rR1OPZ4-IzcIKgWmSEN7UEPvVSRwJe0KU_wTxPr46k0,17527
77
96
  missingpy/tests/test_missforest.py,sha256=5YfKZf_xdy9RcXmnlFz7cJDqdnspJDzzrxLnVVWJi3A,13725
78
- cpgtools-2.0.2.dist-info/LICENSE,sha256=NAIE1kmjlmRNJ1BwR9m9i0jXHmQqqujTnEyIBuIxvwM,1074
79
- cpgtools-2.0.2.dist-info/METADATA,sha256=EfdjaKSMi75Wo041GCIx9gd9JjFf7rULeCkAhw2uuqY,2943
80
- cpgtools-2.0.2.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
81
- cpgtools-2.0.2.dist-info/top_level.txt,sha256=L6IX1ORvZ1JssvdU8qOtU99-vbMK9ZSIgYg4zH-rL5Y,28
82
- cpgtools-2.0.2.dist-info/RECORD,,
97
+ cpgtools-2.0.3.dist-info/LICENSE,sha256=NAIE1kmjlmRNJ1BwR9m9i0jXHmQqqujTnEyIBuIxvwM,1074
98
+ cpgtools-2.0.3.dist-info/METADATA,sha256=N1FqwvSLFmK2DvVyYY4oHcqbo6Vj1B4ii48vESlNPkU,2943
99
+ cpgtools-2.0.3.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
100
+ cpgtools-2.0.3.dist-info/top_level.txt,sha256=L6IX1ORvZ1JssvdU8qOtU99-vbMK9ZSIgYg4zH-rL5Y,28
101
+ cpgtools-2.0.3.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.1.0)
2
+ Generator: setuptools (75.5.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
impyute/ops/util.py CHANGED
@@ -41,20 +41,40 @@ def execute_fn_with_args_and_or_kwargs(fn, args, kwargs):
41
41
  except TypeError:
42
42
  return fn(*args)
43
43
 
44
- def toy_df(nrow, ncol, n_miss, sample_prefix, seed):
45
- """
46
- Make a dataFrame (nrow x ncol) with random values between 0 and 1, add
47
- some missing values (n_miss). Generate a toy dataframe for testing purposes.
48
- """
49
- np.random.seed(seed)
50
- data = np.random.rand(nrow*ncol).reshape((nrow, ncol)).astype(float)
51
- x_ind = np.random.choice(nrow, n_miss)
52
- y_ind = np.random.choice(ncol, n_miss)
53
- for x,y in zip(x_ind, y_ind):
54
- data[x][y] = np.nan
55
- colNames = [sample_prefix + '_' + str(i) for i in range(0,ncol)]
56
- df = pd.DataFrame(data, columns=colNames)
57
- return df
44
+ def toy_df(n_rows=20, n_cols=5, missingness=0.2, min_val=0, max_val=1,
45
+ missing_value=np.nan, rand_seed=1234, sample_prefix=None):
46
+ """Generate an array or DataFrame with NaNs"""
47
+ np.random.seed(rand_seed)
48
+ X = np.random.uniform(
49
+ low = min_val, high = max_val, size = n_rows * n_cols).reshape(n_rows, n_cols).astype(
50
+ float)
51
+ # check missingness
52
+ if missingness > 0:
53
+ # If missingness >= 1 then use it as approximate (see below) count
54
+ if missingness >= 1:
55
+ n_missing = int(missingness)
56
+ else:
57
+ n_missing = int(missingness * n_rows * n_cols)
58
+ print(n_missing)
59
+
60
+ # Introduce NaNs until n_miss "NAs" are inserted.
61
+ missing_count = 0
62
+ for i,j in zip(np.random.choice(n_rows, n_missing), np.random.choice(n_cols, n_missing)):
63
+ if np.isnan(X[i][j]):
64
+ continue
65
+ else:
66
+ X[i][j] = missing_value
67
+ missing_count += 1
68
+ if missing_count >= n_missing:
69
+ break
70
+
71
+ # check sample_prefix
72
+ if sample_prefix is None:
73
+ return X
74
+ else:
75
+ colNames = [sample_prefix + '_' + str(i) for i in range(0, n_cols)]
76
+ return pd.DataFrame(X, columns=colNames)
77
+
58
78
 
59
79
  def insert_na(df, n_miss, seed):
60
80
  np.random.seed(seed)