cpgtools 2.0.2__tar.gz → 2.0.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cpgtools might be problematic. Click here for more details.
- {cpgtools-2.0.2 → cpgtools-2.0.3}/MANIFEST.in +4 -4
- {cpgtools-2.0.2/src/cpgtools.egg-info → cpgtools-2.0.3}/PKG-INFO +1 -1
- {cpgtools-2.0.2 → cpgtools-2.0.3}/pyproject.toml +1 -1
- cpgtools-2.0.3/scripts/.DS_Store +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/dmc_ttest.py +5 -1
- cpgtools-2.0.2/scripts/beta_imputation.py → cpgtools-2.0.3/scripts/predict_missing.py +88 -19
- {cpgtools-2.0.2 → cpgtools-2.0.3}/setup.py +1 -1
- cpgtools-2.0.3/src/cpgmodule/data/AltumAge_cpg.pkl +0 -0
- cpgtools-2.0.3/src/cpgmodule/data/AltumAge_multi_platform_cpgs.pkl +0 -0
- cpgtools-2.0.3/src/cpgmodule/data/AltumAge_scaler.pkl +0 -0
- cpgtools-2.0.3/src/cpgmodule/data/GA_Bohlin.pkl +0 -0
- cpgtools-2.0.3/src/cpgmodule/data/GA_Haftorn.pkl +0 -0
- cpgtools-2.0.3/src/cpgmodule/data/GA_Knight.pkl +0 -0
- cpgtools-2.0.3/src/cpgmodule/data/GA_Lee_CPC.pkl +0 -0
- cpgtools-2.0.3/src/cpgmodule/data/GA_Lee_RPC.pkl +0 -0
- cpgtools-2.0.3/src/cpgmodule/data/GA_Lee_refined_RPC.pkl +0 -0
- cpgtools-2.0.3/src/cpgmodule/data/GA_Mayne.pkl +0 -0
- cpgtools-2.0.3/src/cpgmodule/data/Hannum.pkl +0 -0
- cpgtools-2.0.3/src/cpgmodule/data/Horvath_2013.pkl +0 -0
- cpgtools-2.0.3/src/cpgmodule/data/Horvath_2018.pkl +0 -0
- cpgtools-2.0.3/src/cpgmodule/data/Levine.pkl +0 -0
- cpgtools-2.0.3/src/cpgmodule/data/Lu_DNAmTL.pkl +0 -0
- cpgtools-2.0.3/src/cpgmodule/data/Ped_McEwen.pkl +0 -0
- cpgtools-2.0.3/src/cpgmodule/data/Ped_Wu.pkl +0 -0
- cpgtools-2.0.3/src/cpgmodule/data/Zhang_BLUP.pkl +0 -0
- cpgtools-2.0.3/src/cpgmodule/data/Zhang_EN.pkl +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3/src/cpgtools.egg-info}/PKG-INFO +1 -1
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/cpgtools.egg-info/SOURCES.txt +21 -1
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/ops/util.py +34 -14
- {cpgtools-2.0.2 → cpgtools-2.0.3}/LICENSE +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/README.md +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/distribute_setup.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/CpG_aggregation.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/CpG_anno_position.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/CpG_anno_probe.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/CpG_density_gene_centered.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/CpG_distrb_chrom.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/CpG_distrb_gene_centered.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/CpG_distrb_region.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/CpG_logo.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/CpG_to_gene.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/beta_PCA.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/beta_UMAP.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/beta_jitter_plot.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/beta_m_conversion.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/beta_profile_gene_centered.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/beta_profile_region.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/beta_selectNBest.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/beta_stacked_barplot.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/beta_stats.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/beta_tSNE.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/beta_topN.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/beta_trichotmize.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/dmc_Bayes.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/dmc_bb.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/dmc_fisher.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/dmc_glm.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/dmc_logit.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/dmc_nonparametric.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/scripts/predict_sex.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/setup.cfg +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/cpgmodule/BED.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/cpgmodule/MI.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/cpgmodule/__init__.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/cpgmodule/_version.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/cpgmodule/cgID.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/cpgmodule/data/__init__.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/cpgmodule/extend_bed.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/cpgmodule/imotif.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/cpgmodule/ireader.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/cpgmodule/methylClock.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/cpgmodule/padjust.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/cpgmodule/region2gene.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/cpgmodule/utils.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/cpgtools.egg-info/dependency_links.txt +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/cpgtools.egg-info/requires.txt +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/cpgtools.egg-info/top_level.txt +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/__init__.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/contrib/__init__.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/contrib/compare.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/contrib/count_missing.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/contrib/describe.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/cs/__init__.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/cs/buck_iterative.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/cs/central_tendency.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/cs/em.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/cs/fast_knn.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/cs/random.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/dataset/__init__.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/dataset/base.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/dataset/corrupt.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/deletion/__init__.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/deletion/complete_case.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/ops/__init__.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/ops/error.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/ops/inverse_distance_weighting.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/ops/matrix.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/ops/testing.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/ops/wrapper.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/ts/__init__.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/ts/locf.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/impyute/ts/moving_window.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/missingpy/__init__.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/missingpy/knnimpute.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/missingpy/missforest.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/missingpy/pairwise_external.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/missingpy/tests/__init__.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/missingpy/tests/test_knnimpute.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/missingpy/tests/test_missforest.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.3}/src/missingpy/utils.py +0 -0
|
@@ -4,8 +4,8 @@ include PKG-INFO
|
|
|
4
4
|
include LICENSE
|
|
5
5
|
|
|
6
6
|
include distribute_setup.py
|
|
7
|
-
recursive-include
|
|
8
|
-
recursive-include
|
|
9
|
-
recursive-include
|
|
10
|
-
recursive-include
|
|
7
|
+
recursive-include src *.pyx
|
|
8
|
+
recursive-include src *.py
|
|
9
|
+
recursive-include src *.pkl
|
|
10
|
+
recursive-include scripts *
|
|
11
11
|
recursive-include doc *
|
|
Binary file
|
|
@@ -189,7 +189,7 @@ def main():
|
|
|
189
189
|
else:
|
|
190
190
|
continue
|
|
191
191
|
line_num += 1
|
|
192
|
-
|
|
192
|
+
|
|
193
193
|
printlog("Perfrom Benjamini-Hochberg (aka FDR) correction ...")
|
|
194
194
|
adjusted_p = {}
|
|
195
195
|
q_list = padjust.multiple_testing_correction(p_list)
|
|
@@ -204,6 +204,10 @@ def main():
|
|
|
204
204
|
else:
|
|
205
205
|
f = l.split()
|
|
206
206
|
probe_ID = f[0]
|
|
207
|
+
if probe_ID in delta_beta:
|
|
208
|
+
pass
|
|
209
|
+
else:
|
|
210
|
+
delta_beta[probe_ID] = 'n/a'
|
|
207
211
|
try:
|
|
208
212
|
print (l + '\t' + str(delta_beta[probe_ID]) + '\t' + adjusted_p[probe_ID], file=FOUT)
|
|
209
213
|
except:
|
|
@@ -20,6 +20,7 @@ from impyute.cs.em import em
|
|
|
20
20
|
from impyute.ops.util import toy_df,insert_na
|
|
21
21
|
from impyute.cs.random import random_impute
|
|
22
22
|
from impyute.cs.buck_iterative import buck_iterative
|
|
23
|
+
from impyute.ts.moving_window import moving_window
|
|
23
24
|
from missingpy import MissForest
|
|
24
25
|
|
|
25
26
|
#use pip to install fancyimpute
|
|
@@ -64,6 +65,7 @@ def nafiller():
|
|
|
64
65
|
the same row or column.",
|
|
65
66
|
'FillRef': "Impute missing values using values from an external \
|
|
66
67
|
reference dataset.",
|
|
68
|
+
'MW': "Interpolate the missing values with moving window.",
|
|
67
69
|
'KNN': "Impute missing values using scikit-learn's KNNImputer function. \
|
|
68
70
|
Note: slow for large datasets.",
|
|
69
71
|
'KNN2': "Impute missing values using KNN2",
|
|
@@ -98,6 +100,7 @@ def nafiller():
|
|
|
98
100
|
FillMax_parser = sub_parsers.add_parser('FillMax', help=commands['FillMax'])
|
|
99
101
|
FillRand_parser = sub_parsers.add_parser('FillRand', help=commands['FillRand'])
|
|
100
102
|
FillRef_parser = sub_parsers.add_parser('FillRef', help=commands['FillRef'])
|
|
103
|
+
MW_parser = sub_parsers.add_parser('MW', help=commands['MW'])
|
|
101
104
|
KNN_parser = sub_parsers.add_parser('KNN', help=commands['KNN'])
|
|
102
105
|
fKNN_parser = sub_parsers.add_parser('fKNN', help=commands['fKNN'])
|
|
103
106
|
EM_parser = sub_parsers.add_parser('EM', help=commands['EM'])
|
|
@@ -116,7 +119,7 @@ def nafiller():
|
|
|
116
119
|
'output', type=str, metavar='out_df',
|
|
117
120
|
help="Output data frame.")
|
|
118
121
|
DropNA_parser.add_argument(
|
|
119
|
-
'
|
|
122
|
+
'--axis', type=int, choices=range(2), default=0,
|
|
120
123
|
help="0 : drop rows with any missing values, 1 : drop columns with \
|
|
121
124
|
missing values. Default: 0")
|
|
122
125
|
DropNA_parser.add_argument(
|
|
@@ -130,7 +133,7 @@ def nafiller():
|
|
|
130
133
|
'output', type=str, metavar='out_df',
|
|
131
134
|
help="Output data frame.")
|
|
132
135
|
FillValue_parser.add_argument(
|
|
133
|
-
'
|
|
136
|
+
'--score', type=float, default=0.0,
|
|
134
137
|
help="The value uesd to fill all NAs.")
|
|
135
138
|
FillValue_parser.add_argument(
|
|
136
139
|
'--decimal', type=int, default=5,
|
|
@@ -143,7 +146,7 @@ def nafiller():
|
|
|
143
146
|
'output', type=str, metavar='out_df',
|
|
144
147
|
help="Output data frame.")
|
|
145
148
|
FillMean_parser.add_argument(
|
|
146
|
-
'
|
|
149
|
+
'--axis', type=int, choices=range(2), default=1,
|
|
147
150
|
help="0 means column, 1 means row. Default: fill NAs with row means")
|
|
148
151
|
FillMean_parser.add_argument(
|
|
149
152
|
'--decimal', type=int, default=5,
|
|
@@ -156,7 +159,7 @@ def nafiller():
|
|
|
156
159
|
'output', type=str, metavar='out_df',
|
|
157
160
|
help="Output data frame.")
|
|
158
161
|
FillMedian_parser.add_argument(
|
|
159
|
-
'
|
|
162
|
+
'--axis', type=int, choices=range(2), default=1,
|
|
160
163
|
help="0 means column, 1 means row. Default: fill NAs with row medians")
|
|
161
164
|
FillMedian_parser.add_argument(
|
|
162
165
|
'--decimal', type=int, default=5,
|
|
@@ -169,7 +172,7 @@ def nafiller():
|
|
|
169
172
|
'output', type=str, metavar='out_df',
|
|
170
173
|
help="Output data frame.")
|
|
171
174
|
FillMin_parser.add_argument(
|
|
172
|
-
'
|
|
175
|
+
'--axis', type=int, choices=range(2), default=1,
|
|
173
176
|
help="0 means column, 1 means row. Default: fill NAs with the minimum value of the rows.")
|
|
174
177
|
FillMin_parser.add_argument(
|
|
175
178
|
'--decimal', type=int, default=5,
|
|
@@ -182,7 +185,7 @@ def nafiller():
|
|
|
182
185
|
'output', type=str, metavar='out_df',
|
|
183
186
|
help="Output data frame.")
|
|
184
187
|
FillMax_parser.add_argument(
|
|
185
|
-
'
|
|
188
|
+
'--axis', type=int, choices=range(2), default=1,
|
|
186
189
|
help="0 means column, 1 means row. Default: fill NAs with the maximum value of the rows.")
|
|
187
190
|
FillMax_parser.add_argument(
|
|
188
191
|
'--decimal', type=int, default=5,
|
|
@@ -195,7 +198,7 @@ def nafiller():
|
|
|
195
198
|
'output', type=str, metavar='out_df',
|
|
196
199
|
help="Output data frame.")
|
|
197
200
|
FillRand_parser.add_argument(
|
|
198
|
-
'
|
|
201
|
+
'--axis', type=int, choices=range(2), default=1,
|
|
199
202
|
help="0 means column, 1 means row. Default: fill NAs with values randomly selected from rows.")
|
|
200
203
|
FillRand_parser.add_argument(
|
|
201
204
|
'--decimal', type=int, default=5,
|
|
@@ -208,12 +211,50 @@ def nafiller():
|
|
|
208
211
|
'output', type=str, metavar='out_df',
|
|
209
212
|
help="Output data frame.")
|
|
210
213
|
FillRef_parser.add_argument(
|
|
211
|
-
'
|
|
214
|
+
'--ref', type=str,
|
|
212
215
|
help="File name of the external reference.")
|
|
213
216
|
FillRef_parser.add_argument(
|
|
214
217
|
'--decimal', type=int, default=5,
|
|
215
218
|
help="Number of decimal places to round each column to. default: %(default)s")
|
|
216
219
|
|
|
220
|
+
MW_parser.add_argument(
|
|
221
|
+
'input', type=str, metavar='input_df',
|
|
222
|
+
help="Input data frame.")
|
|
223
|
+
MW_parser.add_argument(
|
|
224
|
+
'output', type=str, metavar='out_df',
|
|
225
|
+
help="Output data frame.")
|
|
226
|
+
MW_parser.add_argument(
|
|
227
|
+
'--nindex', type=int, choices=[0, -1, None],
|
|
228
|
+
default=None,
|
|
229
|
+
help="Null index. Index of the null value inside the moving average window. \
|
|
230
|
+
See impyute documentation for details. default: %(default)s")
|
|
231
|
+
MW_parser.add_argument(
|
|
232
|
+
'--wsize', type=int, default=5,
|
|
233
|
+
help="Size of the moving average window/area of values being used \
|
|
234
|
+
for each local imputation. This number includes the missing value. \
|
|
235
|
+
default: %(default)s")
|
|
236
|
+
MW_parser.add_argument(
|
|
237
|
+
'--errors', type=str, choices=["raise", "coerce", "ignore"],
|
|
238
|
+
default='coerce',
|
|
239
|
+
help="Errors will occur with the indexing of the windows - for \
|
|
240
|
+
example if there is a nan at data[x][0] and `nindex` is set to \
|
|
241
|
+
-1 or there is a nan at data[x][-1] and `nindex` is set to 0. `\
|
|
242
|
+
'raise' will raise an error, `coerce` will try again using an \
|
|
243
|
+
nindex set to the middle and `ignore` will just leave it as a \
|
|
244
|
+
nan default: %(default)s")
|
|
245
|
+
MW_parser.add_argument(
|
|
246
|
+
'--func', type=str, choices=["mean", "median"],
|
|
247
|
+
default='mean',
|
|
248
|
+
help="Function to summerzie values within the moving window. \
|
|
249
|
+
default: %(default)s")
|
|
250
|
+
MW_parser.add_argument(
|
|
251
|
+
'--axis', type=int, choices=range(2), default=1,
|
|
252
|
+
help="0 means column, 1 means row. Default: fill missing value \
|
|
253
|
+
with windows moving on rows.")
|
|
254
|
+
MW_parser.add_argument(
|
|
255
|
+
'--decimal', type=int, default=5,
|
|
256
|
+
help="Number of decimal places to round each column to. default: %(default)s")
|
|
257
|
+
|
|
217
258
|
KNN_parser.add_argument(
|
|
218
259
|
'input', type=str, metavar='input_df',
|
|
219
260
|
help="Input data frame.")
|
|
@@ -306,20 +347,28 @@ def nafiller():
|
|
|
306
347
|
'output', type=str, metavar='out_df',
|
|
307
348
|
help="Name of the output data frame.")
|
|
308
349
|
ToyDf_parser.add_argument(
|
|
309
|
-
'
|
|
350
|
+
'--nrow', type=int, default=10,
|
|
310
351
|
help="Number of rows. default: %(default)s")
|
|
311
352
|
ToyDf_parser.add_argument(
|
|
312
|
-
'
|
|
353
|
+
'--ncol', type=int, default=10,
|
|
313
354
|
help="Number of columns. default: %(default)s")
|
|
314
355
|
ToyDf_parser.add_argument(
|
|
315
|
-
'--
|
|
356
|
+
'--nmiss', type=float, default=5,
|
|
316
357
|
help="Number of missing values ingested into the dataframe. default: %(default)s")
|
|
317
358
|
ToyDf_parser.add_argument(
|
|
318
|
-
'
|
|
359
|
+
'--seed', type=int, default=123,
|
|
319
360
|
help="Seed used to initialize a pseudorandom number generator. default: %(default)s")
|
|
320
361
|
ToyDf_parser.add_argument(
|
|
321
362
|
'--prefix', type=str, default='s',
|
|
322
|
-
help="Prefix of the column names, a series numbers will be appended to
|
|
363
|
+
help="Prefix of the column names, a series numbers will be appended to \
|
|
364
|
+
the prefix. If this is set to None, as np.ndarray rather than pd.dataframe \
|
|
365
|
+
will be returned. default: %(default)s")
|
|
366
|
+
ToyDf_parser.add_argument(
|
|
367
|
+
'--min', type=float, default=0.0,
|
|
368
|
+
help="The minimum value. default: %(default)s")
|
|
369
|
+
ToyDf_parser.add_argument(
|
|
370
|
+
'--max', type=float, default=1.0,
|
|
371
|
+
help="The maximum value. default: %(default)s")
|
|
323
372
|
ToyDf_parser.add_argument(
|
|
324
373
|
'--decimal', type=int, default=5,
|
|
325
374
|
help="Number of decimal places to round each column to. default: %(default)s")
|
|
@@ -331,10 +380,10 @@ def nafiller():
|
|
|
331
380
|
'output', type=str, metavar='out_df',
|
|
332
381
|
help="Output data frame.")
|
|
333
382
|
InsertNA_parser.add_argument(
|
|
334
|
-
'--
|
|
383
|
+
'--nmiss', type=int,
|
|
335
384
|
help="Number of missing values ingested into the dataframe.")
|
|
336
385
|
InsertNA_parser.add_argument(
|
|
337
|
-
'
|
|
386
|
+
'--seed', type=int, default=123,
|
|
338
387
|
help="Seed used to initialize a pseudorandom number generator. default: %(default)s")
|
|
339
388
|
InsertNA_parser.add_argument(
|
|
340
389
|
'--decimal', type=int, default=5,
|
|
@@ -483,6 +532,25 @@ def nafiller():
|
|
|
483
532
|
continue
|
|
484
533
|
output_df = input_df
|
|
485
534
|
output_df = output_df.round(args.decimal)
|
|
535
|
+
output_df.to_csv(args.output, sep="\t", na_rep="NaN")
|
|
536
|
+
logging.info("File \"%s\" contains %d missing values ..." %
|
|
537
|
+
(args.output, output_df.isna().sum().sum()))
|
|
538
|
+
elif command.lower() == 'mw':
|
|
539
|
+
input_df = read_df(args.input)
|
|
540
|
+
logging.info("File \"%s\" contains %d missing values ..." %
|
|
541
|
+
(args.input, input_df.isna().sum().sum()))
|
|
542
|
+
logging.info("Replace missing values using moving window on %s ..." % axis_name[args.axis])
|
|
543
|
+
if args.axis == 1:
|
|
544
|
+
output_df = moving_window(
|
|
545
|
+
input_df, nindex = args.nindex, wsize=args.wsize,
|
|
546
|
+
errors=args.errors, func=getattr(np, args.func))
|
|
547
|
+
output_df = output_df.round(args.decimal)
|
|
548
|
+
elif args.axis == 0:
|
|
549
|
+
output_df = moving_window(
|
|
550
|
+
input_df.T, nindex = args.nindex, wsize=args.wsize,
|
|
551
|
+
errors=args.errors, func=getattr(np, args.func))
|
|
552
|
+
output_df = output_df.round(args.decimal).T
|
|
553
|
+
|
|
486
554
|
output_df.to_csv(args.output, sep="\t", na_rep="NaN")
|
|
487
555
|
logging.info("File \"%s\" contains %d missing values ..." %
|
|
488
556
|
(args.output, output_df.isna().sum().sum()))
|
|
@@ -576,9 +644,10 @@ def nafiller():
|
|
|
576
644
|
|
|
577
645
|
elif command.lower() == 'toydf':
|
|
578
646
|
logging.info("Generate toy dataframe ...")
|
|
579
|
-
output_df = toy_df(
|
|
580
|
-
|
|
581
|
-
|
|
647
|
+
output_df = toy_df(n_rows = args.nrow, n_cols = args.ncol,
|
|
648
|
+
missingness = args.nmiss, sample_prefix=args.prefix,
|
|
649
|
+
min_val = args.min, max_val = args.max,
|
|
650
|
+
rand_seed=args.seed)
|
|
582
651
|
#print(output_df)
|
|
583
652
|
output_df = output_df.round(args.decimal)
|
|
584
653
|
output_df.to_csv(args.output, sep="\t", na_rep="NaN")
|
|
@@ -589,7 +658,7 @@ def nafiller():
|
|
|
589
658
|
logging.info("File \"%s\" contains %d missing values ..." %
|
|
590
659
|
(args.input, input_df.isna().sum().sum()))
|
|
591
660
|
logging.info("Insert %d NAs into dataframe ..." % args.na)
|
|
592
|
-
output_df = insert_na(df=input_df, n_miss=args.
|
|
661
|
+
output_df = insert_na(df=input_df, n_miss=args.nmiss, seed=args.seed)
|
|
593
662
|
output_df = output_df.round(args.decimal)
|
|
594
663
|
output_df.to_csv(args.output, sep="\t", na_rep="NaN")
|
|
595
664
|
logging.info("File \"%s\" contains %d missing values ..." %
|
|
@@ -11,7 +11,6 @@ script_files = [
|
|
|
11
11
|
"scripts/CpG_to_gene.py",
|
|
12
12
|
"scripts/beta_PCA.py",
|
|
13
13
|
"scripts/beta_UMAP.py",
|
|
14
|
-
"scripts/beta_imputation.py",
|
|
15
14
|
"scripts/beta_jitter_plot.py",
|
|
16
15
|
"scripts/beta_m_conversion.py",
|
|
17
16
|
"scripts/beta_profile_gene_centered.py",
|
|
@@ -30,6 +29,7 @@ script_files = [
|
|
|
30
29
|
"scripts/dmc_nonparametric.py",
|
|
31
30
|
"scripts/dmc_ttest.py",
|
|
32
31
|
"scripts/predict_sex.py",
|
|
32
|
+
"scripts/predict_missing.py",
|
|
33
33
|
]
|
|
34
34
|
|
|
35
35
|
if __name__ == "__main__":
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -4,6 +4,7 @@ README.md
|
|
|
4
4
|
distribute_setup.py
|
|
5
5
|
pyproject.toml
|
|
6
6
|
setup.py
|
|
7
|
+
scripts/.DS_Store
|
|
7
8
|
scripts/CpG_aggregation.py
|
|
8
9
|
scripts/CpG_anno_position.py
|
|
9
10
|
scripts/CpG_anno_probe.py
|
|
@@ -15,7 +16,6 @@ scripts/CpG_logo.py
|
|
|
15
16
|
scripts/CpG_to_gene.py
|
|
16
17
|
scripts/beta_PCA.py
|
|
17
18
|
scripts/beta_UMAP.py
|
|
18
|
-
scripts/beta_imputation.py
|
|
19
19
|
scripts/beta_jitter_plot.py
|
|
20
20
|
scripts/beta_m_conversion.py
|
|
21
21
|
scripts/beta_profile_gene_centered.py
|
|
@@ -33,6 +33,7 @@ scripts/dmc_glm.py
|
|
|
33
33
|
scripts/dmc_logit.py
|
|
34
34
|
scripts/dmc_nonparametric.py
|
|
35
35
|
scripts/dmc_ttest.py
|
|
36
|
+
scripts/predict_missing.py
|
|
36
37
|
scripts/predict_sex.py
|
|
37
38
|
src/cpgmodule/BED.py
|
|
38
39
|
src/cpgmodule/MI.py
|
|
@@ -46,6 +47,25 @@ src/cpgmodule/methylClock.py
|
|
|
46
47
|
src/cpgmodule/padjust.py
|
|
47
48
|
src/cpgmodule/region2gene.py
|
|
48
49
|
src/cpgmodule/utils.py
|
|
50
|
+
src/cpgmodule/data/AltumAge_cpg.pkl
|
|
51
|
+
src/cpgmodule/data/AltumAge_multi_platform_cpgs.pkl
|
|
52
|
+
src/cpgmodule/data/AltumAge_scaler.pkl
|
|
53
|
+
src/cpgmodule/data/GA_Bohlin.pkl
|
|
54
|
+
src/cpgmodule/data/GA_Haftorn.pkl
|
|
55
|
+
src/cpgmodule/data/GA_Knight.pkl
|
|
56
|
+
src/cpgmodule/data/GA_Lee_CPC.pkl
|
|
57
|
+
src/cpgmodule/data/GA_Lee_RPC.pkl
|
|
58
|
+
src/cpgmodule/data/GA_Lee_refined_RPC.pkl
|
|
59
|
+
src/cpgmodule/data/GA_Mayne.pkl
|
|
60
|
+
src/cpgmodule/data/Hannum.pkl
|
|
61
|
+
src/cpgmodule/data/Horvath_2013.pkl
|
|
62
|
+
src/cpgmodule/data/Horvath_2018.pkl
|
|
63
|
+
src/cpgmodule/data/Levine.pkl
|
|
64
|
+
src/cpgmodule/data/Lu_DNAmTL.pkl
|
|
65
|
+
src/cpgmodule/data/Ped_McEwen.pkl
|
|
66
|
+
src/cpgmodule/data/Ped_Wu.pkl
|
|
67
|
+
src/cpgmodule/data/Zhang_BLUP.pkl
|
|
68
|
+
src/cpgmodule/data/Zhang_EN.pkl
|
|
49
69
|
src/cpgmodule/data/__init__.py
|
|
50
70
|
src/cpgtools.egg-info/PKG-INFO
|
|
51
71
|
src/cpgtools.egg-info/SOURCES.txt
|
|
@@ -41,20 +41,40 @@ def execute_fn_with_args_and_or_kwargs(fn, args, kwargs):
|
|
|
41
41
|
except TypeError:
|
|
42
42
|
return fn(*args)
|
|
43
43
|
|
|
44
|
-
def toy_df(
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
44
|
+
def toy_df(n_rows=20, n_cols=5, missingness=0.2, min_val=0, max_val=1,
           missing_value=np.nan, rand_seed=1234, sample_prefix=None):
    """Generate a random array or DataFrame containing missing values.

    Values are drawn uniformly from ``[min_val, max_val)`` after seeding
    NumPy's global random generator with ``rand_seed``.

    Parameters
    ----------
    n_rows, n_cols : int
        Shape of the generated matrix.
    missingness : int or float
        If ``>= 1`` it is interpreted as the number of cells to blank out
        (truncated to an int); if between 0 and 1 it is the fraction of all
        cells to blank out; if ``<= 0`` no cell is modified.
    min_val, max_val : float
        Bounds of the uniform distribution.
    missing_value : float
        Value written into the selected cells. Default: ``np.nan``.
    rand_seed : int
        Seed passed to ``np.random.seed``.
    sample_prefix : str or None
        When ``None`` a ``np.ndarray`` is returned; otherwise a
        ``pd.DataFrame`` whose columns are named ``<prefix>_<index>``.

    Returns
    -------
    np.ndarray or pd.DataFrame
        The ``n_rows`` x ``n_cols`` matrix with missing values inserted.
    """
    np.random.seed(rand_seed)
    n_cells = n_rows * n_cols
    X = np.random.uniform(
        low=min_val, high=max_val, size=n_cells
    ).reshape(n_rows, n_cols).astype(float)

    if missingness > 0:
        if missingness >= 1:
            n_missing = int(missingness)
        else:
            n_missing = int(missingness * n_cells)
        # Never ask for more missing cells than the matrix holds.
        n_missing = min(n_missing, n_cells)

        if n_missing > 0:
            # Sample flat cell indices WITHOUT replacement so that exactly
            # n_missing distinct cells are blanked. Drawing row and column
            # indices independently with replacement can hit the same cell
            # twice and silently yields fewer missing values than requested.
            picked = np.random.choice(n_cells, n_missing, replace=False)
            X.flat[picked] = missing_value

    if sample_prefix is None:
        return X
    col_names = [sample_prefix + '_' + str(i) for i in range(n_cols)]
    return pd.DataFrame(X, columns=col_names)
|
|
77
|
+
|
|
58
78
|
|
|
59
79
|
def insert_na(df, n_miss, seed):
|
|
60
80
|
np.random.seed(seed)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|