cpgtools 2.0.2__tar.gz → 2.0.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cpgtools might be problematic. Click here for more details.
- {cpgtools-2.0.2 → cpgtools-2.0.4}/MANIFEST.in +4 -4
- {cpgtools-2.0.2 → cpgtools-2.0.4}/PKG-INFO +6 -24
- {cpgtools-2.0.2 → cpgtools-2.0.4}/pyproject.toml +3 -3
- cpgtools-2.0.4/scripts/.DS_Store +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/scripts/beta_PCA.py +4 -1
- cpgtools-2.0.4/scripts/beta_remove_batch_effects.py +138 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/scripts/dmc_ttest.py +5 -1
- cpgtools-2.0.2/scripts/beta_imputation.py → cpgtools-2.0.4/scripts/predict_missing.py +88 -19
- {cpgtools-2.0.2 → cpgtools-2.0.4}/setup.py +2 -1
- cpgtools-2.0.4/src/cpgmodule/data/AltumAge_cpg.pkl +0 -0
- cpgtools-2.0.4/src/cpgmodule/data/AltumAge_multi_platform_cpgs.pkl +0 -0
- cpgtools-2.0.4/src/cpgmodule/data/AltumAge_scaler.pkl +0 -0
- cpgtools-2.0.4/src/cpgmodule/data/GA_Bohlin.pkl +0 -0
- cpgtools-2.0.4/src/cpgmodule/data/GA_Haftorn.pkl +0 -0
- cpgtools-2.0.4/src/cpgmodule/data/GA_Knight.pkl +0 -0
- cpgtools-2.0.4/src/cpgmodule/data/GA_Lee_CPC.pkl +0 -0
- cpgtools-2.0.4/src/cpgmodule/data/GA_Lee_RPC.pkl +0 -0
- cpgtools-2.0.4/src/cpgmodule/data/GA_Lee_refined_RPC.pkl +0 -0
- cpgtools-2.0.4/src/cpgmodule/data/GA_Mayne.pkl +0 -0
- cpgtools-2.0.4/src/cpgmodule/data/Hannum.pkl +0 -0
- cpgtools-2.0.4/src/cpgmodule/data/Horvath_2013.pkl +0 -0
- cpgtools-2.0.4/src/cpgmodule/data/Horvath_2018.pkl +0 -0
- cpgtools-2.0.4/src/cpgmodule/data/Levine.pkl +0 -0
- cpgtools-2.0.4/src/cpgmodule/data/Lu_DNAmTL.pkl +0 -0
- cpgtools-2.0.4/src/cpgmodule/data/Ped_McEwen.pkl +0 -0
- cpgtools-2.0.4/src/cpgmodule/data/Ped_Wu.pkl +0 -0
- cpgtools-2.0.4/src/cpgmodule/data/Zhang_BLUP.pkl +0 -0
- cpgtools-2.0.4/src/cpgmodule/data/Zhang_EN.pkl +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/cpgtools.egg-info/PKG-INFO +6 -24
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/cpgtools.egg-info/SOURCES.txt +23 -2
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/cpgtools.egg-info/requires.txt +1 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/impyute/ops/util.py +34 -14
- /cpgtools-2.0.2/LICENSE → /cpgtools-2.0.4/LICENSE.txt +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/README.md +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/distribute_setup.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/scripts/CpG_aggregation.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/scripts/CpG_anno_position.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/scripts/CpG_anno_probe.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/scripts/CpG_density_gene_centered.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/scripts/CpG_distrb_chrom.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/scripts/CpG_distrb_gene_centered.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/scripts/CpG_distrb_region.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/scripts/CpG_logo.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/scripts/CpG_to_gene.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/scripts/beta_UMAP.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/scripts/beta_jitter_plot.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/scripts/beta_m_conversion.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/scripts/beta_profile_gene_centered.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/scripts/beta_profile_region.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/scripts/beta_selectNBest.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/scripts/beta_stacked_barplot.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/scripts/beta_stats.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/scripts/beta_tSNE.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/scripts/beta_topN.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/scripts/beta_trichotmize.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/scripts/dmc_Bayes.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/scripts/dmc_bb.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/scripts/dmc_fisher.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/scripts/dmc_glm.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/scripts/dmc_logit.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/scripts/dmc_nonparametric.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/scripts/predict_sex.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/setup.cfg +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/cpgmodule/BED.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/cpgmodule/MI.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/cpgmodule/__init__.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/cpgmodule/_version.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/cpgmodule/cgID.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/cpgmodule/data/__init__.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/cpgmodule/extend_bed.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/cpgmodule/imotif.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/cpgmodule/ireader.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/cpgmodule/methylClock.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/cpgmodule/padjust.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/cpgmodule/region2gene.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/cpgmodule/utils.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/cpgtools.egg-info/dependency_links.txt +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/cpgtools.egg-info/top_level.txt +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/impyute/__init__.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/impyute/contrib/__init__.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/impyute/contrib/compare.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/impyute/contrib/count_missing.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/impyute/contrib/describe.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/impyute/cs/__init__.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/impyute/cs/buck_iterative.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/impyute/cs/central_tendency.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/impyute/cs/em.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/impyute/cs/fast_knn.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/impyute/cs/random.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/impyute/dataset/__init__.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/impyute/dataset/base.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/impyute/dataset/corrupt.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/impyute/deletion/__init__.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/impyute/deletion/complete_case.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/impyute/ops/__init__.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/impyute/ops/error.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/impyute/ops/inverse_distance_weighting.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/impyute/ops/matrix.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/impyute/ops/testing.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/impyute/ops/wrapper.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/impyute/ts/__init__.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/impyute/ts/locf.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/impyute/ts/moving_window.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/missingpy/__init__.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/missingpy/knnimpute.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/missingpy/missforest.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/missingpy/pairwise_external.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/missingpy/tests/__init__.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/missingpy/tests/test_knnimpute.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/missingpy/tests/test_missforest.py +0 -0
- {cpgtools-2.0.2 → cpgtools-2.0.4}/src/missingpy/utils.py +0 -0
|
@@ -4,8 +4,8 @@ include PKG-INFO
|
|
|
4
4
|
include LICENSE
|
|
5
5
|
|
|
6
6
|
include distribute_setup.py
|
|
7
|
-
recursive-include
|
|
8
|
-
recursive-include
|
|
9
|
-
recursive-include
|
|
10
|
-
recursive-include
|
|
7
|
+
recursive-include src *.pyx
|
|
8
|
+
recursive-include src *.py
|
|
9
|
+
recursive-include src *.pkl
|
|
10
|
+
recursive-include scripts *
|
|
11
11
|
recursive-include doc *
|
|
@@ -1,35 +1,15 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: cpgtools
|
|
3
|
-
Version: 2.0.2
|
|
3
|
+
Version: 2.0.4
|
|
4
4
|
Summary: Tools to analyze and visualize DNA methylation data
|
|
5
5
|
Author-email: Liguo Wang <wangliguo78@gmail.com>
|
|
6
6
|
Maintainer-email: Liguo Wang <wangliguo78@gmail.com>
|
|
7
|
-
License:
|
|
8
|
-
|
|
9
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
10
|
-
of this software and associated documentation files (the "Software"), to deal
|
|
11
|
-
in the Software without restriction, including without limitation the rights
|
|
12
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
13
|
-
copies of the Software, and to permit persons to whom the Software is
|
|
14
|
-
furnished to do so, subject to the following conditions:
|
|
15
|
-
|
|
16
|
-
The above copyright notice and this permission notice shall be included in all
|
|
17
|
-
copies or substantial portions of the Software.
|
|
18
|
-
|
|
19
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
20
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
21
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
22
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
23
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
24
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
25
|
-
SOFTWARE.
|
|
26
|
-
|
|
7
|
+
License-Expression: MIT
|
|
27
8
|
Project-URL: Documentation, https://cpgtools.readthedocs.io/en/latest/index.html
|
|
28
9
|
Project-URL: Repository, https://github.com/liguowang/cpgtools.git
|
|
29
10
|
Keywords: DNA methylation,EPIC,450K,850K,935K,RRBS,WGBS
|
|
30
11
|
Classifier: Programming Language :: Python :: 3
|
|
31
12
|
Classifier: Development Status :: 4 - Beta
|
|
32
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
33
13
|
Classifier: Environment :: Console
|
|
34
14
|
Classifier: Intended Audience :: Science/Research
|
|
35
15
|
Classifier: Operating System :: MacOS :: MacOS X
|
|
@@ -37,7 +17,7 @@ Classifier: Operating System :: POSIX
|
|
|
37
17
|
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
38
18
|
Requires-Python: >=3.5
|
|
39
19
|
Description-Content-Type: text/markdown
|
|
40
|
-
License-File: LICENSE
|
|
20
|
+
License-File: LICENSE.txt
|
|
41
21
|
Requires-Dist: numpy
|
|
42
22
|
Requires-Dist: scipy
|
|
43
23
|
Requires-Dist: scikit-learn
|
|
@@ -46,6 +26,8 @@ Requires-Dist: bx-python
|
|
|
46
26
|
Requires-Dist: pandas
|
|
47
27
|
Requires-Dist: umap-learn
|
|
48
28
|
Requires-Dist: fancyimpute
|
|
29
|
+
Requires-Dist: pycombat
|
|
30
|
+
Dynamic: license-file
|
|
49
31
|
|
|
50
32
|
## Install CpGtools using [pip](https://pip.pypa.io/en/stable/)
|
|
51
33
|
|
|
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
|
|
|
5
5
|
|
|
6
6
|
#Project's meta data
|
|
7
7
|
[project]
|
|
8
|
-
version = "2.0.2"
|
|
8
|
+
version = "2.0.4"
|
|
9
9
|
name = "cpgtools"
|
|
10
10
|
authors = [
|
|
11
11
|
{name="Liguo Wang", email="wangliguo78@gmail.com"},
|
|
@@ -15,7 +15,7 @@ maintainers = [
|
|
|
15
15
|
]
|
|
16
16
|
description = "Tools to analyze and visualize DNA methylation data"
|
|
17
17
|
readme = "README.md"
|
|
18
|
-
license =
|
|
18
|
+
license = "MIT"
|
|
19
19
|
requires-python = ">=3.5"
|
|
20
20
|
|
|
21
21
|
dependencies = [
|
|
@@ -27,12 +27,12 @@ dependencies = [
|
|
|
27
27
|
"pandas",
|
|
28
28
|
"umap-learn",
|
|
29
29
|
"fancyimpute",
|
|
30
|
+
"pycombat",
|
|
30
31
|
]
|
|
31
32
|
|
|
32
33
|
classifiers=[
|
|
33
34
|
"Programming Language :: Python :: 3",
|
|
34
35
|
'Development Status :: 4 - Beta',
|
|
35
|
-
"License :: OSI Approved :: MIT License",
|
|
36
36
|
'Environment :: Console',
|
|
37
37
|
'Intended Audience :: Science/Research',
|
|
38
38
|
'Operating System :: MacOS :: MacOS X',
|
|
Binary file
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
"""
|
|
4
|
+
Description
|
|
5
|
+
-----------
|
|
6
|
+
This program corrects batch effect.
|
|
7
|
+
|
|
8
|
+
Example of input data file
|
|
9
|
+
---------------------------
|
|
10
|
+
CpG_ID Sample_01 Sample_02 Sample_03 Sample_04
|
|
11
|
+
cg_001 0.831035 0.878022 0.794427 0.880911
|
|
12
|
+
cg_002 0.249544 0.209949 0.234294 0.236680
|
|
13
|
+
cg_003 0.845065 0.843957 0.840184 0.824286
|
|
14
|
+
...
|
|
15
|
+
|
|
16
|
+
Example of batch file
|
|
17
|
+
-------------------------------
|
|
18
|
+
Sample,Group
|
|
19
|
+
Sample_01,plate_1
|
|
20
|
+
Sample_02,plate_1
|
|
21
|
+
Sample_03,plate_2
|
|
22
|
+
Sample_04,plate_2
|
|
23
|
+
...
|
|
24
|
+
|
|
25
|
+
Notes
|
|
26
|
+
-----
|
|
27
|
+
* Rows with missing values will be removed
|
|
28
|
+
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
import sys
|
|
33
|
+
import subprocess
|
|
34
|
+
from optparse import OptionParser
|
|
35
|
+
from cpgmodule.utils import *
|
|
36
|
+
from cpgmodule._version import __version__
|
|
37
|
+
import pandas as pd
|
|
38
|
+
#from sklearn.preprocessing import StandardScaler
|
|
39
|
+
#from sklearn.decomposition import PCA
|
|
40
|
+
from collections import Counter
|
|
41
|
+
from combat.pycombat import pycombat
|
|
42
|
+
import matplotlib.pyplot as plt
|
|
43
|
+
import matplotlib.colors as mcolors
|
|
44
|
+
|
|
45
|
+
__author__ = "Liguo Wang"
|
|
46
|
+
__copyright__ = "Copyleft"
|
|
47
|
+
__credits__ = []
|
|
48
|
+
__license__ = "GPL"
|
|
49
|
+
__maintainer__ = "Liguo Wang"
|
|
50
|
+
__email__ = "wang.liguo@mayo.edu"
|
|
51
|
+
__status__ = "Development"
|
|
52
|
+
|
|
53
|
+
def pick_colors(n):
    """
    Return the first `n` color names from matplotlib's CSS4 color table.

    Parameters
    ----------
    n : int
        Number of colors requested.

    Returns
    -------
    list
        The first `n` keys of `mcolors.CSS4_COLORS`, in table order.

    Notes
    -----
    If more colors are requested than the table holds, an error message is
    written to stderr and the program terminates with a non-zero exit status
    (the previous bare `sys.exit()` reported success to the calling shell).
    """
    my_colors = list(mcolors.CSS4_COLORS.keys())
    if n > len(my_colors):
        print("Too many colors requested", file=sys.stderr)
        # Non-zero status so pipelines can detect the failure.
        sys.exit(1)
    return my_colors[0:n]
|
|
59
|
+
|
|
60
|
+
def box_plot(df, s_colors, out_png, ylab="Beta values", title=""):
    """
    Draw one box per column of `df` and save the figure to `out_png`.

    Parameters
    ----------
    df : data frame
        Values to plot; `df.columns` supplies the x tick labels.
        (Presumably a pandas DataFrame of beta values -- confirm with caller.)
    s_colors : iterable
        Face colors, paired with the boxes in order. Pairing stops at the
        shorter of the two sequences.
    out_png : str
        Destination passed to `savefig`.
    ylab : str
        Y axis label.
    title : str
        Figure title.
    """
    s_names = df.columns
    fig, ax = plt.subplots()
    try:
        bplot = ax.boxplot(df, patch_artist=True, tick_labels=s_names)
        for patch, color in zip(bplot['boxes'], s_colors):
            patch.set_facecolor(color)
        ax.set_xticklabels(s_names, rotation='vertical')
        # Use the explicit axes/figure instead of pyplot's implicit
        # "current" state, so the result does not depend on call order.
        ax.set_ylabel(ylab)
        ax.set_title(title)
        fig.savefig(out_png)
    finally:
        # This function is called once per plot; without closing, every
        # figure stays registered with pyplot for the life of the process.
        plt.close(fig)
|
|
70
|
+
|
|
71
|
+
def main():
    """
    Command-line entry point: remove batch effects from a beta-value table.

    Steps
    -----
    1. Parse -i/--input_file, -g/--group and -o/--output; print the module
       help and exit (status 101/101/103) if any of them is missing.
    2. Read the tab-separated data file and drop rows containing missing
       values.
    3. Read the comma-separated group file and look up the batch of every
       sample column of the data file, in column order.
    4. Write "<prefix>.boxplot.png", run pycombat, write
       "<prefix>.combat.tsv" and "<prefix>.boxplot_combat.png".

    Notes
    -----
    * Batch labels are matched to data columns by sample ID. Rows of the
      group file that do not name a data column (for example a
      "Sample,Group" header line) are ignored; data columns that have no
      entry in the group file are a fatal error.
    * All error exits use a non-zero status.
    """
    usage="%prog [options]" + "\n"
    parser = OptionParser(usage,version="%prog " + __version__)
    parser.add_option("-i","--input_file",action="store",type="string",dest="input_file",help="Tab-separated data frame file containing beta values with the 1st row containing sample IDs and the 1st column containing CpG IDs.")
    parser.add_option("-g","--group",action="store",type="string",dest="group_file",help="Comma-separated group file defining the batch groups of each sample.")
    parser.add_option("-o","--output",action="store",type='string', dest="out_file",help="The prefix of the output file.")
    (options,args)=parser.parse_args()

    print ()
    if not (options.input_file):
        print (__doc__)
        parser.print_help()
        sys.exit(101)
    if not (options.group_file):
        print (__doc__)
        parser.print_help()
        sys.exit(101)
    if not (options.out_file):
        print (__doc__)
        parser.print_help()
        sys.exit(103)

    beta_out = options.out_file + '.combat.tsv'
    beta_boxplot_before = options.out_file + '.boxplot.png'
    beta_boxplot_after = options.out_file + '.boxplot_combat.png'

    printlog("Reading input file: \"%s\" ..." % (options.input_file))
    df1 = pd.read_csv(options.input_file, index_col = 0, sep="\t")

    # remove rows with NA
    df2 = df1.dropna(axis=0, how='any')
    printlog("%d rows with missing values were removed." % (len(df1.index) - len(df2.index)))
    if df2.empty:
        print ("No data left after removing rows with missing values", file = sys.stderr)
        sys.exit(1)

    printlog("Reading group file: \"%s\" ..." % (options.group_file))
    group = pd.read_csv(options.group_file, index_col=0, names=['Sample_ID', 'Group_ID'])
    # Data-file column labels are strings; compare like with like.
    group.index = group.index.map(str)
    # check if sample IDs are unique
    if len(group.index) != len(group.index.unique()):
        print ("Sample IDs are not unique", file = sys.stderr)
        sys.exit(1)
    printlog("Group/batch \"%s\" contains %d samples" % (options.group_file, len(group.index)))

    # Look up the batch of every data column, in data-column order, so that
    # the i-th batch label always describes the i-th sample handed to
    # pycombat and to the box plots. Extra rows in the group file (such as a
    # header line) are simply never looked up.
    sample_ids = [str(s) for s in df2.columns]
    missing = [s for s in sample_ids if s not in group.index]
    if missing:
        print ("Samples not found in group file: %s" % ", ".join(missing), file = sys.stderr)
        sys.exit(1)
    batches = list(group.loc[sample_ids, 'Group_ID'])

    # unique group names (first-seen order) and their frequencies
    group_info = Counter(batches)
    for g_name, g_count in group_info.items():
        printlog("Batch \"%s\": %d samples" % (g_name, g_count))

    # one color per group, then one color per sample according to its group
    color_names = pick_colors(len(group_info))
    color_of_group = dict(zip(group_info.keys(), color_names))
    color_list = [color_of_group[b] for b in batches]

    printlog("Generate boxplot before correction. Save to '%s'" % beta_boxplot_before)
    box_plot(df2, s_colors=color_list, out_png=beta_boxplot_before, title="Before batch effects correction")

    # remove batch effect
    printlog("Removing batch effect ...")
    df_corrected = pycombat(df2, batches)

    df_corrected.to_csv(beta_out,sep="\t")

    printlog("Generate boxplot after correction. Save to '%s'" % beta_boxplot_after)
    box_plot(df_corrected, s_colors=color_list, out_png=beta_boxplot_after, title="After batch effects correction")
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
if __name__=='__main__':
|
|
138
|
+
main()
|
|
@@ -189,7 +189,7 @@ def main():
|
|
|
189
189
|
else:
|
|
190
190
|
continue
|
|
191
191
|
line_num += 1
|
|
192
|
-
|
|
192
|
+
|
|
193
193
|
printlog("Perfrom Benjamini-Hochberg (aka FDR) correction ...")
|
|
194
194
|
adjusted_p = {}
|
|
195
195
|
q_list = padjust.multiple_testing_correction(p_list)
|
|
@@ -204,6 +204,10 @@ def main():
|
|
|
204
204
|
else:
|
|
205
205
|
f = l.split()
|
|
206
206
|
probe_ID = f[0]
|
|
207
|
+
if probe_ID in delta_beta:
|
|
208
|
+
pass
|
|
209
|
+
else:
|
|
210
|
+
delta_beta[probe_ID] = 'n/a'
|
|
207
211
|
try:
|
|
208
212
|
print (l + '\t' + str(delta_beta[probe_ID]) + '\t' + adjusted_p[probe_ID], file=FOUT)
|
|
209
213
|
except:
|
|
@@ -20,6 +20,7 @@ from impyute.cs.em import em
|
|
|
20
20
|
from impyute.ops.util import toy_df,insert_na
|
|
21
21
|
from impyute.cs.random import random_impute
|
|
22
22
|
from impyute.cs.buck_iterative import buck_iterative
|
|
23
|
+
from impyute.ts.moving_window import moving_window
|
|
23
24
|
from missingpy import MissForest
|
|
24
25
|
|
|
25
26
|
#use pip to install fancyimpute
|
|
@@ -64,6 +65,7 @@ def nafiller():
|
|
|
64
65
|
the same row or column.",
|
|
65
66
|
'FillRef': "Impute missing values using values from an external \
|
|
66
67
|
reference dataset.",
|
|
68
|
+
'MW': "Interpolate the missing values with moving window.",
|
|
67
69
|
'KNN': "Impute missing values using scikit-learn's KNNImputer function. \
|
|
68
70
|
Note: slow for large datasets.",
|
|
69
71
|
'KNN2': "Impute missing values using KNN2",
|
|
@@ -98,6 +100,7 @@ def nafiller():
|
|
|
98
100
|
FillMax_parser = sub_parsers.add_parser('FillMax', help=commands['FillMax'])
|
|
99
101
|
FillRand_parser = sub_parsers.add_parser('FillRand', help=commands['FillRand'])
|
|
100
102
|
FillRef_parser = sub_parsers.add_parser('FillRef', help=commands['FillRef'])
|
|
103
|
+
MW_parser = sub_parsers.add_parser('MW', help=commands['MW'])
|
|
101
104
|
KNN_parser = sub_parsers.add_parser('KNN', help=commands['KNN'])
|
|
102
105
|
fKNN_parser = sub_parsers.add_parser('fKNN', help=commands['fKNN'])
|
|
103
106
|
EM_parser = sub_parsers.add_parser('EM', help=commands['EM'])
|
|
@@ -116,7 +119,7 @@ def nafiller():
|
|
|
116
119
|
'output', type=str, metavar='out_df',
|
|
117
120
|
help="Output data frame.")
|
|
118
121
|
DropNA_parser.add_argument(
|
|
119
|
-
'
|
|
122
|
+
'--axis', type=int, choices=range(2), default=0,
|
|
120
123
|
help="0 : drop rows with any missing values, 1 : drop columns with \
|
|
121
124
|
missing values. Default: 0")
|
|
122
125
|
DropNA_parser.add_argument(
|
|
@@ -130,7 +133,7 @@ def nafiller():
|
|
|
130
133
|
'output', type=str, metavar='out_df',
|
|
131
134
|
help="Output data frame.")
|
|
132
135
|
FillValue_parser.add_argument(
|
|
133
|
-
'
|
|
136
|
+
'--score', type=float, default=0.0,
|
|
134
137
|
help="The value uesd to fill all NAs.")
|
|
135
138
|
FillValue_parser.add_argument(
|
|
136
139
|
'--decimal', type=int, default=5,
|
|
@@ -143,7 +146,7 @@ def nafiller():
|
|
|
143
146
|
'output', type=str, metavar='out_df',
|
|
144
147
|
help="Output data frame.")
|
|
145
148
|
FillMean_parser.add_argument(
|
|
146
|
-
'
|
|
149
|
+
'--axis', type=int, choices=range(2), default=1,
|
|
147
150
|
help="0 means column, 1 means row. Default: fill NAs with row means")
|
|
148
151
|
FillMean_parser.add_argument(
|
|
149
152
|
'--decimal', type=int, default=5,
|
|
@@ -156,7 +159,7 @@ def nafiller():
|
|
|
156
159
|
'output', type=str, metavar='out_df',
|
|
157
160
|
help="Output data frame.")
|
|
158
161
|
FillMedian_parser.add_argument(
|
|
159
|
-
'
|
|
162
|
+
'--axis', type=int, choices=range(2), default=1,
|
|
160
163
|
help="0 means column, 1 means row. Default: fill NAs with row medians")
|
|
161
164
|
FillMedian_parser.add_argument(
|
|
162
165
|
'--decimal', type=int, default=5,
|
|
@@ -169,7 +172,7 @@ def nafiller():
|
|
|
169
172
|
'output', type=str, metavar='out_df',
|
|
170
173
|
help="Output data frame.")
|
|
171
174
|
FillMin_parser.add_argument(
|
|
172
|
-
'
|
|
175
|
+
'--axis', type=int, choices=range(2), default=1,
|
|
173
176
|
help="0 means column, 1 means row. Default: fill NAs with the minimum value of the rows.")
|
|
174
177
|
FillMin_parser.add_argument(
|
|
175
178
|
'--decimal', type=int, default=5,
|
|
@@ -182,7 +185,7 @@ def nafiller():
|
|
|
182
185
|
'output', type=str, metavar='out_df',
|
|
183
186
|
help="Output data frame.")
|
|
184
187
|
FillMax_parser.add_argument(
|
|
185
|
-
'
|
|
188
|
+
'--axis', type=int, choices=range(2), default=1,
|
|
186
189
|
help="0 means column, 1 means row. Default: fill NAs with the maximum value of the rows.")
|
|
187
190
|
FillMax_parser.add_argument(
|
|
188
191
|
'--decimal', type=int, default=5,
|
|
@@ -195,7 +198,7 @@ def nafiller():
|
|
|
195
198
|
'output', type=str, metavar='out_df',
|
|
196
199
|
help="Output data frame.")
|
|
197
200
|
FillRand_parser.add_argument(
|
|
198
|
-
'
|
|
201
|
+
'--axis', type=int, choices=range(2), default=1,
|
|
199
202
|
help="0 means column, 1 means row. Default: fill NAs with values randomly selected from rows.")
|
|
200
203
|
FillRand_parser.add_argument(
|
|
201
204
|
'--decimal', type=int, default=5,
|
|
@@ -208,12 +211,50 @@ def nafiller():
|
|
|
208
211
|
'output', type=str, metavar='out_df',
|
|
209
212
|
help="Output data frame.")
|
|
210
213
|
FillRef_parser.add_argument(
|
|
211
|
-
'
|
|
214
|
+
'--ref', type=str,
|
|
212
215
|
help="File name of the external reference.")
|
|
213
216
|
FillRef_parser.add_argument(
|
|
214
217
|
'--decimal', type=int, default=5,
|
|
215
218
|
help="Number of decimal places to round each column to. default: %(default)s")
|
|
216
219
|
|
|
220
|
+
MW_parser.add_argument(
|
|
221
|
+
'input', type=str, metavar='input_df',
|
|
222
|
+
help="Input data frame.")
|
|
223
|
+
MW_parser.add_argument(
|
|
224
|
+
'output', type=str, metavar='out_df',
|
|
225
|
+
help="Output data frame.")
|
|
226
|
+
MW_parser.add_argument(
|
|
227
|
+
'--nindex', type=int, choices=[0, -1, None],
|
|
228
|
+
default=None,
|
|
229
|
+
help="Null index. Index of the null value inside the moving average window. \
|
|
230
|
+
See impyute documentation for details. default: %(default)s")
|
|
231
|
+
MW_parser.add_argument(
|
|
232
|
+
'--wsize', type=int, default=5,
|
|
233
|
+
help="Size of the moving average window/area of values being used \
|
|
234
|
+
for each local imputation. This number includes the missing value. \
|
|
235
|
+
default: %(default)s")
|
|
236
|
+
MW_parser.add_argument(
|
|
237
|
+
'--errors', type=str, choices=["raise", "coerce", "ignore"],
|
|
238
|
+
default='coerce',
|
|
239
|
+
help="Errors will occur with the indexing of the windows - for \
|
|
240
|
+
example if there is a nan at data[x][0] and `nindex` is set to \
|
|
241
|
+
-1 or there is a nan at data[x][-1] and `nindex` is set to 0. `\
|
|
242
|
+
'raise' will raise an error, `coerce` will try again using an \
|
|
243
|
+
nindex set to the middle and `ignore` will just leave it as a \
|
|
244
|
+
nan default: %(default)s")
|
|
245
|
+
MW_parser.add_argument(
|
|
246
|
+
'--func', type=str, choices=["mean", "median"],
|
|
247
|
+
default='mean',
|
|
248
|
+
help="Function to summerzie values within the moving window. \
|
|
249
|
+
default: %(default)s")
|
|
250
|
+
MW_parser.add_argument(
|
|
251
|
+
'--axis', type=int, choices=range(2), default=1,
|
|
252
|
+
help="0 means column, 1 means row. Default: fill missing value \
|
|
253
|
+
with windows moving on rows.")
|
|
254
|
+
MW_parser.add_argument(
|
|
255
|
+
'--decimal', type=int, default=5,
|
|
256
|
+
help="Number of decimal places to round each column to. default: %(default)s")
|
|
257
|
+
|
|
217
258
|
KNN_parser.add_argument(
|
|
218
259
|
'input', type=str, metavar='input_df',
|
|
219
260
|
help="Input data frame.")
|
|
@@ -306,20 +347,28 @@ def nafiller():
|
|
|
306
347
|
'output', type=str, metavar='out_df',
|
|
307
348
|
help="Name of the output data frame.")
|
|
308
349
|
ToyDf_parser.add_argument(
|
|
309
|
-
'
|
|
350
|
+
'--nrow', type=int, default=10,
|
|
310
351
|
help="Number of rows. default: %(default)s")
|
|
311
352
|
ToyDf_parser.add_argument(
|
|
312
|
-
'
|
|
353
|
+
'--ncol', type=int, default=10,
|
|
313
354
|
help="Number of columns. default: %(default)s")
|
|
314
355
|
ToyDf_parser.add_argument(
|
|
315
|
-
'--
|
|
356
|
+
'--nmiss', type=float, default=5,
|
|
316
357
|
help="Number of missing values ingested into the dataframe. default: %(default)s")
|
|
317
358
|
ToyDf_parser.add_argument(
|
|
318
|
-
'
|
|
359
|
+
'--seed', type=int, default=123,
|
|
319
360
|
help="Seed used to initialize a pseudorandom number generator. default: %(default)s")
|
|
320
361
|
ToyDf_parser.add_argument(
|
|
321
362
|
'--prefix', type=str, default='s',
|
|
322
|
-
help="Prefix of the column names, a series numbers will be appended to
|
|
363
|
+
help="Prefix of the column names, a series numbers will be appended to \
|
|
364
|
+
the prefix. If this is set to None, as np.ndarray rather than pd.dataframe \
|
|
365
|
+
will be returned. default: %(default)s")
|
|
366
|
+
ToyDf_parser.add_argument(
|
|
367
|
+
'--min', type=float, default=0.0,
|
|
368
|
+
help="The minimum value. default: %(default)s")
|
|
369
|
+
ToyDf_parser.add_argument(
|
|
370
|
+
'--max', type=float, default=1.0,
|
|
371
|
+
help="The maximum value. default: %(default)s")
|
|
323
372
|
ToyDf_parser.add_argument(
|
|
324
373
|
'--decimal', type=int, default=5,
|
|
325
374
|
help="Number of decimal places to round each column to. default: %(default)s")
|
|
@@ -331,10 +380,10 @@ def nafiller():
|
|
|
331
380
|
'output', type=str, metavar='out_df',
|
|
332
381
|
help="Output data frame.")
|
|
333
382
|
InsertNA_parser.add_argument(
|
|
334
|
-
'--
|
|
383
|
+
'--nmiss', type=int,
|
|
335
384
|
help="Number of missing values ingested into the dataframe.")
|
|
336
385
|
InsertNA_parser.add_argument(
|
|
337
|
-
'
|
|
386
|
+
'--seed', type=int, default=123,
|
|
338
387
|
help="Seed used to initialize a pseudorandom number generator. default: %(default)s")
|
|
339
388
|
InsertNA_parser.add_argument(
|
|
340
389
|
'--decimal', type=int, default=5,
|
|
@@ -483,6 +532,25 @@ def nafiller():
|
|
|
483
532
|
continue
|
|
484
533
|
output_df = input_df
|
|
485
534
|
output_df = output_df.round(args.decimal)
|
|
535
|
+
output_df.to_csv(args.output, sep="\t", na_rep="NaN")
|
|
536
|
+
logging.info("File \"%s\" contains %d missing values ..." %
|
|
537
|
+
(args.output, output_df.isna().sum().sum()))
|
|
538
|
+
elif command.lower() == 'mw':
|
|
539
|
+
input_df = read_df(args.input)
|
|
540
|
+
logging.info("File \"%s\" contains %d missing values ..." %
|
|
541
|
+
(args.input, input_df.isna().sum().sum()))
|
|
542
|
+
logging.info("Replace missing values using moving window on %s ..." % axis_name[args.axis])
|
|
543
|
+
if args.axis == 1:
|
|
544
|
+
output_df = moving_window(
|
|
545
|
+
input_df, nindex = args.nindex, wsize=args.wsize,
|
|
546
|
+
errors=args.errors, func=getattr(np, args.func))
|
|
547
|
+
output_df = output_df.round(args.decimal)
|
|
548
|
+
elif args.axis == 0:
|
|
549
|
+
output_df = moving_window(
|
|
550
|
+
input_df.T, nindex = args.nindex, wsize=args.wsize,
|
|
551
|
+
errors=args.errors, func=getattr(np, args.func))
|
|
552
|
+
output_df = output_df.round(args.decimal).T
|
|
553
|
+
|
|
486
554
|
output_df.to_csv(args.output, sep="\t", na_rep="NaN")
|
|
487
555
|
logging.info("File \"%s\" contains %d missing values ..." %
|
|
488
556
|
(args.output, output_df.isna().sum().sum()))
|
|
@@ -576,9 +644,10 @@ def nafiller():
|
|
|
576
644
|
|
|
577
645
|
elif command.lower() == 'toydf':
|
|
578
646
|
logging.info("Generate toy dataframe ...")
|
|
579
|
-
output_df = toy_df(
|
|
580
|
-
|
|
581
|
-
|
|
647
|
+
output_df = toy_df(n_rows = args.nrow, n_cols = args.ncol,
|
|
648
|
+
missingness = args.nmiss, sample_prefix=args.prefix,
|
|
649
|
+
min_val = args.min, max_val = args.max,
|
|
650
|
+
rand_seed=args.seed)
|
|
582
651
|
#print(output_df)
|
|
583
652
|
output_df = output_df.round(args.decimal)
|
|
584
653
|
output_df.to_csv(args.output, sep="\t", na_rep="NaN")
|
|
@@ -589,7 +658,7 @@ def nafiller():
|
|
|
589
658
|
logging.info("File \"%s\" contains %d missing values ..." %
|
|
590
659
|
(args.input, input_df.isna().sum().sum()))
|
|
591
660
|
logging.info("Insert %d NAs into dataframe ..." % args.na)
|
|
592
|
-
output_df = insert_na(df=input_df, n_miss=args.
|
|
661
|
+
output_df = insert_na(df=input_df, n_miss=args.nmiss, seed=args.seed)
|
|
593
662
|
output_df = output_df.round(args.decimal)
|
|
594
663
|
output_df.to_csv(args.output, sep="\t", na_rep="NaN")
|
|
595
664
|
logging.info("File \"%s\" contains %d missing values ..." %
|
|
@@ -11,7 +11,6 @@ script_files = [
|
|
|
11
11
|
"scripts/CpG_to_gene.py",
|
|
12
12
|
"scripts/beta_PCA.py",
|
|
13
13
|
"scripts/beta_UMAP.py",
|
|
14
|
-
"scripts/beta_imputation.py",
|
|
15
14
|
"scripts/beta_jitter_plot.py",
|
|
16
15
|
"scripts/beta_m_conversion.py",
|
|
17
16
|
"scripts/beta_profile_gene_centered.py",
|
|
@@ -22,6 +21,7 @@ script_files = [
|
|
|
22
21
|
"scripts/beta_tSNE.py",
|
|
23
22
|
"scripts/beta_topN.py",
|
|
24
23
|
"scripts/beta_trichotmize.py",
|
|
24
|
+
"scripts/beta_remove_batch_effects.py",
|
|
25
25
|
"scripts/dmc_Bayes.py",
|
|
26
26
|
"scripts/dmc_bb.py",
|
|
27
27
|
"scripts/dmc_fisher.py",
|
|
@@ -30,6 +30,7 @@ script_files = [
|
|
|
30
30
|
"scripts/dmc_nonparametric.py",
|
|
31
31
|
"scripts/dmc_ttest.py",
|
|
32
32
|
"scripts/predict_sex.py",
|
|
33
|
+
"scripts/predict_missing.py",
|
|
33
34
|
]
|
|
34
35
|
|
|
35
36
|
if __name__ == "__main__":
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -1,35 +1,15 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: cpgtools
|
|
3
|
-
Version: 2.0.
|
|
3
|
+
Version: 2.0.4
|
|
4
4
|
Summary: Tools to analyze and visualize DNA methylation data
|
|
5
5
|
Author-email: Liguo Wang <wangliguo78@gmail.com>
|
|
6
6
|
Maintainer-email: Liguo Wang <wangliguo78@gmail.com>
|
|
7
|
-
License:
|
|
8
|
-
|
|
9
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
10
|
-
of this software and associated documentation files (the "Software"), to deal
|
|
11
|
-
in the Software without restriction, including without limitation the rights
|
|
12
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
13
|
-
copies of the Software, and to permit persons to whom the Software is
|
|
14
|
-
furnished to do so, subject to the following conditions:
|
|
15
|
-
|
|
16
|
-
The above copyright notice and this permission notice shall be included in all
|
|
17
|
-
copies or substantial portions of the Software.
|
|
18
|
-
|
|
19
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
20
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
21
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
22
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
23
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
24
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
25
|
-
SOFTWARE.
|
|
26
|
-
|
|
7
|
+
License-Expression: MIT
|
|
27
8
|
Project-URL: Documentation, https://cpgtools.readthedocs.io/en/latest/index.html
|
|
28
9
|
Project-URL: Repository, https://github.com/liguowang/cpgtools.git
|
|
29
10
|
Keywords: DNA methylation,EPIC,450K,850K,935K,RRBS,WGBS
|
|
30
11
|
Classifier: Programming Language :: Python :: 3
|
|
31
12
|
Classifier: Development Status :: 4 - Beta
|
|
32
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
33
13
|
Classifier: Environment :: Console
|
|
34
14
|
Classifier: Intended Audience :: Science/Research
|
|
35
15
|
Classifier: Operating System :: MacOS :: MacOS X
|
|
@@ -37,7 +17,7 @@ Classifier: Operating System :: POSIX
|
|
|
37
17
|
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
38
18
|
Requires-Python: >=3.5
|
|
39
19
|
Description-Content-Type: text/markdown
|
|
40
|
-
License-File: LICENSE
|
|
20
|
+
License-File: LICENSE.txt
|
|
41
21
|
Requires-Dist: numpy
|
|
42
22
|
Requires-Dist: scipy
|
|
43
23
|
Requires-Dist: scikit-learn
|
|
@@ -46,6 +26,8 @@ Requires-Dist: bx-python
|
|
|
46
26
|
Requires-Dist: pandas
|
|
47
27
|
Requires-Dist: umap-learn
|
|
48
28
|
Requires-Dist: fancyimpute
|
|
29
|
+
Requires-Dist: pycombat
|
|
30
|
+
Dynamic: license-file
|
|
49
31
|
|
|
50
32
|
## Install CpGtools using [pip](https://pip.pypa.io/en/stable/)
|
|
51
33
|
|
|
@@ -1,9 +1,10 @@
|
|
|
1
|
-
LICENSE
|
|
1
|
+
LICENSE.txt
|
|
2
2
|
MANIFEST.in
|
|
3
3
|
README.md
|
|
4
4
|
distribute_setup.py
|
|
5
5
|
pyproject.toml
|
|
6
6
|
setup.py
|
|
7
|
+
scripts/.DS_Store
|
|
7
8
|
scripts/CpG_aggregation.py
|
|
8
9
|
scripts/CpG_anno_position.py
|
|
9
10
|
scripts/CpG_anno_probe.py
|
|
@@ -15,11 +16,11 @@ scripts/CpG_logo.py
|
|
|
15
16
|
scripts/CpG_to_gene.py
|
|
16
17
|
scripts/beta_PCA.py
|
|
17
18
|
scripts/beta_UMAP.py
|
|
18
|
-
scripts/beta_imputation.py
|
|
19
19
|
scripts/beta_jitter_plot.py
|
|
20
20
|
scripts/beta_m_conversion.py
|
|
21
21
|
scripts/beta_profile_gene_centered.py
|
|
22
22
|
scripts/beta_profile_region.py
|
|
23
|
+
scripts/beta_remove_batch_effects.py
|
|
23
24
|
scripts/beta_selectNBest.py
|
|
24
25
|
scripts/beta_stacked_barplot.py
|
|
25
26
|
scripts/beta_stats.py
|
|
@@ -33,6 +34,7 @@ scripts/dmc_glm.py
|
|
|
33
34
|
scripts/dmc_logit.py
|
|
34
35
|
scripts/dmc_nonparametric.py
|
|
35
36
|
scripts/dmc_ttest.py
|
|
37
|
+
scripts/predict_missing.py
|
|
36
38
|
scripts/predict_sex.py
|
|
37
39
|
src/cpgmodule/BED.py
|
|
38
40
|
src/cpgmodule/MI.py
|
|
@@ -46,6 +48,25 @@ src/cpgmodule/methylClock.py
|
|
|
46
48
|
src/cpgmodule/padjust.py
|
|
47
49
|
src/cpgmodule/region2gene.py
|
|
48
50
|
src/cpgmodule/utils.py
|
|
51
|
+
src/cpgmodule/data/AltumAge_cpg.pkl
|
|
52
|
+
src/cpgmodule/data/AltumAge_multi_platform_cpgs.pkl
|
|
53
|
+
src/cpgmodule/data/AltumAge_scaler.pkl
|
|
54
|
+
src/cpgmodule/data/GA_Bohlin.pkl
|
|
55
|
+
src/cpgmodule/data/GA_Haftorn.pkl
|
|
56
|
+
src/cpgmodule/data/GA_Knight.pkl
|
|
57
|
+
src/cpgmodule/data/GA_Lee_CPC.pkl
|
|
58
|
+
src/cpgmodule/data/GA_Lee_RPC.pkl
|
|
59
|
+
src/cpgmodule/data/GA_Lee_refined_RPC.pkl
|
|
60
|
+
src/cpgmodule/data/GA_Mayne.pkl
|
|
61
|
+
src/cpgmodule/data/Hannum.pkl
|
|
62
|
+
src/cpgmodule/data/Horvath_2013.pkl
|
|
63
|
+
src/cpgmodule/data/Horvath_2018.pkl
|
|
64
|
+
src/cpgmodule/data/Levine.pkl
|
|
65
|
+
src/cpgmodule/data/Lu_DNAmTL.pkl
|
|
66
|
+
src/cpgmodule/data/Ped_McEwen.pkl
|
|
67
|
+
src/cpgmodule/data/Ped_Wu.pkl
|
|
68
|
+
src/cpgmodule/data/Zhang_BLUP.pkl
|
|
69
|
+
src/cpgmodule/data/Zhang_EN.pkl
|
|
49
70
|
src/cpgmodule/data/__init__.py
|
|
50
71
|
src/cpgtools.egg-info/PKG-INFO
|
|
51
72
|
src/cpgtools.egg-info/SOURCES.txt
|
|
@@ -41,20 +41,40 @@ def execute_fn_with_args_and_or_kwargs(fn, args, kwargs):
|
|
|
41
41
|
except TypeError:
|
|
42
42
|
return fn(*args)
|
|
43
43
|
|
|
44
|
-
def toy_df(
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
44
|
+
def toy_df(n_rows=20, n_cols=5, missingness=0.2, min_val=0, max_val=1,
|
|
45
|
+
missing_value=np.nan, rand_seed=1234, sample_prefix=None):
|
|
46
|
+
"""Generate an array or DataFrame with NaNs"""
|
|
47
|
+
np.random.seed(rand_seed)
|
|
48
|
+
X = np.random.uniform(
|
|
49
|
+
low = min_val, high = max_val, size = n_rows * n_cols).reshape(n_rows, n_cols).astype(
|
|
50
|
+
float)
|
|
51
|
+
# check missingness
|
|
52
|
+
if missingness > 0:
|
|
53
|
+
# If missingness >= 1 then use it as approximate (see below) count
|
|
54
|
+
if missingness >= 1:
|
|
55
|
+
n_missing = int(missingness)
|
|
56
|
+
else:
|
|
57
|
+
n_missing = int(missingness * n_rows * n_cols)
|
|
58
|
+
print(n_missing)
|
|
59
|
+
|
|
60
|
+
# Introduce NaNs until n_miss "NAs" are inserted.
|
|
61
|
+
missing_count = 0
|
|
62
|
+
for i,j in zip(np.random.choice(n_rows, n_missing), np.random.choice(n_cols, n_missing)):
|
|
63
|
+
if np.isnan(X[i][j]):
|
|
64
|
+
continue
|
|
65
|
+
else:
|
|
66
|
+
X[i][j] = missing_value
|
|
67
|
+
missing_count += 1
|
|
68
|
+
if missing_count >= n_missing:
|
|
69
|
+
break
|
|
70
|
+
|
|
71
|
+
# check sample_prefix
|
|
72
|
+
if sample_prefix is None:
|
|
73
|
+
return X
|
|
74
|
+
else:
|
|
75
|
+
colNames = [sample_prefix + '_' + str(i) for i in range(0, n_cols)]
|
|
76
|
+
return pd.DataFrame(X, columns=colNames)
|
|
77
|
+
|
|
58
78
|
|
|
59
79
|
def insert_na(df, n_miss, seed):
|
|
60
80
|
np.random.seed(seed)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|