cpgtools 2.0.0__tar.gz → 2.0.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cpgtools might be problematic. Click here for more details.

Files changed (119) hide show
  1. cpgtools-2.0.3/LICENSE +19 -0
  2. cpgtools-2.0.3/MANIFEST.in +11 -0
  3. cpgtools-2.0.3/PKG-INFO +76 -0
  4. cpgtools-2.0.3/README.md +27 -0
  5. cpgtools-2.0.3/pyproject.toml +48 -0
  6. {cpgtools-2.0.0/bin → cpgtools-2.0.3/scripts}/CpG_aggregation.py +1 -1
  7. {cpgtools-2.0.0/bin → cpgtools-2.0.3/scripts}/CpG_anno_position.py +1 -1
  8. {cpgtools-2.0.0/bin → cpgtools-2.0.3/scripts}/CpG_anno_probe.py +1 -2
  9. {cpgtools-2.0.0/bin → cpgtools-2.0.3/scripts}/CpG_density_gene_centered.py +1 -1
  10. {cpgtools-2.0.0/bin → cpgtools-2.0.3/scripts}/CpG_distrb_chrom.py +1 -1
  11. {cpgtools-2.0.0/bin → cpgtools-2.0.3/scripts}/CpG_distrb_gene_centered.py +1 -1
  12. {cpgtools-2.0.0/bin → cpgtools-2.0.3/scripts}/CpG_distrb_region.py +1 -3
  13. {cpgtools-2.0.0/bin → cpgtools-2.0.3/scripts}/CpG_logo.py +1 -1
  14. {cpgtools-2.0.0/bin → cpgtools-2.0.3/scripts}/CpG_to_gene.py +1 -1
  15. {cpgtools-2.0.0/bin → cpgtools-2.0.3/scripts}/beta_PCA.py +31 -23
  16. {cpgtools-2.0.0/bin → cpgtools-2.0.3/scripts}/beta_UMAP.py +29 -22
  17. {cpgtools-2.0.0/bin → cpgtools-2.0.3/scripts}/beta_jitter_plot.py +1 -1
  18. {cpgtools-2.0.0/bin → cpgtools-2.0.3/scripts}/beta_m_conversion.py +1 -1
  19. {cpgtools-2.0.0/bin → cpgtools-2.0.3/scripts}/beta_profile_gene_centered.py +1 -1
  20. {cpgtools-2.0.0/bin → cpgtools-2.0.3/scripts}/beta_profile_region.py +1 -1
  21. {cpgtools-2.0.0/bin → cpgtools-2.0.3/scripts}/beta_selectNBest.py +9 -6
  22. {cpgtools-2.0.0/bin → cpgtools-2.0.3/scripts}/beta_stacked_barplot.py +1 -1
  23. {cpgtools-2.0.0/bin → cpgtools-2.0.3/scripts}/beta_stats.py +1 -1
  24. {cpgtools-2.0.0/bin → cpgtools-2.0.3/scripts}/beta_tSNE.py +31 -24
  25. {cpgtools-2.0.0/bin → cpgtools-2.0.3/scripts}/beta_topN.py +1 -1
  26. {cpgtools-2.0.0/bin → cpgtools-2.0.3/scripts}/beta_trichotmize.py +1 -1
  27. {cpgtools-2.0.0/bin → cpgtools-2.0.3/scripts}/dmc_Bayes.py +1 -1
  28. {cpgtools-2.0.0/bin → cpgtools-2.0.3/scripts}/dmc_bb.py +1 -1
  29. {cpgtools-2.0.0/bin → cpgtools-2.0.3/scripts}/dmc_fisher.py +1 -1
  30. {cpgtools-2.0.0/bin → cpgtools-2.0.3/scripts}/dmc_glm.py +1 -1
  31. {cpgtools-2.0.0/bin → cpgtools-2.0.3/scripts}/dmc_logit.py +1 -1
  32. {cpgtools-2.0.0/bin → cpgtools-2.0.3/scripts}/dmc_nonparametric.py +1 -1
  33. {cpgtools-2.0.0/bin → cpgtools-2.0.3/scripts}/dmc_ttest.py +6 -2
  34. cpgtools-2.0.3/scripts/predict_missing.py +673 -0
  35. cpgtools-2.0.3/scripts/predict_sex.py +126 -0
  36. cpgtools-2.0.3/setup.py +36 -0
  37. cpgtools-2.0.3/src/cpgmodule/_version.py +1 -0
  38. {cpgtools-2.0.0/lib → cpgtools-2.0.3/src}/cpgmodule/utils.py +35 -0
  39. cpgtools-2.0.3/src/cpgtools.egg-info/PKG-INFO +76 -0
  40. cpgtools-2.0.3/src/cpgtools.egg-info/SOURCES.txt +108 -0
  41. {cpgtools-2.0.0/lib → cpgtools-2.0.3/src}/cpgtools.egg-info/requires.txt +1 -0
  42. cpgtools-2.0.3/src/cpgtools.egg-info/top_level.txt +3 -0
  43. cpgtools-2.0.3/src/impyute/__init__.py +3 -0
  44. cpgtools-2.0.3/src/impyute/contrib/__init__.py +7 -0
  45. cpgtools-2.0.3/src/impyute/contrib/compare.py +69 -0
  46. cpgtools-2.0.3/src/impyute/contrib/count_missing.py +30 -0
  47. cpgtools-2.0.3/src/impyute/contrib/describe.py +63 -0
  48. cpgtools-2.0.3/src/impyute/cs/__init__.py +11 -0
  49. cpgtools-2.0.3/src/impyute/cs/buck_iterative.py +82 -0
  50. cpgtools-2.0.3/src/impyute/cs/central_tendency.py +84 -0
  51. cpgtools-2.0.3/src/impyute/cs/em.py +52 -0
  52. cpgtools-2.0.3/src/impyute/cs/fast_knn.py +130 -0
  53. cpgtools-2.0.3/src/impyute/cs/random.py +27 -0
  54. cpgtools-2.0.3/src/impyute/dataset/__init__.py +6 -0
  55. cpgtools-2.0.3/src/impyute/dataset/base.py +137 -0
  56. cpgtools-2.0.3/src/impyute/dataset/corrupt.py +55 -0
  57. cpgtools-2.0.3/src/impyute/deletion/__init__.py +5 -0
  58. cpgtools-2.0.3/src/impyute/deletion/complete_case.py +21 -0
  59. cpgtools-2.0.3/src/impyute/ops/__init__.py +12 -0
  60. cpgtools-2.0.3/src/impyute/ops/error.py +9 -0
  61. cpgtools-2.0.3/src/impyute/ops/inverse_distance_weighting.py +31 -0
  62. cpgtools-2.0.3/src/impyute/ops/matrix.py +47 -0
  63. cpgtools-2.0.3/src/impyute/ops/testing.py +20 -0
  64. cpgtools-2.0.3/src/impyute/ops/util.py +96 -0
  65. cpgtools-2.0.3/src/impyute/ops/wrapper.py +179 -0
  66. cpgtools-2.0.3/src/impyute/ts/__init__.py +6 -0
  67. cpgtools-2.0.3/src/impyute/ts/locf.py +57 -0
  68. cpgtools-2.0.3/src/impyute/ts/moving_window.py +128 -0
  69. cpgtools-2.0.3/src/missingpy/__init__.py +4 -0
  70. cpgtools-2.0.3/src/missingpy/knnimpute.py +328 -0
  71. cpgtools-2.0.3/src/missingpy/missforest.py +556 -0
  72. cpgtools-2.0.3/src/missingpy/pairwise_external.py +315 -0
  73. cpgtools-2.0.3/src/missingpy/tests/__init__.py +0 -0
  74. cpgtools-2.0.3/src/missingpy/tests/test_knnimpute.py +605 -0
  75. cpgtools-2.0.3/src/missingpy/tests/test_missforest.py +409 -0
  76. cpgtools-2.0.3/src/missingpy/utils.py +124 -0
  77. cpgtools-2.0.0/LICENSE.txt +0 -674
  78. cpgtools-2.0.0/MANIFEST.in +0 -11
  79. cpgtools-2.0.0/PKG-INFO +0 -20
  80. cpgtools-2.0.0/README.md +0 -20
  81. cpgtools-2.0.0/lib/cpgtools.egg-info/PKG-INFO +0 -20
  82. cpgtools-2.0.0/lib/cpgtools.egg-info/SOURCES.txt +0 -72
  83. cpgtools-2.0.0/lib/cpgtools.egg-info/not-zip-safe +0 -1
  84. cpgtools-2.0.0/lib/cpgtools.egg-info/top_level.txt +0 -2
  85. cpgtools-2.0.0/setup.py +0 -47
  86. {cpgtools-2.0.0 → cpgtools-2.0.3}/distribute_setup.py +0 -0
  87. {cpgtools-2.0.0/bin → cpgtools-2.0.3/scripts}/.DS_Store +0 -0
  88. {cpgtools-2.0.0 → cpgtools-2.0.3}/setup.cfg +0 -0
  89. {cpgtools-2.0.0/lib → cpgtools-2.0.3/src}/cpgmodule/BED.py +0 -0
  90. {cpgtools-2.0.0/lib → cpgtools-2.0.3/src}/cpgmodule/MI.py +0 -0
  91. {cpgtools-2.0.0/lib → cpgtools-2.0.3/src}/cpgmodule/__init__.py +0 -0
  92. {cpgtools-2.0.0/lib → cpgtools-2.0.3/src}/cpgmodule/cgID.py +0 -0
  93. {cpgtools-2.0.0/lib → cpgtools-2.0.3/src}/cpgmodule/data/AltumAge_cpg.pkl +0 -0
  94. {cpgtools-2.0.0/lib → cpgtools-2.0.3/src}/cpgmodule/data/AltumAge_multi_platform_cpgs.pkl +0 -0
  95. {cpgtools-2.0.0/lib → cpgtools-2.0.3/src}/cpgmodule/data/AltumAge_scaler.pkl +0 -0
  96. {cpgtools-2.0.0/lib → cpgtools-2.0.3/src}/cpgmodule/data/GA_Bohlin.pkl +0 -0
  97. {cpgtools-2.0.0/lib → cpgtools-2.0.3/src}/cpgmodule/data/GA_Haftorn.pkl +0 -0
  98. {cpgtools-2.0.0/lib → cpgtools-2.0.3/src}/cpgmodule/data/GA_Knight.pkl +0 -0
  99. {cpgtools-2.0.0/lib → cpgtools-2.0.3/src}/cpgmodule/data/GA_Lee_CPC.pkl +0 -0
  100. {cpgtools-2.0.0/lib → cpgtools-2.0.3/src}/cpgmodule/data/GA_Lee_RPC.pkl +0 -0
  101. {cpgtools-2.0.0/lib → cpgtools-2.0.3/src}/cpgmodule/data/GA_Lee_refined_RPC.pkl +0 -0
  102. {cpgtools-2.0.0/lib → cpgtools-2.0.3/src}/cpgmodule/data/GA_Mayne.pkl +0 -0
  103. {cpgtools-2.0.0/lib → cpgtools-2.0.3/src}/cpgmodule/data/Hannum.pkl +0 -0
  104. {cpgtools-2.0.0/lib → cpgtools-2.0.3/src}/cpgmodule/data/Horvath_2013.pkl +0 -0
  105. {cpgtools-2.0.0/lib → cpgtools-2.0.3/src}/cpgmodule/data/Horvath_2018.pkl +0 -0
  106. {cpgtools-2.0.0/lib → cpgtools-2.0.3/src}/cpgmodule/data/Levine.pkl +0 -0
  107. {cpgtools-2.0.0/lib → cpgtools-2.0.3/src}/cpgmodule/data/Lu_DNAmTL.pkl +0 -0
  108. {cpgtools-2.0.0/lib → cpgtools-2.0.3/src}/cpgmodule/data/Ped_McEwen.pkl +0 -0
  109. {cpgtools-2.0.0/lib → cpgtools-2.0.3/src}/cpgmodule/data/Ped_Wu.pkl +0 -0
  110. {cpgtools-2.0.0/lib → cpgtools-2.0.3/src}/cpgmodule/data/Zhang_BLUP.pkl +0 -0
  111. {cpgtools-2.0.0/lib → cpgtools-2.0.3/src}/cpgmodule/data/Zhang_EN.pkl +0 -0
  112. {cpgtools-2.0.0/lib → cpgtools-2.0.3/src}/cpgmodule/data/__init__.py +0 -0
  113. {cpgtools-2.0.0/lib → cpgtools-2.0.3/src}/cpgmodule/extend_bed.py +0 -0
  114. {cpgtools-2.0.0/lib → cpgtools-2.0.3/src}/cpgmodule/imotif.py +0 -0
  115. {cpgtools-2.0.0/lib → cpgtools-2.0.3/src}/cpgmodule/ireader.py +0 -0
  116. {cpgtools-2.0.0/lib → cpgtools-2.0.3/src}/cpgmodule/methylClock.py +0 -0
  117. {cpgtools-2.0.0/lib → cpgtools-2.0.3/src}/cpgmodule/padjust.py +0 -0
  118. {cpgtools-2.0.0/lib → cpgtools-2.0.3/src}/cpgmodule/region2gene.py +0 -0
  119. {cpgtools-2.0.0/lib → cpgtools-2.0.3/src}/cpgtools.egg-info/dependency_links.txt +0 -0
cpgtools-2.0.3/LICENSE ADDED
@@ -0,0 +1,19 @@
1
+ Copyright (c) 2024 The Python Packaging Authority
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ of this software and associated documentation files (the "Software"), to deal
5
+ in the Software without restriction, including without limitation the rights
6
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ copies of the Software, and to permit persons to whom the Software is
8
+ furnished to do so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in all
11
+ copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19
+ SOFTWARE.
@@ -0,0 +1,11 @@
1
+ include MANIFEST.in
2
+ include README.md
3
+ include PKG-INFO
4
+ include LICENSE
5
+
6
+ include distribute_setup.py
7
+ recursive-include src *.pyx
8
+ recursive-include src *.py
9
+ recursive-include src *.pkl
10
+ recursive-include scripts *
11
+ recursive-include doc *
@@ -0,0 +1,76 @@
1
+ Metadata-Version: 2.1
2
+ Name: cpgtools
3
+ Version: 2.0.3
4
+ Summary: Tools to analyze and visualize DNA methylation data
5
+ Author-email: Liguo Wang <wangliguo78@gmail.com>
6
+ Maintainer-email: Liguo Wang <wangliguo78@gmail.com>
7
+ License: Copyright (c) 2024 The Python Packaging Authority
8
+
9
+ Permission is hereby granted, free of charge, to any person obtaining a copy
10
+ of this software and associated documentation files (the "Software"), to deal
11
+ in the Software without restriction, including without limitation the rights
12
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13
+ copies of the Software, and to permit persons to whom the Software is
14
+ furnished to do so, subject to the following conditions:
15
+
16
+ The above copyright notice and this permission notice shall be included in all
17
+ copies or substantial portions of the Software.
18
+
19
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25
+ SOFTWARE.
26
+
27
+ Project-URL: Documentation, https://cpgtools.readthedocs.io/en/latest/index.html
28
+ Project-URL: Repository, https://github.com/liguowang/cpgtools.git
29
+ Keywords: DNA methylation,EPIC,450K,850K,935K,RRBS,WGBS
30
+ Classifier: Programming Language :: Python :: 3
31
+ Classifier: Development Status :: 4 - Beta
32
+ Classifier: License :: OSI Approved :: MIT License
33
+ Classifier: Environment :: Console
34
+ Classifier: Intended Audience :: Science/Research
35
+ Classifier: Operating System :: MacOS :: MacOS X
36
+ Classifier: Operating System :: POSIX
37
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
38
+ Requires-Python: >=3.5
39
+ Description-Content-Type: text/markdown
40
+ License-File: LICENSE
41
+ Requires-Dist: numpy
42
+ Requires-Dist: scipy
43
+ Requires-Dist: scikit-learn
44
+ Requires-Dist: weblogo
45
+ Requires-Dist: bx-python
46
+ Requires-Dist: pandas
47
+ Requires-Dist: umap-learn
48
+ Requires-Dist: fancyimpute
49
+
50
+ ## Install CpGtools using [pip](https://pip.pypa.io/en/stable/)
51
+
52
+ 1. (Optional) Create Virtual Environments (Note: `venv` is available in Python 3.3 and later. You can also use [virtualenv](https://packaging.python.org/en/latest/key_projects/#virtualenv))
53
+
54
+ `$ python3 -m venv my_env` (will create a directory called my_env)
55
+
56
+ `$ source my_env/bin/activate`
57
+
58
+ 2. Install CpGtools
59
+
60
+ `$ pip install cpgtools`
61
+
62
+ or
63
+
64
+ `$ pip install git+https://github.com/liguowang/cpgtools.git`
65
+
66
+ 3. Upgrade
67
+
68
+ `$ pip install cpgtools --upgrade`
69
+
70
+ 4. Uninstall
71
+
72
+ `$ pip uninstall -y cpgtools`
73
+
74
+ ## Documentation
75
+
76
+ https://cpgtools.readthedocs.io/en/latest/
@@ -0,0 +1,27 @@
1
+ ## Install CpGtools using [pip](https://pip.pypa.io/en/stable/)
2
+
3
+ 1. (Optional) Create Virtual Environments (Note: `venv` is available in Python 3.3 and later. You can also use [virtualenv](https://packaging.python.org/en/latest/key_projects/#virtualenv))
4
+
5
+ `$ python3 -m venv my_env` (will create a directory called my_env)
6
+
7
+ `$ source my_env/bin/activate`
8
+
9
+ 2. Install CpGtools
10
+
11
+ `$ pip install cpgtools`
12
+
13
+ or
14
+
15
+ `$ pip install git+https://github.com/liguowang/cpgtools.git`
16
+
17
+ 3. Upgrade
18
+
19
+ `$ pip install cpgtools --upgrade`
20
+
21
+ 4. Uninstall
22
+
23
+ `$ pip uninstall -y cpgtools`
24
+
25
+ ## Documentation
26
+
27
+ https://cpgtools.readthedocs.io/en/latest/
@@ -0,0 +1,48 @@
1
+ #Declaring the build backend
2
+ [build-system]
3
+ requires = ["setuptools"]
4
+ build-backend = "setuptools.build_meta"
5
+
6
+ #Project's meta data
7
+ [project]
8
+ version = "2.0.3"
9
+ name = "cpgtools"
10
+ authors = [
11
+ {name="Liguo Wang", email="wangliguo78@gmail.com"},
12
+ ]
13
+ maintainers = [
14
+ {name = "Liguo Wang", email = "wangliguo78@gmail.com"}
15
+ ]
16
+ description = "Tools to analyze and visualize DNA methylation data"
17
+ readme = "README.md"
18
+ license = {file = "LICENSE"}
19
+ requires-python = ">=3.5"
20
+
21
+ dependencies = [
22
+ "numpy",
23
+ "scipy",
24
+ "scikit-learn",
25
+ "weblogo",
26
+ "bx-python",
27
+ "pandas",
28
+ "umap-learn",
29
+ "fancyimpute",
30
+ ]
31
+
32
+ classifiers=[
33
+ "Programming Language :: Python :: 3",
34
+ 'Development Status :: 4 - Beta',
35
+ "License :: OSI Approved :: MIT License",
36
+ 'Environment :: Console',
37
+ 'Intended Audience :: Science/Research',
38
+ 'Operating System :: MacOS :: MacOS X',
39
+ 'Operating System :: POSIX',
40
+ 'Topic :: Scientific/Engineering :: Bio-Informatics',
41
+ ]
42
+
43
+ keywords = ["DNA methylation", "EPIC", "450K", "850K", "935K", "RRBS", "WGBS"]
44
+
45
+ [project.urls]
46
+ Documentation = "https://cpgtools.readthedocs.io/en/latest/index.html"
47
+ Repository = "https://github.com/liguowang/cpgtools.git"
48
+
@@ -34,6 +34,7 @@ import numpy as np
34
34
  from scipy.stats import binom
35
35
 
36
36
  from optparse import OptionParser
37
+ from cpgmodule._version import __version__
37
38
  from cpgmodule import ireader
38
39
  from cpgmodule.utils import *
39
40
  from cpgmodule import BED
@@ -44,7 +45,6 @@ __author__ = "Liguo Wang"
44
45
  __copyright__ = "Copyleft"
45
46
  __credits__ = []
46
47
  __license__ = "GPL"
47
- __version__="2.0.0"
48
48
  __maintainer__ = "Liguo Wang"
49
49
  __email__ = "wang.liguo@mayo.edu"
50
50
  __status__ = "Development"
@@ -18,6 +18,7 @@ import subprocess
18
18
  import numpy as np
19
19
  from os.path import basename
20
20
  from optparse import OptionParser
21
+ from cpgmodule._version import __version__
21
22
  from cpgmodule import ireader
22
23
  from cpgmodule.utils import *
23
24
  from cpgmodule import BED
@@ -28,7 +29,6 @@ __author__ = "Liguo Wang"
28
29
  __copyright__ = "Copyleft"
29
30
  __credits__ = []
30
31
  __license__ = "GPL"
31
- __version__="0.1.9"
32
32
  __maintainer__ = "Liguo Wang"
33
33
  __email__ = "wang.liguo@mayo.edu"
34
34
  __status__ = "Development"
@@ -10,13 +10,12 @@ import sys,os
10
10
  from optparse import OptionParser
11
11
  from cpgmodule import ireader
12
12
  from cpgmodule.utils import *
13
-
13
+ from cpgmodule._version import __version__
14
14
 
15
15
  __author__ = "Liguo Wang"
16
16
  __copyright__ = "Copyleft"
17
17
  __credits__ = []
18
18
  __license__ = "GPL"
19
- __version__="2.0.0"
20
19
  __maintainer__ = "Liguo Wang"
21
20
  __email__ = "wang.liguo@mayo.edu"
22
21
  __status__ = "Development"
@@ -19,12 +19,12 @@ from cpgmodule import ireader
19
19
  from cpgmodule.utils import *
20
20
  from cpgmodule import BED
21
21
  from cpgmodule import extend_bed
22
+ from cpgmodule._version import __version__
22
23
 
23
24
  __author__ = "Liguo Wang"
24
25
  __copyright__ = "Copyleft"
25
26
  __credits__ = []
26
27
  __license__ = "GPL"
27
- __version__="2.0.0"
28
28
  __maintainer__ = "Liguo Wang"
29
29
  __email__ = "wang.liguo@mayo.edu"
30
30
  __status__ = "Development"
@@ -14,12 +14,12 @@ import numpy as np
14
14
  from optparse import OptionParser
15
15
  from cpgmodule import ireader
16
16
  from cpgmodule.utils import *
17
+ from cpgmodule._version import __version__
17
18
 
18
19
  __author__ = "Liguo Wang"
19
20
  __copyright__ = "Copyleft"
20
21
  __credits__ = []
21
22
  __license__ = "GPL"
22
- __version__="2.0.0"
23
23
  __maintainer__ = "Liguo Wang"
24
24
  __email__ = "wang.liguo@mayo.edu"
25
25
  __status__ = "Development"
@@ -34,12 +34,12 @@ from optparse import OptionParser
34
34
  from cpgmodule import ireader
35
35
  from cpgmodule.utils import *
36
36
  from cpgmodule import BED
37
+ from cpgmodule._version import __version__
37
38
 
38
39
  __author__ = "Liguo Wang"
39
40
  __copyright__ = "Copyleft"
40
41
  __credits__ = []
41
42
  __license__ = "GPL"
42
- __version__="2.0.0"
43
43
  __maintainer__ = "Liguo Wang"
44
44
  __email__ = "wang.liguo@mayo.edu"
45
45
  __status__ = "Development"
@@ -23,18 +23,16 @@ import sys,os
23
23
  import collections
24
24
  import subprocess
25
25
  import numpy as np
26
- #import re
27
26
  from optparse import OptionParser
28
27
  from cpgmodule import ireader
29
28
  from cpgmodule.utils import *
30
29
  from cpgmodule import BED
31
-
30
+ from cpgmodule._version import __version__
32
31
 
33
32
  __author__ = "Liguo Wang"
34
33
  __copyright__ = "Copyleft"
35
34
  __credits__ = []
36
35
  __license__ = "GPL"
37
- __version__="2.0.0"
38
36
  __maintainer__ = "Liguo Wang"
39
37
  __email__ = "wang.liguo@mayo.edu"
40
38
  __status__ = "Development"
@@ -17,12 +17,12 @@ from cpgmodule import ireader
17
17
  from cpgmodule.utils import *
18
18
  from cpgmodule import BED
19
19
  from cpgmodule.imotif import PSSM
20
+ from cpgmodule._version import __version__
20
21
 
21
22
  __author__ = "Liguo Wang"
22
23
  __copyright__ = "Copyleft"
23
24
  __credits__ = []
24
25
  __license__ = "GPL"
25
- __version__="2.0.0"
26
26
  __maintainer__ = "Liguo Wang"
27
27
  __email__ = "wang.liguo@mayo.edu"
28
28
  __status__ = "Development"
@@ -34,12 +34,12 @@ from optparse import OptionParser
34
34
  from cpgmodule import ireader
35
35
  from cpgmodule.utils import *
36
36
  from cpgmodule.region2gene import *
37
+ from cpgmodule._version import __version__
37
38
 
38
39
  __author__ = "Liguo Wang"
39
40
  __copyright__ = "Copyleft"
40
41
  __credits__ = []
41
42
  __license__ = "GPL"
42
- __version__="2.0.0"
43
43
  __maintainer__ = "Liguo Wang"
44
44
  __email__ = "wang.liguo@mayo.edu"
45
45
  __status__ = "Development"
@@ -32,6 +32,7 @@ import sys
32
32
  import subprocess
33
33
  from optparse import OptionParser
34
34
  from cpgmodule.utils import *
35
+ from cpgmodule._version import __version__
35
36
  import pandas as pd
36
37
  from sklearn.preprocessing import StandardScaler
37
38
  from sklearn.decomposition import PCA
@@ -40,15 +41,15 @@ __author__ = "Liguo Wang"
40
41
  __copyright__ = "Copyleft"
41
42
  __credits__ = []
42
43
  __license__ = "GPL"
43
- __version__="2.0.0"
44
44
  __maintainer__ = "Liguo Wang"
45
45
  __email__ = "wang.liguo@mayo.edu"
46
46
  __status__ = "Development"
47
47
 
48
48
  def pick_colors(n):
49
- my_colors = ['#e6194B', '#3cb44b', '#4363d8', '#f58231', '#911eb4', '#42d4f4', '#f032e6', '#bfef45', '#fabebe', '#469990', '#e6beff', '#9A6324', '#fffac8', '#800000', '#aaffc3', '#808000', '#ffd8b1', '#000075', '#a9a9a9','#ffe119']
49
+ my_colors = [
50
+ "#F0A3FF", "#0075DC", "#993F00", "#4C005C", "#191919", "#005C31", "#2BCE48", "#FFCC99", "#808080", "#94FFB5", "#8F7C00", "#9DCC00", "#C20088", "#003380", "#FFA405", "#FFA8BB", "#426600", "#FF0010", "#5EF1F2", "#00998F", "#E0FF66", "#740AFF", "#990000", "#FFFF80", "#FFE100", "#FF5005"]
50
51
  if n > len(my_colors):
51
- print ("Only support 21 different colors", file = sys.stderr)
52
+ print ("Only support 26 different colors", file = sys.stderr)
52
53
  sys.exit()
53
54
  return my_colors[0:n]
54
55
 
@@ -86,27 +87,30 @@ def main():
86
87
  df1 = pd.read_csv(options.input_file, index_col = 0, sep="\t")
87
88
 
88
89
  #remove NA and transpose
89
- df2 = df1.dropna(axis=0, how='any')
90
- printlog("%d rows with missing values were removed." % (len(df1) - len(df2)))
91
- #print (df2.head())
92
-
93
- printlog("Transposing data frame ...")
94
- df2 = df2.T
95
- #print (df2.head())
96
-
97
- printlog("Standarizing values ...")
98
- x = df2.values
99
- x = StandardScaler().fit_transform(x)
100
-
90
+ df2 = df1.dropna(axis=0, how='any').T
91
+ printlog("%d rows with missing values were removed." % (len(df1.index) - len(df2.columns)))
92
+
101
93
  printlog("Reading group file: \"%s\" ..." % (options.group_file))
102
94
  group = pd.read_csv(options.group_file, index_col=0, header=0,names=['Sample_ID', 'Group_ID'])
103
- group.index = group.index.map(str)
104
-
105
95
  #check if sample IDs are unique
106
96
  if len(group.index) != len(group.index.unique()):
107
97
  print ("Sample IDs are not unique", file = sys.stderr)
108
98
  sys.exit()
99
+ group.index = group.index.map(str)
100
+ printlog("Group file \"%s\" contains %d samples" % (options.group_file, len(group.index)))
101
+
102
+ printlog("Find common sample IDs between group file and data file ...")
103
+ common_samples = list(set(group.index) & set(df2.index))
104
+ used_df = df2.loc[common_samples]
105
+ (usable_sample, usable_cpg) = used_df.shape
106
+ printlog("Used CpGs: %d, Used samples: %d" % (usable_cpg, usable_sample))
107
+
109
108
 
109
+ printlog("Standarizing values ...")
110
+ x = used_df.to_numpy()
111
+ x = StandardScaler().fit_transform(x)
112
+
113
+
110
114
  group_names = group['Group_ID'].unique().tolist() # a list of unique group names
111
115
  color_names = pick_colors(len(group_names)) # a list of unique colors
112
116
  group_to_col = dict(zip(group_names, color_names))
@@ -116,9 +120,9 @@ def main():
116
120
  pca = PCA(n_components = options.n_components, random_state = 0)
117
121
  principalComponents = pca.fit_transform(x)
118
122
  pca_names = [str(i)+str(j) for i,j in zip(['PC']*options.n_components,range(1,options.n_components+1))]
119
- principalDf = pd.DataFrame(data = principalComponents, columns = pca_names, index = df2.index)
123
+ principalDf = pd.DataFrame(data = principalComponents, columns = pca_names, index = used_df.index)
120
124
 
121
- finalDf = pd.concat([principalDf, group], axis = 1, sort=False)
125
+ finalDf = pd.concat([principalDf, group], axis = 1, sort=False, join='inner')
122
126
  finalDf.index.name = 'Sample_ID'
123
127
 
124
128
  printlog("Writing PCA results to file: \"%s\" ..." % (options.out_file + '.PCA.tsv'))
@@ -133,18 +137,22 @@ def main():
133
137
 
134
138
  print ('pdf(file=\"%s\", width=8, height=8)' % (options.out_file + '.PCA.pdf'),file=ROUT)
135
139
  print ('')
136
- print ('d = read.table(file=\"%s\", sep="\\t", header=TRUE, comment.char = "", stringsAsFactors=FALSE)' % (options.out_file + '.PCA.tsv'), file=ROUT)
140
+ print ('d = read.table(file=\"%s\", sep="\\t", header=TRUE, comment.char = "", stringsAsFactors=FALSE)'
141
+ % (options.out_file + '.PCA.tsv'), file=ROUT)
137
142
  print ('attach(d)', file=ROUT)
138
143
  if options.plot_alpha:
139
144
  print ('library(scales)', file=ROUT)
140
- print ('plot(PC1, PC2, col = alpha(Colors, %f), pch=%d, cex=1.5, main="PCA 2D map")' % (options.plot_alpha, pch[options.plot_char]), file=ROUT)
145
+ print ('plot(PC1, PC2, col = alpha(Colors, %f), pch=%d, cex=1.5, main="PCA 2D map", xlab="PC1 (var. explained: %.2f%%)", ylab="PC2 (var. explained: %.2f%%)")'
146
+ % (options.plot_alpha, pch[options.plot_char], pca_vars[0]*100, pca_vars[1]*100), file=ROUT)
141
147
  else:
142
- print ('plot(PC1, PC2, col = Colors, pch=%d, cex=1.2, main="PCA 2D map")' % pch[options.plot_char], file=ROUT)
148
+ print ('plot(PC1, PC2, col = Colors, pch=%d, cex=1.2, main="PCA 2D map", xlab="PC1 (var. explained: %.2f%%)", ylab="PC2 (var. explained: %.2f%%)")'
149
+ % (pca_vars[0]*100, pca_vars[1]*100, pch[options.plot_char], pca_vars[0]*100, pca_vars[1]*100), file=ROUT)
143
150
 
144
151
  if options.text_label:
145
152
  print ('text(PC1, PC2, labels=Sample_ID, col = Colors, cex=0.5, pos=1)', file=ROUT)
146
153
 
147
- print ('legend("%s", legend=c(%s), col=c(%s), pch=%d,cex=1)' % (legend_pos[options.legend_location], ','.join(['"' + str(i) + '"' for i in group_names]), ','.join(['"' + str(group_to_col[i]) + '"' for i in group_names]), pch[options.plot_char]), file=ROUT)
154
+ print ('legend("%s", legend=c(%s), col=c(%s), pch=%d,cex=1)'
155
+ % (legend_pos[options.legend_location], ','.join(['"' + str(i) + '"' for i in group_names]), ','.join(['"' + str(group_to_col[i]) + '"' for i in group_names]), pch[options.plot_char]), file=ROUT)
148
156
 
149
157
 
150
158
  print ('dev.off()', file=ROUT)
@@ -32,6 +32,7 @@ import pandas as pd
32
32
  import subprocess
33
33
  from optparse import OptionParser
34
34
  from cpgmodule.utils import *
35
+ from cpgmodule._version import __version__
35
36
  from sklearn.preprocessing import StandardScaler
36
37
  #import datatable as dt
37
38
  #import seaborn as sns
@@ -41,15 +42,15 @@ __author__ = "Liguo Wang"
41
42
  __copyright__ = "Copyleft"
42
43
  __credits__ = []
43
44
  __license__ = "GPL"
44
- __version__="2.0.0"
45
45
  __maintainer__ = "Liguo Wang"
46
46
  __email__ = "wang.liguo@mayo.edu"
47
47
  __status__ = "Development"
48
48
 
49
49
  def pick_colors(n):
50
- my_colors = ['#e6194B', '#3cb44b', '#4363d8', '#f58231', '#911eb4', '#42d4f4', '#f032e6', '#bfef45', '#fabebe', '#469990', '#e6beff', '#9A6324', '#fffac8', '#800000', '#aaffc3', '#808000', '#ffd8b1', '#000075', '#a9a9a9','#ffe119']
50
+ my_colors = [
51
+ "#F0A3FF", "#0075DC", "#993F00", "#4C005C", "#191919", "#005C31", "#2BCE48", "#FFCC99", "#808080", "#94FFB5", "#8F7C00", "#9DCC00", "#C20088", "#003380", "#FFA405", "#FFA8BB", "#426600", "#FF0010", "#5EF1F2", "#00998F", "#E0FF66", "#740AFF", "#990000", "#FFFF80", "#FFE100", "#FF5005"]
51
52
  if n > len(my_colors):
52
- print ("Only support 21 different colors", file = sys.stderr)
53
+ print ("Only support 26 different colors", file = sys.stderr)
53
54
  sys.exit()
54
55
  return my_colors[0:n]
55
56
 
@@ -99,26 +100,28 @@ def main():
99
100
  df1 = pd.read_csv(options.input_file, index_col = 0, sep="\t")
100
101
 
101
102
  #remove NA and transpose
102
- df2 = df1.dropna(axis=0, how='any')
103
- printlog("%d rows with missing values were removed." % (len(df1) - len(df2)))
104
- #print (df2.head())
105
-
106
- printlog("Transposing data frame ...")
107
- df2 = df2.T
108
- #print (df2.head())
109
-
110
- printlog("Standarizing values ...")
111
- x = df2.values
112
- x = StandardScaler().fit_transform(x)
103
+ df2 = df1.dropna(axis=0, how='any').T
104
+ printlog("%d rows with missing values were removed." % (len(df1.index) - len(df2.columns)))
113
105
 
114
106
  printlog("Reading group file: \"%s\" ..." % (options.group_file))
115
107
  group = pd.read_csv(options.group_file, index_col=0, header=0,names=['Sample_ID', 'Group_ID'])
116
- group.index = group.index.map(str)
117
-
118
108
  #check if sample IDs are unique
119
109
  if len(group.index) != len(group.index.unique()):
120
110
  print ("Sample IDs are not unique", file = sys.stderr)
121
111
  sys.exit()
112
+ group.index = group.index.map(str)
113
+ printlog("Group file \"%s\" contains %d samples" % (options.group_file, len(group.index)))
114
+
115
+ printlog("Find common sample IDs between group file and data file ...")
116
+ common_samples = list(set(group.index) & set(df2.index))
117
+ used_df = df2.loc[common_samples]
118
+ (usable_sample, usable_cpg) = used_df.shape
119
+ printlog("Used CpGs: %d, Used samples: %d" % (usable_cpg, usable_sample))
120
+
121
+ printlog("Standarizing values ...")
122
+ x = used_df.to_numpy()
123
+ x = StandardScaler().fit_transform(x)
124
+
122
125
 
123
126
  group_names = group['Group_ID'].unique().tolist() # a list of unique group names
124
127
  color_names = pick_colors(len(group_names)) # a list of unique colors
@@ -133,9 +136,9 @@ def main():
133
136
  #pca = PCA(n_components = options.n_components, random_state = 0)
134
137
  #principalComponents = pca.fit_transform(x)
135
138
  pca_names = [str(i)+str(j) for i,j in zip(['UMAP']*options.n_components,range(1,options.n_components+1))]
136
- principalDf = pd.DataFrame(data = principalComponents, columns = pca_names, index = df2.index)
139
+ principalDf = pd.DataFrame(data = principalComponents, columns = pca_names, index = used_df.index)
137
140
 
138
- finalDf = pd.concat([principalDf, group], axis = 1, sort=False)
141
+ finalDf = pd.concat([principalDf, group], axis = 1, sort=False, join='inner')
139
142
  finalDf.index.name = 'Sample_ID'
140
143
 
141
144
  printlog("Writing UMAP results to file: \"%s\" ..." % (options.out_file + '.UMAP.tsv'))
@@ -146,18 +149,22 @@ def main():
146
149
 
147
150
  print ('pdf(file=\"%s\", width=8, height=8)' % (options.out_file + '.UMAP.pdf'),file=ROUT)
148
151
  print ('')
149
- print ('d = read.table(file=\"%s\", sep="\\t", header=TRUE, comment.char = "", stringsAsFactors=FALSE)' % (options.out_file + '.UMAP.tsv'), file=ROUT)
152
+ print ('d = read.table(file=\"%s\", sep="\\t", header=TRUE, comment.char = "", stringsAsFactors=FALSE)'
153
+ % (options.out_file + '.UMAP.tsv'), file=ROUT)
150
154
  print ('attach(d)', file=ROUT)
151
155
  if options.plot_alpha:
152
156
  print ('library(scales)', file=ROUT)
153
- print ('plot(UMAP1, UMAP2, col = alpha(Colors, %f), pch=%d, cex=1.5, main="UMAP 2D map", xlab="UMAP_1", ylab="UMAP_2")' % (options.plot_alpha, pch[options.plot_char]), file=ROUT)
157
+ print ('plot(UMAP1, UMAP2, col = alpha(Colors, %f), pch=%d, cex=1.5, main="UMAP 2D map", xlab="UMAP_1", ylab="UMAP_2")'
158
+ % (options.plot_alpha, pch[options.plot_char]), file=ROUT)
154
159
  else:
155
- print ('plot(UMAP1, UMAP2, col = Colors, pch=%d, cex=1.2, main="UMAP 2D map", xlab="UMAP_1", ylab="UMAP_2")' % pch[options.plot_char], file=ROUT)
160
+ print ('plot(UMAP1, UMAP2, col = Colors, pch=%d, cex=1.2, main="UMAP 2D map", xlab="UMAP_1", ylab="UMAP_2")'
161
+ % pch[options.plot_char], file=ROUT)
156
162
 
157
163
  if options.text_label:
158
164
  print ('text(UMAP1, UMAP2, labels=Sample_ID, col = Colors, cex=0.5, pos=1)', file=ROUT)
159
165
 
160
- print ('legend("%s", legend=c(%s), col=c(%s), pch=%d,cex=1)' % (legend_pos[options.legend_location], ','.join(['"' + str(i) + '"' for i in group_names]), ','.join(['"' + str(group_to_col[i]) + '"' for i in group_names]), pch[options.plot_char]), file=ROUT)
166
+ print ('legend("%s", legend=c(%s), col=c(%s), pch=%d,cex=1)'
167
+ % (legend_pos[options.legend_location], ','.join(['"' + str(i) + '"' for i in group_names]), ','.join(['"' + str(group_to_col[i]) + '"' for i in group_names]), pch[options.plot_char]), file=ROUT)
161
168
 
162
169
 
163
170
  print ('dev.off()', file=ROUT)
@@ -26,6 +26,7 @@ import sys,os
26
26
  import collections
27
27
  import subprocess
28
28
  import numpy as np
29
+ from cpgmodule._version import __version__
29
30
  from optparse import OptionParser
30
31
  from cpgmodule import ireader
31
32
  from cpgmodule.utils import *
@@ -36,7 +37,6 @@ __author__ = "Liguo Wang"
36
37
  __copyright__ = "Copyleft"
37
38
  __credits__ = []
38
39
  __license__ = "GPL"
39
- __version__="2.0.0"
40
40
  __maintainer__ = "Liguo Wang"
41
41
  __email__ = "wang.liguo@mayo.edu"
42
42
  __status__ = "Development"
@@ -18,6 +18,7 @@ import sys,os
18
18
  import collections
19
19
  import numpy as np
20
20
  from scipy import stats
21
+ from cpgmodule._version import __version__
21
22
  from optparse import OptionParser
22
23
  from cpgmodule import ireader
23
24
  from cpgmodule.utils import *
@@ -26,7 +27,6 @@ __author__ = "Liguo Wang"
26
27
  __copyright__ = "Copyleft"
27
28
  __credits__ = []
28
29
  __license__ = "GPL"
29
- __version__="2.0.0"
30
30
  __maintainer__ = "Liguo Wang"
31
31
  __email__ = "wang.liguo@mayo.edu"
32
32
  __status__ = "Development"
@@ -21,6 +21,7 @@ import collections
21
21
  import subprocess
22
22
  import numpy as np
23
23
  from optparse import OptionParser
24
+ from cpgmodule._version import __version__
24
25
  from cpgmodule import ireader
25
26
  from cpgmodule.utils import *
26
27
  from cpgmodule import BED
@@ -29,7 +30,6 @@ __author__ = "Liguo Wang"
29
30
  __copyright__ = "Copyleft"
30
31
  __credits__ = []
31
32
  __license__ = "GPL"
32
- __version__="2.0.0"
33
33
  __maintainer__ = "Liguo Wang"
34
34
  __email__ = "wang.liguo@mayo.edu"
35
35
  __status__ = "Development"
@@ -25,6 +25,7 @@ import collections
25
25
  import subprocess
26
26
  import numpy as np
27
27
  from optparse import OptionParser
28
+ from cpgmodule._version import __version__
28
29
  from cpgmodule import ireader
29
30
  from cpgmodule.utils import *
30
31
  from cpgmodule import BED
@@ -33,7 +34,6 @@ __author__ = "Liguo Wang"
33
34
  __copyright__ = "Copyleft"
34
35
  __credits__ = []
35
36
  __license__ = "GPL"
36
- __version__="2.0.0"
37
37
  __maintainer__ = "Liguo Wang"
38
38
  __email__ = "wang.liguo@mayo.edu"
39
39
  __status__ = "Development"