genal-python 0.9__tar.gz → 1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {genal_python-0.9 → genal_python-1.0}/.DS_Store +0 -0
- genal_python-1.0/Genal_flowchart.png +0 -0
- {genal_python-0.9 → genal_python-1.0}/PKG-INFO +24 -10
- {genal_python-0.9 → genal_python-1.0}/README.md +20 -6
- {genal_python-0.9 → genal_python-1.0}/docs/.DS_Store +0 -0
- {genal_python-0.9 → genal_python-1.0/docs}/requirements.txt +4 -3
- {genal_python-0.9 → genal_python-1.0}/docs/source/conf.py +1 -1
- {genal_python-0.9 → genal_python-1.0}/docs/source/index.rst +3 -3
- {genal_python-0.9 → genal_python-1.0}/docs/source/introduction.rst +13 -6
- {genal_python-0.9 → genal_python-1.0}/genal/Geno.py +6 -2
- {genal_python-0.9 → genal_python-1.0}/genal/MR.py +0 -2
- {genal_python-0.9 → genal_python-1.0}/genal/MR_tools.py +0 -1
- {genal_python-0.9 → genal_python-1.0}/genal/MRpresso.py +0 -3
- {genal_python-0.9 → genal_python-1.0}/genal/__init__.py +2 -2
- {genal_python-0.9 → genal_python-1.0}/genal/extract_prs.py +11 -8
- {genal_python-0.9 → genal_python-1.0}/genal/geno_tools.py +1 -1
- {genal_python-0.9 → genal_python-1.0}/genal/snp_query.py +49 -25
- {genal_python-0.9 → genal_python-1.0}/genal/tools.py +143 -18
- genal_python-1.0/genal_logo.png +0 -0
- {genal_python-0.9 → genal_python-1.0}/pyproject.toml +3 -3
- {genal_python-0.9 → genal_python-1.0}/.gitignore +0 -0
- {genal_python-0.9 → genal_python-1.0}/.readthedocs.yaml +0 -0
- {genal_python-0.9 → genal_python-1.0}/LICENSE +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/Makefile +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/.DS_Store +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/.buildinfo +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/.doctrees/api.doctree +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/.doctrees/environment.pickle +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/.doctrees/genal.doctree +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/.doctrees/index.doctree +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/.doctrees/introduction.doctree +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/.doctrees/modules.doctree +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_images/MR_plot_SBP_AS.png +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_modules/genal/Geno.html +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_modules/genal/MR.html +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_modules/genal/MR_tools.html +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_modules/genal/MRpresso.html +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_modules/genal/association.html +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_modules/genal/clump.html +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_modules/genal/extract_prs.html +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_modules/genal/geno_tools.html +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_modules/genal/lift.html +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_modules/genal/proxy.html +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_modules/genal/snp_query.html +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_modules/genal/tools.html +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_modules/index.html +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_sources/api.rst.txt +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_sources/genal.rst.txt +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_sources/index.rst.txt +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_sources/introduction.rst.txt +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_sources/modules.rst.txt +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_static/basic.css +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_static/css/badge_only.css +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_static/css/fonts/Roboto-Slab-Bold.woff +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_static/css/fonts/Roboto-Slab-Bold.woff2 +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_static/css/fonts/Roboto-Slab-Regular.woff +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_static/css/fonts/Roboto-Slab-Regular.woff2 +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_static/css/fonts/fontawesome-webfont.eot +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_static/css/fonts/fontawesome-webfont.svg +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_static/css/fonts/fontawesome-webfont.ttf +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_static/css/fonts/fontawesome-webfont.woff +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_static/css/fonts/fontawesome-webfont.woff2 +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_static/css/fonts/lato-bold-italic.woff +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_static/css/fonts/lato-bold-italic.woff2 +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_static/css/fonts/lato-bold.woff +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_static/css/fonts/lato-bold.woff2 +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_static/css/fonts/lato-normal-italic.woff +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_static/css/fonts/lato-normal-italic.woff2 +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_static/css/fonts/lato-normal.woff +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_static/css/fonts/lato-normal.woff2 +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_static/css/theme.css +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_static/doctools.js +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_static/documentation_options.js +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_static/file.png +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_static/js/badge_only.js +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_static/js/html5shiv-printshiv.min.js +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_static/js/html5shiv.min.js +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_static/js/theme.js +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_static/language_data.js +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_static/minus.png +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_static/plus.png +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_static/pygments.css +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_static/searchtools.js +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/_static/sphinx_highlight.js +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/api.html +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/genal.html +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/genindex.html +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/index.html +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/introduction.html +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/modules.html +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/objects.inv +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/py-modindex.html +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/search.html +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/build/searchindex.js +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/make.bat +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/source/.DS_Store +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/source/Images/MR_plot_SBP_AS.png +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/source/api.rst +0 -0
- {genal_python-0.9 → genal_python-1.0}/docs/source/modules.rst +0 -0
- {genal_python-0.9 → genal_python-1.0}/genal/association.py +0 -0
- {genal_python-0.9 → genal_python-1.0}/genal/clump.py +0 -0
- {genal_python-0.9 → genal_python-1.0}/genal/constants.py +0 -0
- {genal_python-0.9 → genal_python-1.0}/genal/lift.py +0 -0
- {genal_python-0.9 → genal_python-1.0}/genal/proxy.py +0 -0
- {genal_python-0.9 → genal_python-1.0}/gitignore +0 -0
- {genal_python-0.9 → genal_python-1.0}/readthedocs.yaml +0 -0
|
Binary file
|
|
Binary file
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
2
|
Name: genal-python
|
|
3
|
-
Version: 0.9
|
|
3
|
+
Version: 1.0
|
|
4
4
|
Summary: A python toolkit for polygenic risk scoring and mendelian randomization.
|
|
5
5
|
Author-email: Cyprien Rivier <riviercyprien@gmail.com>
|
|
6
|
-
Requires-Python: >=3.
|
|
6
|
+
Requires-Python: >=3.8
|
|
7
7
|
Description-Content-Type: text/markdown
|
|
8
8
|
Classifier: Programming Language :: Python :: 3
|
|
9
9
|
Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
|
|
@@ -16,12 +16,16 @@ Requires-Dist: plotnine==0.12.3
|
|
|
16
16
|
Requires-Dist: psutil==5.9.1
|
|
17
17
|
Requires-Dist: pyliftover==0.4
|
|
18
18
|
Requires-Dist: scikit_learn>=1.3.0
|
|
19
|
-
Requires-Dist: scipy>=1.11
|
|
19
|
+
Requires-Dist: scipy>=1.10.1, <1.11
|
|
20
20
|
Requires-Dist: statsmodels==0.14.0
|
|
21
21
|
Requires-Dist: tqdm==4.66.1
|
|
22
22
|
Requires-Dist: wget==3.2
|
|
23
23
|
Project-URL: Home, https://github.com/CypRiv/genal
|
|
24
24
|
|
|
25
|
+
[](https://www.python.org/downloads/release/python-3100/)
|
|
26
|
+
|
|
27
|
+
<img src="/genal_logo.png" data-canonical-src="/genal_logo.png" height="80" />
|
|
28
|
+
|
|
25
29
|
<center><h1> genal: A Python Toolkit for Genetic Risk Scoring and Mendelian Randomization </h1></center>
|
|
26
30
|
|
|
27
31
|
|
|
@@ -54,12 +58,15 @@ The module prioritizes user-friendliness and intuitive operation, aiming to redu
|
|
|
54
58
|
|
|
55
59
|
Genal draws on concepts from well-established R packages such as TwoSampleMR, MR-Presso, MendelianRandomization, and gwasvcf, adapting their proven methodologies to the Python environment. This approach ensures that users have access to tried and tested techniques with the versatility of Python's data science tools.
|
|
56
60
|
|
|
61
|
+
<img src="/Genal_flowchart.png" data-canonical-src="/Genal_flowchart.png" style="max-width:100%;" />
|
|
62
|
+
|
|
63
|
+
Genal flowchart. Created in https://www.BioRender.com
|
|
57
64
|
## Citation <a name="citation"></a>
|
|
58
65
|
If you're using genal, please cite the following paper:
|
|
59
66
|
**Genal: A Python Toolkit for Genetic Risk Scoring and Mendelian Randomization.** Cyprien A. Rivier, Santiago Clocchiatti-Tuozzo, Shufan Huo, Victor Torres-Lopez, Daniela Renedo, Kevin N. Sheth, Guido J. Falcone, Julian N. Acosta. medRxiv 2024.05.23.24307776; doi: https://doi.org/10.1101/2024.05.23.24307776
|
|
60
67
|
|
|
61
68
|
## Requirements for the genal module <a name="paragraph1"></a>
|
|
62
|
-
***Python 3.
|
|
69
|
+
***Python 3.8 or later***. https://www.python.org/ <br>
|
|
63
70
|
|
|
64
71
|
|
|
65
72
|
## Installation and How to use the genal module <a name="paragraph2"></a>
|
|
@@ -70,7 +77,7 @@ If you're using genal, please cite the following paper:
|
|
|
70
77
|
>
|
|
71
78
|
> **Optional**: It is recommended to create a new environment to avoid dependency conflicts. Here, we create a new conda environment called 'genal_env'.
|
|
72
79
|
> ```
|
|
73
|
-
> conda create --name genal_env python=3.
|
|
80
|
+
> conda create --name genal_env python=3.8
|
|
74
81
|
> conda activate genal_env
|
|
75
82
|
> ```
|
|
76
83
|
|
|
@@ -84,12 +91,19 @@ And import it in a python environment with:
|
|
|
84
91
|
import genal
|
|
85
92
|
```
|
|
86
93
|
|
|
87
|
-
The main genal functionalities require a working installation of PLINK v1.9
|
|
88
|
-
|
|
94
|
+
The main genal functionalities require a working installation of PLINK v1.9 (and not 2.0 as certain functionalities have not been updated yet).
|
|
95
|
+
If you have already installed plink v1.9, you can set the path to its executable with:
|
|
89
96
|
|
|
90
97
|
```
|
|
91
98
|
genal.set_plink(path="/path/to/plink/executable/file")
|
|
92
99
|
```
|
|
100
|
+
|
|
101
|
+
If plink is not installed, genal can install the correct version for your system with the following line:
|
|
102
|
+
|
|
103
|
+
```
|
|
104
|
+
genal.install_plink()
|
|
105
|
+
```
|
|
106
|
+
|
|
93
107
|
### Documentation <a name="paragraph2.2"></a>
|
|
94
108
|
|
|
95
109
|
For detailed information on how to use the functionalities of Genal, please refer to the documentation: https://genal.rtfd.io
|
|
@@ -124,7 +138,7 @@ For this tutorial, we will obtain genetic instruments for systolic blood pressur
|
|
|
124
138
|
|
|
125
139
|
### Data loading <a name="paragraph3.1"></a>
|
|
126
140
|
|
|
127
|
-
We start this tutorial with publicly available summary statistics from a large GWAS study of systolic blood pressure. [Link to study](https://www.nature.com/articles/s41588-018-0205-x). After downloading and unzipping the summary statistics, we load them into a pandas DataFrame:
|
|
141
|
+
We start this tutorial with publicly available summary statistics from a large GWAS study of systolic blood pressure. [Link to study](https://www.nature.com/articles/s41588-018-0205-x). [Download link](http://ftp.ebi.ac.uk/pub/databases/gwas/summary_statistics/GCST006001-GCST007000/GCST006624/Evangelou_30224653_SBP.txt.gz). After downloading and unzipping the summary statistics, we load them into a pandas DataFrame:
|
|
128
142
|
|
|
129
143
|
```python
|
|
130
144
|
import pandas as pd
|
|
@@ -378,7 +392,7 @@ You can customize how the proxies are chosen with the following arguments:
|
|
|
378
392
|
|
|
379
393
|
To run MR, we need to load both our exposure and outcome SNP-level data in `genal.Geno` instances. In our case, the genetic instruments of the MR are the SNPs associated with blood pressure at genome-wide significant levels resulting from the clumping of the blood pressure GWAS. They are stored in our `SBP_clumped` `genal.Geno` instance, which also includes their association with the exposure trait (instrument-SBP estimates in the `BETA` column).
|
|
380
394
|
|
|
381
|
-
To get their association with the outcome trait (instrument-stroke estimates), we are going to use SNP-level data from a large GWAS of stroke performed by the GIGASTROKE consortium (
|
|
395
|
+
To get their association with the outcome trait (instrument-stroke estimates), we are going to use SNP-level data from a large GWAS of stroke performed by the GIGASTROKE consortium: [Link to study](https://www.nature.com/articles/s41586-022-05165-3). [Link to download](http://ftp.ebi.ac.uk/pub/databases/gwas/summary_statistics/GCST90104001-GCST90105000/GCST90104539/GCST90104539_buildGRCh37.tsv.gz):
|
|
382
396
|
|
|
383
397
|
```python
|
|
384
398
|
stroke_gwas = pd.read_csv("GCST90104539_buildGRCh37.tsv",sep="\t")
|
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
[](https://www.python.org/downloads/release/python-3100/)
|
|
2
|
+
|
|
3
|
+
<img src="/genal_logo.png" data-canonical-src="/genal_logo.png" height="80" />
|
|
4
|
+
|
|
1
5
|
<center><h1> genal: A Python Toolkit for Genetic Risk Scoring and Mendelian Randomization </h1></center>
|
|
2
6
|
|
|
3
7
|
|
|
@@ -30,12 +34,15 @@ The module prioritizes user-friendliness and intuitive operation, aiming to redu
|
|
|
30
34
|
|
|
31
35
|
Genal draws on concepts from well-established R packages such as TwoSampleMR, MR-Presso, MendelianRandomization, and gwasvcf, adapting their proven methodologies to the Python environment. This approach ensures that users have access to tried and tested techniques with the versatility of Python's data science tools.
|
|
32
36
|
|
|
37
|
+
<img src="/Genal_flowchart.png" data-canonical-src="/Genal_flowchart.png" style="max-width:100%;" />
|
|
38
|
+
|
|
39
|
+
Genal flowchart. Created in https://www.BioRender.com
|
|
33
40
|
## Citation <a name="citation"></a>
|
|
34
41
|
If you're using genal, please cite the following paper:
|
|
35
42
|
**Genal: A Python Toolkit for Genetic Risk Scoring and Mendelian Randomization.** Cyprien A. Rivier, Santiago Clocchiatti-Tuozzo, Shufan Huo, Victor Torres-Lopez, Daniela Renedo, Kevin N. Sheth, Guido J. Falcone, Julian N. Acosta. medRxiv 2024.05.23.24307776; doi: https://doi.org/10.1101/2024.05.23.24307776
|
|
36
43
|
|
|
37
44
|
## Requirements for the genal module <a name="paragraph1"></a>
|
|
38
|
-
***Python 3.
|
|
45
|
+
***Python 3.8 or later***. https://www.python.org/ <br>
|
|
39
46
|
|
|
40
47
|
|
|
41
48
|
## Installation and How to use the genal module <a name="paragraph2"></a>
|
|
@@ -46,7 +53,7 @@ If you're using genal, please cite the following paper:
|
|
|
46
53
|
>
|
|
47
54
|
> **Optional**: It is recommended to create a new environment to avoid dependency conflicts. Here, we create a new conda environment called 'genal_env'.
|
|
48
55
|
> ```
|
|
49
|
-
> conda create --name genal_env python=3.
|
|
56
|
+
> conda create --name genal_env python=3.8
|
|
50
57
|
> conda activate genal_env
|
|
51
58
|
> ```
|
|
52
59
|
|
|
@@ -60,12 +67,19 @@ And import it in a python environment with:
|
|
|
60
67
|
import genal
|
|
61
68
|
```
|
|
62
69
|
|
|
63
|
-
The main genal functionalities require a working installation of PLINK v1.9
|
|
64
|
-
|
|
70
|
+
The main genal functionalities require a working installation of PLINK v1.9 (and not 2.0 as certain functionalities have not been updated yet).
|
|
71
|
+
If you have already installed plink v1.9, you can set the path to its executable with:
|
|
65
72
|
|
|
66
73
|
```
|
|
67
74
|
genal.set_plink(path="/path/to/plink/executable/file")
|
|
68
75
|
```
|
|
76
|
+
|
|
77
|
+
If plink is not installed, genal can install the correct version for your system with the following line:
|
|
78
|
+
|
|
79
|
+
```
|
|
80
|
+
genal.install_plink()
|
|
81
|
+
```
|
|
82
|
+
|
|
69
83
|
### Documentation <a name="paragraph2.2"></a>
|
|
70
84
|
|
|
71
85
|
For detailed information on how to use the functionalities of Genal, please refer to the documentation: https://genal.rtfd.io
|
|
@@ -100,7 +114,7 @@ For this tutorial, we will obtain genetic instruments for systolic blood pressur
|
|
|
100
114
|
|
|
101
115
|
### Data loading <a name="paragraph3.1"></a>
|
|
102
116
|
|
|
103
|
-
We start this tutorial with publicly available summary statistics from a large GWAS study of systolic blood pressure. [Link to study](https://www.nature.com/articles/s41588-018-0205-x). After downloading and unzipping the summary statistics, we load them into a pandas DataFrame:
|
|
117
|
+
We start this tutorial with publicly available summary statistics from a large GWAS study of systolic blood pressure. [Link to study](https://www.nature.com/articles/s41588-018-0205-x). [Download link](http://ftp.ebi.ac.uk/pub/databases/gwas/summary_statistics/GCST006001-GCST007000/GCST006624/Evangelou_30224653_SBP.txt.gz). After downloading and unzipping the summary statistics, we load them into a pandas DataFrame:
|
|
104
118
|
|
|
105
119
|
```python
|
|
106
120
|
import pandas as pd
|
|
@@ -354,7 +368,7 @@ You can customize how the proxies are chosen with the following arguments:
|
|
|
354
368
|
|
|
355
369
|
To run MR, we need to load both our exposure and outcome SNP-level data in `genal.Geno` instances. In our case, the genetic instruments of the MR are the SNPs associated with blood pressure at genome-wide significant levels resulting from the clumping of the blood pressure GWAS. They are stored in our `SBP_clumped` `genal.Geno` instance, which also includes their association with the exposure trait (instrument-SBP estimates in the `BETA` column).
|
|
356
370
|
|
|
357
|
-
To get their association with the outcome trait (instrument-stroke estimates), we are going to use SNP-level data from a large GWAS of stroke performed by the GIGASTROKE consortium (
|
|
371
|
+
To get their association with the outcome trait (instrument-stroke estimates), we are going to use SNP-level data from a large GWAS of stroke performed by the GIGASTROKE consortium: [Link to study](https://www.nature.com/articles/s41586-022-05165-3). [Link to download](http://ftp.ebi.ac.uk/pub/databases/gwas/summary_statistics/GCST90104001-GCST90105000/GCST90104539/GCST90104539_buildGRCh37.tsv.gz):
|
|
358
372
|
|
|
359
373
|
```python
|
|
360
374
|
stroke_gwas = pd.read_csv("GCST90104539_buildGRCh37.tsv",sep="\t")
|
|
Binary file
|
|
@@ -1,13 +1,14 @@
|
|
|
1
|
+
sphinx
|
|
2
|
+
sphinx_rtd_theme
|
|
1
3
|
aiohttp==3.9.5
|
|
2
4
|
nest_asyncio==1.5.5
|
|
3
|
-
numpy>=1.24.4
|
|
5
|
+
numpy>=1.24.4,<2.0
|
|
4
6
|
pandas>=2.0.3
|
|
5
7
|
plotnine==0.12.3
|
|
6
8
|
psutil==5.9.1
|
|
7
9
|
pyliftover==0.4
|
|
8
10
|
scikit_learn>=1.3.0
|
|
9
11
|
scipy>=1.11.4
|
|
10
|
-
sphinx_rtd_theme==1.3.0
|
|
11
12
|
statsmodels==0.14.0
|
|
12
13
|
tqdm==4.66.1
|
|
13
|
-
wget==3.2
|
|
14
|
+
wget==3.2
|
|
@@ -13,7 +13,7 @@ sys.path.insert(0, os.path.abspath('../../'))
|
|
|
13
13
|
project = 'genal'
|
|
14
14
|
copyright = '2023, Cyprien A. Rivier'
|
|
15
15
|
author = 'Cyprien A. Rivier'
|
|
16
|
-
release = '
|
|
16
|
+
release = 'v1.0'
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
# -- General configuration ---------------------------------------------------
|
|
@@ -6,9 +6,9 @@
|
|
|
6
6
|
genal: A Python Toolkit for Genetic Risk Scoring and Mendelian Randomization
|
|
7
7
|
============================================================================
|
|
8
8
|
|
|
9
|
-
:Author: Cyprien Rivier
|
|
9
|
+
:Author: Cyprien A. Rivier
|
|
10
10
|
:Date: |today|
|
|
11
|
-
:Version: "0.9"
|
|
11
|
+
:Version: "1.0"
|
|
12
12
|
|
|
13
13
|
Genal is a python module designed to make it easy to run genetic risk scores and mendelian randomization analyses. It integrates a collection of tools that facilitate the cleaning of single nucleotide polymorphism data (usually derived from Genome-Wide Association Studies) and enable the execution of key clinical population genetic workflows. The functionalities provided by genal include clumping, lifting, association testing, polygenic risk scoring, and Mendelian randomization analyses, all within a single Python module.
|
|
14
14
|
|
|
@@ -46,7 +46,7 @@ Citation
|
|
|
46
46
|
If you use genal in your work, please cite the following paper:
|
|
47
47
|
|
|
48
48
|
.. [Rivier.2024] *Genal: A Python Toolkit for Genetic Risk Scoring and Mendelian Randomization*
|
|
49
|
-
Cyprien
|
|
49
|
+
Cyprien A. Rivier, Santiago Clocchiatti-Tuozzo, Shufan Huo, Victor Torres-Lopez, Daniela Renedo, Kevin N. Sheth, Guido J. Falcone, Julian N. Acosta.
|
|
50
50
|
medRxiv. 2024 May `10.1101/2024.05.23.24307776 <https://doi.org/10.1101/2024.05.23.24307776>`_.
|
|
51
51
|
|
|
52
52
|
References
|
|
@@ -7,10 +7,10 @@ Installation
|
|
|
7
7
|
|
|
8
8
|
.. code-block:: bash
|
|
9
9
|
|
|
10
|
-
conda create --name genal_env python=3.
|
|
10
|
+
conda create --name genal_env python=3.8
|
|
11
11
|
conda activate genal_env
|
|
12
12
|
|
|
13
|
-
The genal package requires Python 3.
|
|
13
|
+
The genal package requires Python 3.8 or later. Download and install it with pip:
|
|
14
14
|
|
|
15
15
|
.. code-block:: bash
|
|
16
16
|
|
|
@@ -22,13 +22,19 @@ And import it in a python environment with:
|
|
|
22
22
|
|
|
23
23
|
import genal
|
|
24
24
|
|
|
25
|
-
The main genal functionalities require a working installation of PLINK v1.9 (and not 2.0 as certain functionalities have not been updated yet)
|
|
26
|
-
|
|
25
|
+
The main genal functionalities require a working installation of PLINK v1.9 (and not 2.0 as certain functionalities have not been updated yet).
|
|
26
|
+
If you have already installed plink v1.9, you can set the path to its executable with:
|
|
27
27
|
|
|
28
28
|
.. code-block:: python
|
|
29
29
|
|
|
30
30
|
genal.set_plink(path="/path/to/plink/executable/file")
|
|
31
31
|
|
|
32
|
+
If plink is not installed, genal can install the correct version for your system with the :meth:`~genal.tools.install_plink` function:
|
|
33
|
+
|
|
34
|
+
.. code-block:: python
|
|
35
|
+
|
|
36
|
+
genal.install_plink()
|
|
37
|
+
|
|
32
38
|
========
|
|
33
39
|
Tutorial
|
|
34
40
|
========
|
|
@@ -51,7 +57,7 @@ h. `GWAS Catalog`_
|
|
|
51
57
|
Data loading
|
|
52
58
|
============
|
|
53
59
|
|
|
54
|
-
We start this tutorial with publicly available summary statistics data from a large GWAS of systolic blood pressure
|
|
60
|
+
We start this tutorial with publicly available summary statistics data from a large GWAS of systolic blood pressure: `Link to study <https://www.nature.com/articles/s41588-018-0205-x>`_. `Download link <http://ftp.ebi.ac.uk/pub/databases/gwas/summary_statistics/GCST006001-GCST007000/GCST006624/Evangelou_30224653_SBP.txt.gz>`_. After downloading and unzipping the summary statistics, we load the data into a pandas dataframe:
|
|
55
61
|
|
|
56
62
|
.. code-block:: python
|
|
57
63
|
|
|
@@ -318,7 +324,8 @@ Mendelian Randomization
|
|
|
318
324
|
|
|
319
325
|
To run MR, we need to load both our exposure and outcome SNP-level data in :class:`~genal.Geno` instances. In our case, the genetic instruments of the MR are the SNPs associated with blood pressure at genome-wide significant levels resulting from the clumping of the blood pressure GWAS. They are stored in our ``SBP_clumped`` :class:`~genal.Geno` instance, which also includes their association with the exposure trait (instrument-SBP estimates in the ``BETA`` column).
|
|
320
326
|
|
|
321
|
-
To get their association with the outcome trait (instrument-stroke estimates), we are going to use SNP-level data from a large GWAS of stroke performed by the GIGASTROKE consortium
|
|
327
|
+
To get their association with the outcome trait (instrument-stroke estimates), we are going to use SNP-level data from a large GWAS of stroke performed by the GIGASTROKE consortium:
|
|
328
|
+
`Link to study <https://www.nature.com/articles/s41586-022-05165-3>`_. `Download link <http://ftp.ebi.ac.uk/pub/databases/gwas/summary_statistics/GCST90104001-GCST90105000/GCST90104539/GCST90104539_buildGRCh37.tsv.gz>`_.
|
|
322
329
|
|
|
323
330
|
.. code-block:: python
|
|
324
331
|
|
|
@@ -223,6 +223,8 @@ class Geno:
|
|
|
223
223
|
and "EA" in data.columns
|
|
224
224
|
)
|
|
225
225
|
if missing_nea_condition and preprocessing in ['Fill', 'Fill_delete']:
|
|
226
|
+
check_allele_column(data, "EA", keep_multi)
|
|
227
|
+
self.checks["EA"] = True
|
|
226
228
|
data = fill_nea(data, self.get_reference_panel(reference_panel))
|
|
227
229
|
|
|
228
230
|
# Fill missing EA and NEA columns from reference data if necessary and preprocessing is enabled
|
|
@@ -254,7 +256,7 @@ class Geno:
|
|
|
254
256
|
check_allele_condition = (allele_col in data.columns) and (
|
|
255
257
|
(preprocessing in ['Fill', 'Fill_delete']) or (not keep_multi)
|
|
256
258
|
)
|
|
257
|
-
if check_allele_condition:
|
|
259
|
+
if check_allele_condition and not self.checks[allele_col]:
|
|
258
260
|
check_allele_column(data, allele_col, keep_multi)
|
|
259
261
|
self.checks[allele_col] = True
|
|
260
262
|
|
|
@@ -687,7 +689,9 @@ class Geno:
|
|
|
687
689
|
snp_list = data["SNP"]
|
|
688
690
|
|
|
689
691
|
# Extract SNPs using the provided path and SNP list
|
|
690
|
-
|
|
692
|
+
path = extract_snps_func(snp_list, self.name, path)
|
|
693
|
+
if path == "FAILED":
|
|
694
|
+
raise ValueError("No SNPs were extracted from the genetic data and the association test can't be run.")
|
|
691
695
|
|
|
692
696
|
# Perform the association test
|
|
693
697
|
updated_data = association_test_func(
|
|
@@ -196,8 +196,6 @@ def mr_egger_regression(BETA_e, SE_e, BETA_o, SE_o):
|
|
|
196
196
|
SE_e (numpy array): Standard errors corresponding to `BETA_e`.
|
|
197
197
|
BETA_o (numpy array): Effect sizes of the same genetic variants on the outcome.
|
|
198
198
|
SE_o (numpy array): Standard errors corresponding to `BETA_o`.
|
|
199
|
-
nboot (int): Number of boostrap iterations to obtain the standard error and p-value
|
|
200
|
-
cpus (int): Number of cpu cores to use in parallel for the boostrapping iterations.
|
|
201
199
|
|
|
202
200
|
Returns:
|
|
203
201
|
list of dict: A list containing two dictionaries with the results for the egger regression estimate and the egger regression intercept (horizontal pleiotropy estimate):
|
|
@@ -2,8 +2,6 @@ import numpy as np
|
|
|
2
2
|
import pandas as pd
|
|
3
3
|
import statsmodels.api as sm
|
|
4
4
|
import statsmodels.formula.api as smf
|
|
5
|
-
from scipy import stats
|
|
6
|
-
from scipy.stats import norm, chi2
|
|
7
5
|
from concurrent.futures import ProcessPoolExecutor, as_completed
|
|
8
6
|
from sklearn.linear_model import LinearRegression
|
|
9
7
|
from tqdm import tqdm
|
|
@@ -11,7 +9,6 @@ from numpy.random import default_rng
|
|
|
11
9
|
from functools import partial
|
|
12
10
|
|
|
13
11
|
##todo: implement the multivariable option, for the moment we assume only 1 BETA_e column
|
|
14
|
-
# Also: check if we can replace the LinearRegression of sklearn with one from statsmodels to avoid using sklearn just for that
|
|
15
12
|
|
|
16
13
|
|
|
17
14
|
# MR-PRESSO main function
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import json
|
|
3
|
-
from .tools import default_config, write_config, set_plink, delete_tmp, get_reference_panel_path
|
|
3
|
+
from .tools import default_config, write_config, set_plink, install_plink, delete_tmp, get_reference_panel_path
|
|
4
4
|
from .geno_tools import Combine_Geno
|
|
5
5
|
|
|
6
|
-
__version__ = "0.9"
|
|
6
|
+
__version__ = "1.0"
|
|
7
7
|
|
|
8
8
|
config_dir = os.path.expanduser(
|
|
9
9
|
"~/.genal/"
|
|
@@ -121,14 +121,17 @@ def extract_snps_func(snp_list, name, path=None):
|
|
|
121
121
|
else:
|
|
122
122
|
extract_snps_from_combined_data(name, path, output_path, snp_list_path)
|
|
123
123
|
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
124
|
+
#Check that at least 1 variant has been extracted. If not, return "FAILED" to warn downstream functions (prs, association_test)
|
|
125
|
+
log_path = output_path + ".log"
|
|
126
|
+
with open(log_path, 'r') as log_file:
|
|
127
|
+
if "No variants remaining" in log_file.read():
|
|
128
|
+
print("None of the provided SNPs were found in the genetic data.")
|
|
129
|
+
return "FAILED"
|
|
130
|
+
|
|
131
|
+
else:
|
|
132
|
+
print(f"Created bed/bim/fam fileset with extracted SNPs: {output_path}")
|
|
133
|
+
# Report SNPs not found
|
|
134
|
+
report_snps_not_found(nrow, name)
|
|
132
135
|
|
|
133
136
|
return output_path
|
|
134
137
|
|
|
@@ -121,7 +121,7 @@ def check_beta_column(data, effect_column, preprocessing):
|
|
|
121
121
|
"The argument effect_column accepts only 'BETA' or 'OR' as values."
|
|
122
122
|
)
|
|
123
123
|
if effect_column == "OR":
|
|
124
|
-
data["BETA"] = np.log(data["BETA"])
|
|
124
|
+
data["BETA"] = np.log(data["BETA"].clip(lower=0.01))
|
|
125
125
|
data.drop(columns="SE", errors="ignore", inplace=True)
|
|
126
126
|
print("The BETA column has been log-transformed to obtain Beta estimates.")
|
|
127
127
|
return
|
|
@@ -2,7 +2,7 @@ import aiohttp
|
|
|
2
2
|
import asyncio
|
|
3
3
|
import numpy as np
|
|
4
4
|
import nest_asyncio
|
|
5
|
-
from tqdm.
|
|
5
|
+
from tqdm.auto import tqdm
|
|
6
6
|
|
|
7
7
|
# Using nest_asyncio to allow execution in notebooks
|
|
8
8
|
nest_asyncio.apply()
|
|
@@ -10,8 +10,16 @@ nest_asyncio.apply()
|
|
|
10
10
|
# Main function to start the event loop and run the asynchronous query
|
|
11
11
|
def async_query_gwas_catalog(snps, p_threshold=5e-8, return_p=False, return_study=False,
|
|
12
12
|
max_associations=None, timeout=100):
|
|
13
|
-
|
|
14
|
-
|
|
13
|
+
try:
|
|
14
|
+
loop = asyncio.get_event_loop()
|
|
15
|
+
except RuntimeError:
|
|
16
|
+
loop = asyncio.new_event_loop()
|
|
17
|
+
asyncio.set_event_loop(loop)
|
|
18
|
+
results_global, errors, timeouts = loop.run_until_complete(
|
|
19
|
+
query_gwas_catalog_coroutine(
|
|
20
|
+
snps, p_threshold, return_p, return_study, max_associations, timeout
|
|
21
|
+
)
|
|
22
|
+
)
|
|
15
23
|
return results_global, errors, timeouts
|
|
16
24
|
|
|
17
25
|
|
|
@@ -36,18 +44,21 @@ async def query_gwas_catalog_coroutine(snps, p_threshold=5e-8, return_p=False, r
|
|
|
36
44
|
"""
|
|
37
45
|
|
|
38
46
|
results_global = {} # Dictionary storing the SNP (keys) and results for each SNP: a list of single strings or tuples
|
|
39
|
-
errors = []
|
|
40
|
-
timeouts = []
|
|
47
|
+
errors = [] # List storing SNP for which the GWAS Catalog could not be queried
|
|
48
|
+
timeouts = [] # List storing SNP for which the timeout was reached
|
|
41
49
|
|
|
42
|
-
async def fetch(session, url,
|
|
50
|
+
async def fetch(session, url, timeout_duration=timeout):
|
|
43
51
|
try:
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
return
|
|
52
|
+
# Wrap the entire fetch operation with asyncio.wait_for for timeout
|
|
53
|
+
response = await asyncio.wait_for(session.get(url), timeout=timeout_duration)
|
|
54
|
+
async with response:
|
|
55
|
+
if response.status == 200:
|
|
56
|
+
return await response.json()
|
|
57
|
+
return None
|
|
49
58
|
except asyncio.TimeoutError:
|
|
50
59
|
return "TIMEOUT"
|
|
60
|
+
except aiohttp.ClientError:
|
|
61
|
+
return "ERROR"
|
|
51
62
|
|
|
52
63
|
async def process_snp(session, snp):
|
|
53
64
|
#print(f"Processing SNP {snp}")
|
|
@@ -55,11 +66,13 @@ async def query_gwas_catalog_coroutine(snps, p_threshold=5e-8, return_p=False, r
|
|
|
55
66
|
results_snp = [] # List storing the results for each association found for this SNP
|
|
56
67
|
|
|
57
68
|
base_url = f"https://www.ebi.ac.uk/gwas/rest/api/singleNucleotidePolymorphisms/{snp}/associations?projection=associationBySnp"
|
|
58
|
-
base_data = await fetch(session, base_url,
|
|
69
|
+
base_data = await fetch(session, base_url, timeout_duration=timeout)
|
|
59
70
|
|
|
60
71
|
if base_data == "TIMEOUT":
|
|
61
72
|
timeouts.append(snp)
|
|
62
|
-
elif base_data:
|
|
73
|
+
elif base_data == "ERROR" or base_data is None:
|
|
74
|
+
errors.append(snp)
|
|
75
|
+
else:
|
|
63
76
|
i = 0
|
|
64
77
|
# Process each association found for this SNP
|
|
65
78
|
for assoc in base_data.get('_embedded', {}).get('associations', []):
|
|
@@ -72,13 +85,25 @@ async def query_gwas_catalog_coroutine(snps, p_threshold=5e-8, return_p=False, r
|
|
|
72
85
|
pvalue = assoc.get("pvalue", np.nan)
|
|
73
86
|
# If the pvalue of the association does not pass the threshold, the association is not processed further nor reported
|
|
74
87
|
if pvalue < p_threshold:
|
|
75
|
-
|
|
88
|
+
efo_traits = assoc.get("efoTraits", [])
|
|
89
|
+
if efo_traits:
|
|
90
|
+
trait = efo_traits[0].get("trait", "")
|
|
91
|
+
else:
|
|
92
|
+
trait = ""
|
|
76
93
|
|
|
77
94
|
# If the return_study flag is active: query the page containing the GWAS Catalog study ID
|
|
78
95
|
if return_study:
|
|
79
|
-
study_url = assoc.get("_links", {}).get("study", {}).get("href",
|
|
80
|
-
|
|
81
|
-
|
|
96
|
+
study_url = assoc.get("_links", {}).get("study", {}).get("href", "")
|
|
97
|
+
if study_url:
|
|
98
|
+
study_data = await fetch(session, study_url, timeout_duration=timeout)
|
|
99
|
+
if study_data == "TIMEOUT":
|
|
100
|
+
study_id = "TIMEOUT"
|
|
101
|
+
elif study_data == "ERROR" or study_data is None:
|
|
102
|
+
study_id = "Error"
|
|
103
|
+
else:
|
|
104
|
+
study_id = study_data.get("accessionId", "Not found")
|
|
105
|
+
else:
|
|
106
|
+
study_id = "Not available"
|
|
82
107
|
else:
|
|
83
108
|
study_id = None
|
|
84
109
|
|
|
@@ -109,14 +134,13 @@ async def query_gwas_catalog_coroutine(snps, p_threshold=5e-8, return_p=False, r
|
|
|
109
134
|
results_snp = [(trait, min_trait[trait]) for trait in min_trait]
|
|
110
135
|
|
|
111
136
|
results_global[snp] = results_snp
|
|
112
|
-
|
|
113
|
-
errors.append(snp)
|
|
114
|
-
|
|
137
|
+
|
|
115
138
|
async with aiohttp.ClientSession() as session:
|
|
116
139
|
tasks = [process_snp(session, snp) for snp in snps]
|
|
117
|
-
|
|
140
|
+
# Initialize tqdm progress bar
|
|
141
|
+
with tqdm(total=len(tasks), desc="Processing SNPs") as pbar:
|
|
142
|
+
for coro in asyncio.as_completed(tasks):
|
|
143
|
+
await coro
|
|
144
|
+
pbar.update(1)
|
|
118
145
|
|
|
119
|
-
|
|
120
|
-
#errors = [error for error in errors if error not in timeouts]
|
|
121
|
-
|
|
122
|
-
return results_global, errors, timeouts
|
|
146
|
+
return results_global, errors, timeouts
|
|
@@ -1,14 +1,16 @@
|
|
|
1
|
-
import os, subprocess
|
|
1
|
+
import os, subprocess, sys
|
|
2
2
|
import pandas as pd
|
|
3
3
|
import json
|
|
4
4
|
import wget
|
|
5
5
|
import shutil
|
|
6
6
|
import tarfile
|
|
7
|
+
import platform
|
|
8
|
+
import requests
|
|
9
|
+
import zipfile
|
|
7
10
|
|
|
8
11
|
from .constants import REF_PANELS, REF_PANELS_URL
|
|
9
12
|
|
|
10
13
|
config_path = os.path.join(os.path.expanduser("~/.genal/"), "config.json")
|
|
11
|
-
# default_ref_path = os.path.join(os.getcwd(), "tmp_GENAL", "Reference_files")
|
|
12
14
|
default_ref_path = os.path.join(os.path.expanduser("~/.genal/"), "Reference_files")
|
|
13
15
|
|
|
14
16
|
|
|
@@ -79,6 +81,25 @@ def create_tmp():
|
|
|
79
81
|
"Unable to create the 'tmp_GENAL' directory. Check permissions."
|
|
80
82
|
)
|
|
81
83
|
|
|
84
|
+
def check_bfiles(filepath):
    """Check if the path specified leads to a bed/bim/fam triple.

    Args:
        filepath: Common path prefix of the fileset, without extension.

    Returns:
        bool: True only if ``<filepath>.bed``, ``<filepath>.bim`` and
        ``<filepath>.fam`` all exist as files, False otherwise.
    """
    # isfile (rather than exists) so that a directory that happens to be named
    # "<filepath>.bed" is not mistaken for a genotype file.
    return all(
        os.path.isfile("{}.{}".format(filepath, extension))
        for extension in ("bed", "bim", "fam")
    )
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def delete_tmp():
    """Remove the 'tmp_GENAL' folder located in the current working directory.

    Prints a confirmation once the folder and its content have been removed,
    or a notice when no such folder exists. Returns None in both cases.
    """
    tmp_dir = "tmp_GENAL"

    # Nothing to do when the folder is absent
    if not os.path.isdir(tmp_dir):
        print("There is no tmp_GENAL folder to delete in the current directory.")
        return

    shutil.rmtree(tmp_dir)
    print("The tmp_GENAL folder has been successfully deleted.")
|
|
82
103
|
|
|
83
104
|
def set_reference_folder(path=""):
|
|
84
105
|
"""
|
|
@@ -234,6 +255,7 @@ def load_reference_panel(reference_panel="eur"):
|
|
|
234
255
|
reference_panel_df["CHR"] = reference_panel_df["CHR"].astype(str).str.replace("^chr", "", regex=True).astype(int)
|
|
235
256
|
return reference_panel_df
|
|
236
257
|
|
|
258
|
+
|
|
237
259
|
def set_plink(path=""):
|
|
238
260
|
"""Set the plink 1.9 path and verify that it is the correct version."""
|
|
239
261
|
if not path:
|
|
@@ -279,22 +301,125 @@ def get_plink19_path():
|
|
|
279
301
|
return config["paths"]["plink19_path"]
|
|
280
302
|
|
|
281
303
|
|
|
282
|
-
def
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
304
|
+
def is_plink_installed(plink_path):
    """Check whether the executable at ``plink_path`` runs and identifies as plink 1.9.

    The program is invoked with ``--version`` and its standard output is searched
    (case-insensitively) for the string "plink v1.9".

    Args:
        plink_path (str): Path to the candidate plink executable.

    Returns:
        bool: True if the command succeeds and reports plink v1.9. False if the
        file is missing, cannot be executed, exits with a non-zero status, or
        reports something else.
    """
    try:
        result = subprocess.run(
            [plink_path, "--version"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            check=True,
        )
    except (subprocess.CalledProcessError, OSError):
        # OSError covers FileNotFoundError as well as PermissionError (file not
        # executable) and "Exec format error" (file is not a valid binary):
        # none of these is a working plink installation.
        return False

    # Parse version from output
    return "plink v1.9" in result.stdout.lower()
|
|
318
|
+
|
|
292
319
|
|
|
293
|
-
def
|
|
294
|
-
"""
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
320
|
+
def install_plink(path=None):
    """Install plink 1.9 for the current operating system.

    Downloads the plink 1.9 zip archive matching the detected platform, extracts
    it into ``path``, marks the binary as executable, verifies it with
    ``is_plink_installed`` and, if the verification succeeds, stores its location
    in the config file.

    Args:
        path (str, optional): Path to the folder to install plink in. If not provided,
            the "plink" sub-folder of "~/.genal/" is used.

    Raises:
        OSError: If the installation directory cannot be created.
        ValueError: If the operating system is not Linux, Windows, or Mac OS.
        SystemExit: If the download or the extraction of the archive fails.
    """
    # Determine operating system and architecture
    system = platform.system()
    system_arch = platform.architecture()[0][:2]  # e.g. "64bit" -> "64"

    # Handle path variable
    if not path:
        path = os.path.join(os.path.expanduser("~/.genal/"), "plink")
        print(f"You have not specified a path for the installation of plink. The following directory will be used: {path}")

    # Determine the path of the plink binary
    if system == 'Windows':
        plink_path = os.path.join(path, 'plink.exe')
    else:
        plink_path = os.path.join(path, 'plink')

    # Check that it does not already exist
    if is_plink_installed(plink_path):
        print(f"Plink1.9 is already installed at {plink_path}. Installation is skipped.")
        return

    # If the directory doesn't exist, attempt to create it
    if not os.path.isdir(path):
        try:
            os.makedirs(path, exist_ok=True)
        except OSError as err:
            raise OSError(
                f"Unable to create the '{path}' directory. Check permissions."
            ) from err

    # Determine appropriate download link
    if system == "Linux":
        if system_arch == "64":
            download_url = "https://s3.amazonaws.com/plink1-assets/plink_linux_x86_64_20241022.zip"
        else:
            download_url = "https://s3.amazonaws.com/plink1-assets/plink_linux_i686_20241022.zip"
    elif system == "Windows":
        if system_arch == "64":
            download_url = "https://s3.amazonaws.com/plink1-assets/plink_win64_20241022.zip"
        else:
            download_url = "https://s3.amazonaws.com/plink1-assets/plink_win32_20241022.zip"
    elif system == "Darwin":
        download_url = "https://s3.amazonaws.com/plink1-assets/plink_mac_20241022.zip"
    else:
        raise ValueError(
            "Your operating system is not Linux, Windows, or Mac OS and plink1.9 can't be installed automatically. "
            "Please install plink1.9 manually from https://www.cog-genomics.org/plink/1.9/"
        )

    # Create tmp folder if it does not exist and zip file path
    create_tmp()
    zip_path = os.path.join("tmp_GENAL", 'plink1.9.zip')

    try:
        # Download plink
        print(f"Downloading plink1.9 for {system} {system_arch}bits from {download_url}...")
        try:
            # The timeout prevents an unresponsive server from blocking forever;
            # the context manager releases the streamed connection in all cases.
            with requests.get(download_url, stream=True, timeout=60) as response:
                response.raise_for_status()
                with open(zip_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)
            print("Download completed.")
        except requests.RequestException as e:
            print(f"Failed to download plink1.9: {e}")
            sys.exit(1)

        # Extract the zip file
        print("Extracting plink1.9...")
        try:
            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                zip_ref.extractall(path)
            print("Extraction completed.")
        except zipfile.BadZipFile as e:
            print(f"Failed to extract plink1.9: {e}")
            sys.exit(1)
    finally:
        # Clean zip file, including a partial or corrupt one left by a failure above
        if os.path.exists(zip_path):
            os.remove(zip_path)

    # Make the file executable
    try:
        os.chmod(plink_path, 0o755)  # Set permissions to rwxr-xr-x
    except PermissionError:
        print("Permission denied: cannot change file permissions.")
    except FileNotFoundError:
        print("File not found: cannot change permissions on a non-existent file.")
    except OSError as e:
        print(f"OS error occurred: {e}")

    # Test the installation
    if not is_plink_installed(plink_path):
        print(
            "plink1.9 installation may have failed. "
            "Please install manually from https://www.cog-genomics.org/plink/1.9/ and set the path using set_plink(path)."
        )
        # Do not point the config at a binary that could not be verified
        return
    print("plink1.9 has been successfully installed and is accessible.")

    # Change config file
    config = read_config()
    config["paths"]["plink19_path"] = plink_path
    write_config(config)
    print(f"Path to plink 1.9 successfully set: '{plink_path}'")

    return
|
|
Binary file
|
|
@@ -4,11 +4,11 @@ build-backend = "flit_core.buildapi"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "genal-python" # Updated name for PyPI
|
|
7
|
-
version = "0
|
|
7
|
+
version = "1.0"
|
|
8
8
|
authors = [{name = "Cyprien Rivier", email = "riviercyprien@gmail.com"}]
|
|
9
9
|
description = "A python toolkit for polygenic risk scoring and mendelian randomization."
|
|
10
10
|
readme = "README.md"
|
|
11
|
-
requires-python = ">=3.
|
|
11
|
+
requires-python = ">=3.8"
|
|
12
12
|
license = {file = "LICENSE"}
|
|
13
13
|
classifiers = [
|
|
14
14
|
"Programming Language :: Python :: 3",
|
|
@@ -26,7 +26,7 @@ dependencies = [
|
|
|
26
26
|
"psutil==5.9.1",
|
|
27
27
|
"pyliftover==0.4",
|
|
28
28
|
"scikit_learn>=1.3.0",
|
|
29
|
-
"scipy>=1.11
|
|
29
|
+
"scipy>=1.10.1, <1.11",
|
|
30
30
|
"statsmodels==0.14.0",
|
|
31
31
|
"tqdm==4.66.1",
|
|
32
32
|
"wget==3.2"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{genal_python-0.9 → genal_python-1.0}/docs/build/_static/css/fonts/Roboto-Slab-Regular.woff2
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{genal_python-0.9 → genal_python-1.0}/docs/build/_static/css/fonts/fontawesome-webfont.woff2
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|