genal-python 0.0.dev0__tar.gz → 0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {genal_python-0.0.dev0 → genal_python-0.1}/PKG-INFO +24 -11
- {genal_python-0.0.dev0 → genal_python-0.1}/README.md +23 -10
- genal_python-0.1/docs/Images/MR_plot_SBP_AS.png +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/genal/Geno.py +15 -15
- {genal_python-0.0.dev0 → genal_python-0.1}/genal/__init__.py +2 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/genal/geno_tools.py +8 -8
- {genal_python-0.0.dev0 → genal_python-0.1}/pyproject.toml +1 -1
- {genal_python-0.0.dev0 → genal_python-0.1}/.gitignore +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/LICENSE +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/Makefile +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/doctrees/environment.pickle +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/doctrees/index.doctree +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/doctrees/source/genal.doctree +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/doctrees/source/modules.doctree +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/.buildinfo +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_sources/index.rst.txt +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_sources/source/genal.rst.txt +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_sources/source/modules.rst.txt +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/_sphinx_javascript_frameworks_compat.js +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/basic.css +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/badge_only.css +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/fonts/Roboto-Slab-Bold.woff +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/fonts/Roboto-Slab-Bold.woff2 +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/fonts/Roboto-Slab-Regular.woff +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/fonts/Roboto-Slab-Regular.woff2 +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/fonts/fontawesome-webfont.eot +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/fonts/fontawesome-webfont.svg +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/fonts/fontawesome-webfont.ttf +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/fonts/fontawesome-webfont.woff +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/fonts/fontawesome-webfont.woff2 +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/fonts/lato-bold-italic.woff +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/fonts/lato-bold-italic.woff2 +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/fonts/lato-bold.woff +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/fonts/lato-bold.woff2 +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/fonts/lato-normal-italic.woff +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/fonts/lato-normal-italic.woff2 +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/fonts/lato-normal.woff +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/fonts/lato-normal.woff2 +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/theme.css +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/doctools.js +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/documentation_options.js +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/file.png +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/jquery.js +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/js/badge_only.js +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/js/html5shiv-printshiv.min.js +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/js/html5shiv.min.js +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/js/theme.js +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/language_data.js +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/minus.png +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/plus.png +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/pygments.css +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/searchtools.js +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/sphinx_highlight.js +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/genindex.html +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/index.html +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/objects.inv +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/py-modindex.html +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/search.html +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/searchindex.js +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/source/genal.html +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/source/modules.html +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/make.bat +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/requirements.txt +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/source/api.rst +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/source/conf.py +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/source/genal.rst +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/source/index.rst +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/source/introduction.rst +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/docs/source/modules.rst +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/genal/MR.py +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/genal/MR_tools.py +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/genal/MRpresso.py +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/genal/association.py +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/genal/clump.py +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/genal/constants.py +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/genal/extract_prs.py +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/genal/lift.py +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/genal/proxy.py +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/genal/tools.py +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/gitignore +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/readthedocs.yaml +0 -0
- {genal_python-0.0.dev0 → genal_python-0.1}/requirements.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: genal-python
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.1
|
|
4
4
|
Summary: A python toolkit for polygenic risk scoring and mendelian randomization.
|
|
5
5
|
Author-email: Cyprien Rivier <riviercyprien@gmail.com>
|
|
6
6
|
Requires-Python: >=3.7
|
|
@@ -51,7 +51,7 @@ The module prioritizes user-friendliness and intuitive operation, aiming to redu
|
|
|
51
51
|
Genal draws on concepts from well-established R packages such as TwoSampleMR, MR-Presso, MendelianRandomization, and gwasvcf, adapting their proven methodologies to the Python environment. This approach ensures that users have access to tried and tested techniques with the versatility of Python's data science tools.
|
|
52
52
|
|
|
53
53
|
## Requirements for the GENAL module <a name="paragraph1"></a>
|
|
54
|
-
***Python 3.
|
|
54
|
+
***Python 3.9 or later***. https://www.python.org/ <br>
|
|
55
55
|
|
|
56
56
|
|
|
57
57
|
## Installation and How to use the GENAL module <a name="paragraph2"></a>
|
|
@@ -60,7 +60,11 @@ Genal draws on concepts from well-established R packages such as TwoSampleMR, MR
|
|
|
60
60
|
|
|
61
61
|
Download and install the package with pip:
|
|
62
62
|
```
|
|
63
|
-
pip install genal
|
|
63
|
+
pip install genal-python
|
|
64
|
+
```
|
|
65
|
+
And it can be imported in a python environment with:
|
|
66
|
+
```python
|
|
67
|
+
import genal
|
|
64
68
|
```
|
|
65
69
|
|
|
66
70
|
The main genal functionalities require a working installation of PLINK v1.9 that can be downloaded here: https://www.cog-genomics.org/plink/
|
|
@@ -81,6 +85,7 @@ For this tutorial, we will build a Polygenic Risk Score (PRS) for systolic blood
|
|
|
81
85
|
- Include risk score calculations with proxies
|
|
82
86
|
- Perform Mendelian Randomization
|
|
83
87
|
- Analyze SBP as an exposure and acute ischemic stroke as an outcome
|
|
88
|
+
- Plot the results
|
|
84
89
|
- Conduct sensitivity analyses using the weighted median, MR-Egger, and MR-PRESSO methods
|
|
85
90
|
- Calibrate SNP-trait weights with individual-level genetic data
|
|
86
91
|
- Execute single-SNP association tests for calibrating SBP genetic instruments
|
|
@@ -155,15 +160,15 @@ Now that we have loaded the data into a `genal.Geno` object, we can begin cleani
|
|
|
155
160
|
Genal can run all the basic cleaning and preprocessing steps in one command:
|
|
156
161
|
|
|
157
162
|
```python
|
|
158
|
-
SBP_Geno.preprocess_data(preprocessing =
|
|
163
|
+
SBP_Geno.preprocess_data(preprocessing = 'Fill_delete')
|
|
159
164
|
```
|
|
160
165
|
|
|
161
166
|
The `preprocessing` argument specifies the global level of preprocessing applied to the data:
|
|
162
|
-
- `preprocessing =
|
|
163
|
-
- `preprocessing =
|
|
164
|
-
- `preprocessing =
|
|
167
|
+
- `preprocessing = 'None'`: The data won't be modified.
|
|
168
|
+
- `preprocessing = 'Fill'`: Missing columns will be added based on reference data and invalid values set to NaN, but no rows will be deleted.
|
|
169
|
+
- `preprocessing = 'Fill_delete'`: Missing columns will be added, and all rows containing missing, duplicated, or invalid values will be deleted. This option is recommended before running genetic methods.
|
|
165
170
|
|
|
166
|
-
By default, and depending on the global preprocessing level (
|
|
171
|
+
By default, and depending on the global preprocessing level ('None', 'Fill', 'Fill_delete') chosen, the `preprocess_data` method of `genal.Geno` will run the following checks:
|
|
167
172
|
- Ensure the CHR (chromosome) and POS (genomic position) columns are integers.
|
|
168
173
|
- Ensure the EA (effect allele) and NEA (non-effect allele) columns are uppercase characters containing A, T, C, G letters. Multiallelic values are set to NaN.
|
|
169
174
|
- Validate the P (p-value) column for proper values.
|
|
@@ -202,7 +207,7 @@ And we see that the SNP column with the rsids has been added based on the refere
|
|
|
202
207
|
You do not need to obtain the 1000 Genomes reference panel yourself; Genal will download it the first time you use it. By default, the reference panel used is the European (eur) one. You can specify another valid reference panel (afr, eas, sas, amr) with the reference_panel argument:
|
|
203
208
|
|
|
204
209
|
```python
|
|
205
|
-
SBP_Geno.preprocess_data(preprocessing =
|
|
210
|
+
SBP_Geno.preprocess_data(preprocessing = 'Fill_delete', reference_panel = "afr")
|
|
206
211
|
```
|
|
207
212
|
|
|
208
213
|
You can also use a custom reference panel by specifying to the reference_panel argument a path to bed/bim/fam files (without the extension).
|
|
@@ -366,7 +371,7 @@ Stroke_Geno = genal.Geno(stroke_gwas, CHR = "chromosome", POS = "base_pair_locat
|
|
|
366
371
|
We preprocess it as well to put it in the correct format and make sure there are no invalid values:
|
|
367
372
|
|
|
368
373
|
```python
|
|
369
|
-
Stroke_Geno.preprocess_data(preprocessing =
|
|
374
|
+
Stroke_Geno.preprocess_data(preprocessing = 'Fill_delete')
|
|
370
375
|
```
|
|
371
376
|
|
|
372
377
|
Now, we need to extract our instruments (SNPs of the SBP_clumped data) from the outcome data to obtain their association with the outcome trait (stroke). It can be done by calling the `genal.Geno.query_outcome` method:
|
|
@@ -431,8 +436,16 @@ By default, all MR methods (inverse-variance weighted, weighted median, MR-Egger
|
|
|
431
436
|
|
|
432
437
|
For more fine-tuning, such as settings for the number of bootstrapping iterations, please refer to the API.
|
|
433
438
|
|
|
434
|
-
If you
|
|
439
|
+
If you want to visualize the obtained MR results, you can use the `genal.Geno.MR_plot` method that will plot each SNP in an effect_on_exposure x effect_on_outcome plane as well as lines corresponding to different MR methods:
|
|
440
|
+
|
|
441
|
+
```python
|
|
442
|
+
SBP_clumped.MR_plot(filename="MR_plot_SBP_AS")
|
|
443
|
+
```
|
|
444
|
+
|
|
445
|
+

|
|
446
|
+
You can select which MR methods you wish to plot with the 'methods' argument. Note that for an MR method to be plotted, it must be included in the latest `genal.Geno.MR` call of this `genal.Geno` object.
|
|
435
447
|
|
|
448
|
+
If you wish to include the heterogeneity values (Cochran's Q) in the results, you can use the heterogeneity argument in the `genal.Geno.MR` call. Here, the heterogeneity for the inverse-variance weighted method:
|
|
436
449
|
|
|
437
450
|
```python
|
|
438
451
|
SBP_clumped.MR(action = 3, methods = ["IVW"], exposure_name = "SBP", outcome_name = "Stroke_eur", heterogeneity = True)
|
|
@@ -29,7 +29,7 @@ The module prioritizes user-friendliness and intuitive operation, aiming to redu
|
|
|
29
29
|
Genal draws on concepts from well-established R packages such as TwoSampleMR, MR-Presso, MendelianRandomization, and gwasvcf, adapting their proven methodologies to the Python environment. This approach ensures that users have access to tried and tested techniques with the versatility of Python's data science tools.
|
|
30
30
|
|
|
31
31
|
## Requirements for the GENAL module <a name="paragraph1"></a>
|
|
32
|
-
***Python 3.
|
|
32
|
+
***Python 3.9 or later***. https://www.python.org/ <br>
|
|
33
33
|
|
|
34
34
|
|
|
35
35
|
## Installation and How to use the GENAL module <a name="paragraph2"></a>
|
|
@@ -38,7 +38,11 @@ Genal draws on concepts from well-established R packages such as TwoSampleMR, MR
|
|
|
38
38
|
|
|
39
39
|
Download and install the package with pip:
|
|
40
40
|
```
|
|
41
|
-
pip install genal
|
|
41
|
+
pip install genal-python
|
|
42
|
+
```
|
|
43
|
+
And it can be imported in a python environment with:
|
|
44
|
+
```python
|
|
45
|
+
import genal
|
|
42
46
|
```
|
|
43
47
|
|
|
44
48
|
The main genal functionalities require a working installation of PLINK v1.9 that can be downloaded here: https://www.cog-genomics.org/plink/
|
|
@@ -59,6 +63,7 @@ For this tutorial, we will build a Polygenic Risk Score (PRS) for systolic blood
|
|
|
59
63
|
- Include risk score calculations with proxies
|
|
60
64
|
- Perform Mendelian Randomization
|
|
61
65
|
- Analyze SBP as an exposure and acute ischemic stroke as an outcome
|
|
66
|
+
- Plot the results
|
|
62
67
|
- Conduct sensitivity analyses using the weighted median, MR-Egger, and MR-PRESSO methods
|
|
63
68
|
- Calibrate SNP-trait weights with individual-level genetic data
|
|
64
69
|
- Execute single-SNP association tests for calibrating SBP genetic instruments
|
|
@@ -133,15 +138,15 @@ Now that we have loaded the data into a `genal.Geno` object, we can begin cleani
|
|
|
133
138
|
Genal can run all the basic cleaning and preprocessing steps in one command:
|
|
134
139
|
|
|
135
140
|
```python
|
|
136
|
-
SBP_Geno.preprocess_data(preprocessing =
|
|
141
|
+
SBP_Geno.preprocess_data(preprocessing = 'Fill_delete')
|
|
137
142
|
```
|
|
138
143
|
|
|
139
144
|
The `preprocessing` argument specifies the global level of preprocessing applied to the data:
|
|
140
|
-
- `preprocessing =
|
|
141
|
-
- `preprocessing =
|
|
142
|
-
- `preprocessing =
|
|
145
|
+
- `preprocessing = 'None'`: The data won't be modified.
|
|
146
|
+
- `preprocessing = 'Fill'`: Missing columns will be added based on reference data and invalid values set to NaN, but no rows will be deleted.
|
|
147
|
+
- `preprocessing = 'Fill_delete'`: Missing columns will be added, and all rows containing missing, duplicated, or invalid values will be deleted. This option is recommended before running genetic methods.
|
|
143
148
|
|
|
144
|
-
By default, and depending on the global preprocessing level (
|
|
149
|
+
By default, and depending on the global preprocessing level ('None', 'Fill', 'Fill_delete') chosen, the `preprocess_data` method of `genal.Geno` will run the following checks:
|
|
145
150
|
- Ensure the CHR (chromosome) and POS (genomic position) columns are integers.
|
|
146
151
|
- Ensure the EA (effect allele) and NEA (non-effect allele) columns are uppercase characters containing A, T, C, G letters. Multiallelic values are set to NaN.
|
|
147
152
|
- Validate the P (p-value) column for proper values.
|
|
@@ -180,7 +185,7 @@ And we see that the SNP column with the rsids has been added based on the refere
|
|
|
180
185
|
You do not need to obtain the 1000 Genomes reference panel yourself; Genal will download it the first time you use it. By default, the reference panel used is the European (eur) one. You can specify another valid reference panel (afr, eas, sas, amr) with the reference_panel argument:
|
|
181
186
|
|
|
182
187
|
```python
|
|
183
|
-
SBP_Geno.preprocess_data(preprocessing =
|
|
188
|
+
SBP_Geno.preprocess_data(preprocessing = 'Fill_delete', reference_panel = "afr")
|
|
184
189
|
```
|
|
185
190
|
|
|
186
191
|
You can also use a custom reference panel by specifying to the reference_panel argument a path to bed/bim/fam files (without the extension).
|
|
@@ -344,7 +349,7 @@ Stroke_Geno = genal.Geno(stroke_gwas, CHR = "chromosome", POS = "base_pair_locat
|
|
|
344
349
|
We preprocess it as well to put it in the correct format and make sure there are no invalid values:
|
|
345
350
|
|
|
346
351
|
```python
|
|
347
|
-
Stroke_Geno.preprocess_data(preprocessing =
|
|
352
|
+
Stroke_Geno.preprocess_data(preprocessing = 'Fill_delete')
|
|
348
353
|
```
|
|
349
354
|
|
|
350
355
|
Now, we need to extract our instruments (SNPs of the SBP_clumped data) from the outcome data to obtain their association with the outcome trait (stroke). It can be done by calling the `genal.Geno.query_outcome` method:
|
|
@@ -409,8 +414,16 @@ By default, all MR methods (inverse-variance weighted, weighted median, MR-Egger
|
|
|
409
414
|
|
|
410
415
|
For more fine-tuning, such as settings for the number of bootstrapping iterations, please refer to the API.
|
|
411
416
|
|
|
412
|
-
If you
|
|
417
|
+
If you want to visualize the obtained MR results, you can use the `genal.Geno.MR_plot` method that will plot each SNP in an effect_on_exposure x effect_on_outcome plane as well as lines corresponding to different MR methods:
|
|
418
|
+
|
|
419
|
+
```python
|
|
420
|
+
SBP_clumped.MR_plot(filename="MR_plot_SBP_AS")
|
|
421
|
+
```
|
|
422
|
+
|
|
423
|
+

|
|
424
|
+
You can select which MR methods you wish to plot with the 'methods' argument. Note that for an MR method to be plotted, it must be included in the latest `genal.Geno.MR` call of this `genal.Geno` object.
|
|
413
425
|
|
|
426
|
+
If you wish to include the heterogeneity values (Cochran's Q) in the results, you can use the heterogeneity argument in the `genal.Geno.MR` call. Here, the heterogeneity for the inverse-variance weighted method:
|
|
414
427
|
|
|
415
428
|
```python
|
|
416
429
|
SBP_clumped.MR(action = 3, methods = ["IVW"], exposure_name = "SBP", outcome_name = "Stroke_eur", heterogeneity = True)
|
|
Binary file
|
|
@@ -167,7 +167,7 @@ class Geno:
|
|
|
167
167
|
|
|
168
168
|
def preprocess_data(
|
|
169
169
|
self,
|
|
170
|
-
preprocessing=
|
|
170
|
+
preprocessing='Fill',
|
|
171
171
|
reference_panel="eur",
|
|
172
172
|
effect_column=None,
|
|
173
173
|
keep_multi=None,
|
|
@@ -179,11 +179,11 @@ class Geno:
|
|
|
179
179
|
Clean and preprocess the main dataframe of Single Nucleotide Polymorphisms (SNP) data.
|
|
180
180
|
|
|
181
181
|
Args:
|
|
182
|
-
preprocessing (
|
|
183
|
-
-
|
|
184
|
-
-
|
|
185
|
-
-
|
|
186
|
-
Defaults to
|
|
182
|
+
preprocessing (str, optional): Level of preprocessing to apply. Options include:
|
|
183
|
+
- "None": The dataframe is not modified.
|
|
184
|
+
- "Fill": Missing columns are added based on reference data and invalid values set to NaN, but no rows are deleted.
|
|
185
|
+
- "Fill_delete": Missing columns are added, and rows with missing, duplicated, or invalid values are deleted.
|
|
186
|
+
Defaults to 'Fill'.
|
|
187
187
|
reference_panel (str or pd.DataFrame, optional): Reference panel for SNP adjustments. Can be a string representing ancestry classification ("eur", "afr", "eas", "sas", "amr") or a DataFrame with ["CHR","SNP","POS","A1","A2"] columns or a path to a .bim file. Defaults to "eur".
|
|
188
188
|
effect_column (str, optional): Specifies the type of effect column ("BETA" or "OR"). If None, the method tries to determine it. Odds Ratios will be log-transformed and the standard error adjusted. Defaults to None.
|
|
189
189
|
keep_multi (bool, optional): Determines if multiallelic SNPs should be kept. If None, defers to preprocessing value. Defaults to None.
|
|
@@ -207,7 +207,7 @@ class Geno:
|
|
|
207
207
|
|
|
208
208
|
# Ensure CHR and POS columns are integers if preprocessing is enabled
|
|
209
209
|
for int_col in ["CHR", "POS"]:
|
|
210
|
-
if int_col in data.columns and preprocessing
|
|
210
|
+
if int_col in data.columns and preprocessing in ['Fill', 'Fill_delete']:
|
|
211
211
|
check_int_column(data, int_col)
|
|
212
212
|
self.checks[int_col] = True
|
|
213
213
|
|
|
@@ -237,7 +237,7 @@ class Geno:
|
|
|
237
237
|
and "NEA" not in data.columns
|
|
238
238
|
and "EA" in data.columns
|
|
239
239
|
)
|
|
240
|
-
if missing_nea_condition and preprocessing
|
|
240
|
+
if missing_nea_condition and preprocessing in ['Fill', 'Fill_delete']:
|
|
241
241
|
data = fill_nea(data, self.get_reference_panel(reference_panel))
|
|
242
242
|
|
|
243
243
|
# Fill missing EA and NEA columns from reference data if necessary and preprocessing is enabled
|
|
@@ -247,7 +247,7 @@ class Geno:
|
|
|
247
247
|
and "NEA" not in data.columns
|
|
248
248
|
and "EA" not in data.columns
|
|
249
249
|
)
|
|
250
|
-
if missing_ea_nea_condition and preprocessing
|
|
250
|
+
if missing_ea_nea_condition and preprocessing in ['Fill', 'Fill_delete']:
|
|
251
251
|
data = fill_ea_nea(data, self.get_reference_panel(reference_panel))
|
|
252
252
|
|
|
253
253
|
# Convert effect column to Beta estimates if present
|
|
@@ -256,18 +256,18 @@ class Geno:
|
|
|
256
256
|
self.checks["BETA"] = True
|
|
257
257
|
|
|
258
258
|
# Ensure P column contains valid values
|
|
259
|
-
if "P" in data.columns and preprocessing
|
|
259
|
+
if "P" in data.columns and preprocessing in ['Fill', 'Fill_delete']:
|
|
260
260
|
check_p_column(data)
|
|
261
261
|
self.checks["P"] = True
|
|
262
262
|
|
|
263
263
|
# Fill missing SE or P columns if necessary
|
|
264
|
-
if preprocessing
|
|
264
|
+
if preprocessing in ['Fill', 'Fill_delete']:
|
|
265
265
|
fill_se_p(data)
|
|
266
266
|
|
|
267
267
|
# Process allele columns
|
|
268
268
|
for allele_col in ["EA", "NEA"]:
|
|
269
269
|
check_allele_condition = (allele_col in data.columns) and (
|
|
270
|
-
(preprocessing
|
|
270
|
+
(preprocessing in ['Fill', 'Fill_delete']) or (not keep_multi)
|
|
271
271
|
)
|
|
272
272
|
if check_allele_condition:
|
|
273
273
|
check_allele_column(data, allele_col, keep_multi)
|
|
@@ -285,8 +285,8 @@ class Geno:
|
|
|
285
285
|
f"Warning: the data doesn't include a {column} column. This may become an issue later on."
|
|
286
286
|
)
|
|
287
287
|
|
|
288
|
-
# Remove missing values if preprocessing level is set to
|
|
289
|
-
if preprocessing ==
|
|
288
|
+
# Remove missing values if preprocessing level is set to 'Fill_delete'
|
|
289
|
+
if preprocessing == 'Fill_delete':
|
|
290
290
|
remove_na(data)
|
|
291
291
|
self.checks["NA_removal"] = True
|
|
292
292
|
|
|
@@ -547,7 +547,7 @@ class Geno:
|
|
|
547
547
|
if "EA" not in self.checks:
|
|
548
548
|
check_allele_column(data_prs, "EA", keep_multi=False)
|
|
549
549
|
if "BETA" not in self.checks:
|
|
550
|
-
check_beta_column(data_prs, effect_column=None, preprocessing=
|
|
550
|
+
check_beta_column(data_prs, effect_column=None, preprocessing='Fill_delete')
|
|
551
551
|
|
|
552
552
|
initial_rows = data_prs.shape[0]
|
|
553
553
|
data_prs.dropna(subset=["SNP", "P", "BETA"], inplace=True)
|
|
@@ -21,7 +21,7 @@ def remove_na(data):
|
|
|
21
21
|
n_del = nrows - data.shape[0]
|
|
22
22
|
if n_del > 0:
|
|
23
23
|
print(
|
|
24
|
-
f"Deleted {n_del}({n_del/nrows*100:.3f}%) rows containing NA values in columns {columns_na}. Use preprocessing =
|
|
24
|
+
f"Deleted {n_del}({n_del/nrows*100:.3f}%) rows containing NA values in columns {columns_na}. Use preprocessing = 'Fill' to keep the rows containing NA values."
|
|
25
25
|
)
|
|
26
26
|
return
|
|
27
27
|
|
|
@@ -100,7 +100,7 @@ def check_beta_column(data, effect_column, preprocessing):
|
|
|
100
100
|
If no effect_column argument is specified, determine whether the BETA column contains beta estimates or odds ratios.
|
|
101
101
|
"""
|
|
102
102
|
if effect_column is None:
|
|
103
|
-
if preprocessing ==
|
|
103
|
+
if preprocessing == 'None':
|
|
104
104
|
return data
|
|
105
105
|
median = np.median(data.BETA)
|
|
106
106
|
if 0.5 < median < 1.5:
|
|
@@ -303,9 +303,9 @@ def check_arguments(
|
|
|
303
303
|
"""
|
|
304
304
|
|
|
305
305
|
# Validate preprocessing value
|
|
306
|
-
if preprocessing not in [
|
|
306
|
+
if preprocessing not in ['None', 'Fill', 'Fill_delete']:
|
|
307
307
|
raise ValueError(
|
|
308
|
-
"preprocessing must be one of [
|
|
308
|
+
"preprocessing must be one of ['None', 'Fill', 'Fill_delete']. Refer to the Geno class docstring for details."
|
|
309
309
|
)
|
|
310
310
|
|
|
311
311
|
# Validate effect_column value
|
|
@@ -326,11 +326,11 @@ def check_arguments(
|
|
|
326
326
|
# Helper functions for preprocessing logic
|
|
327
327
|
def keeptype_column(arg):
|
|
328
328
|
"""Helper function to decide whether to keep multi-values/duplicates."""
|
|
329
|
-
return True if arg is None and preprocessing
|
|
329
|
+
return True if arg is None and preprocessing in ['None', 'Fill'] else arg
|
|
330
330
|
|
|
331
331
|
def filltype_column(arg):
|
|
332
332
|
"""Helper function to decide whether to fill snpids/coordinates."""
|
|
333
|
-
return False if arg is None and preprocessing ==
|
|
333
|
+
return False if arg is None and preprocessing == 'None' else arg
|
|
334
334
|
|
|
335
335
|
# Apply preprocessing logic
|
|
336
336
|
keep_multi = keeptype_column(keep_multi)
|
|
@@ -385,7 +385,7 @@ def save_data(data, name, path="", fmt="h5", sep="\t", header=True):
|
|
|
385
385
|
print(f"Data saved to {path_name}")
|
|
386
386
|
|
|
387
387
|
|
|
388
|
-
def Combine_Geno(Gs, name="noname", clumped=False, preprocessing=
|
|
388
|
+
def Combine_Geno(Gs, name="noname", clumped=False, preprocessing='None'):
|
|
389
389
|
"""
|
|
390
390
|
Combine a list of GWAS objects into one.
|
|
391
391
|
|
|
@@ -393,7 +393,7 @@ def Combine_Geno(Gs, name="noname", clumped=False, preprocessing=0):
|
|
|
393
393
|
- Gs (list): List of GWAS objects.
|
|
394
394
|
- name (str, optional): Name for the combined object. Default is "noname".
|
|
395
395
|
- clumped (bool, optional): If True, uses the clumped data of each object. Default is False.
|
|
396
|
-
- preprocessing (int, optional): Level of preprocessing to apply. Default is
|
|
396
|
+
- preprocessing (str, optional): Level of preprocessing to apply. Default is 'None'.
|
|
397
397
|
|
|
398
398
|
Returns:
|
|
399
399
|
Geno object: Combined Geno object.
|
|
@@ -4,7 +4,7 @@ build-backend = "flit_core.buildapi"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "genal-python" # Updated name for PyPI
|
|
7
|
-
version = "0.
|
|
7
|
+
version = "0.1"
|
|
8
8
|
authors = [{name = "Cyprien Rivier", email = "riviercyprien@gmail.com"}]
|
|
9
9
|
description = "A python toolkit for polygenic risk scoring and mendelian randomization."
|
|
10
10
|
readme = "README.md"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/fonts/Roboto-Slab-Bold.woff
RENAMED
|
File without changes
|
{genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/fonts/Roboto-Slab-Bold.woff2
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/fonts/lato-bold-italic.woff
RENAMED
|
File without changes
|
{genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/fonts/lato-bold-italic.woff2
RENAMED
|
File without changes
|
{genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/fonts/lato-bold.woff
RENAMED
|
File without changes
|
{genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/fonts/lato-bold.woff2
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/fonts/lato-normal.woff
RENAMED
|
File without changes
|
{genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/css/fonts/lato-normal.woff2
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/documentation_options.js
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{genal_python-0.0.dev0 → genal_python-0.1}/docs/_build/html/_static/js/html5shiv-printshiv.min.js
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|