genal-python 0.6__tar.gz → 0.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {genal_python-0.6 → genal_python-0.7}/PKG-INFO +13 -10
- {genal_python-0.6 → genal_python-0.7}/README.md +1 -1
- {genal_python-0.6 → genal_python-0.7}/genal/Geno.py +74 -18
- {genal_python-0.6 → genal_python-0.7}/genal/__init__.py +1 -1
- {genal_python-0.6 → genal_python-0.7}/genal/geno_tools.py +5 -2
- genal_python-0.7/genal/snp_query.py +86 -0
- {genal_python-0.6 → genal_python-0.7}/genal/tools.py +5 -6
- {genal_python-0.6 → genal_python-0.7}/pyproject.toml +14 -11
- {genal_python-0.6 → genal_python-0.7}/requirements.txt +3 -2
- {genal_python-0.6 → genal_python-0.7}/.gitignore +0 -0
- {genal_python-0.6 → genal_python-0.7}/LICENSE +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/Images/MR_plot_SBP_AS.png +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/Makefile +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/doctrees/api.doctree +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/doctrees/environment.pickle +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/doctrees/genal.doctree +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/doctrees/index.doctree +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/doctrees/introduction.doctree +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/doctrees/modules.doctree +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/doctrees/source/genal.doctree +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/doctrees/source/modules.doctree +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/.buildinfo +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/_sources/api.rst.txt +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/_sources/genal.rst.txt +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/_sources/index.rst.txt +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/_sources/introduction.rst.txt +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/_sources/modules.rst.txt +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/_sources/source/genal.rst.txt +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/_sources/source/modules.rst.txt +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/_sphinx_javascript_frameworks_compat.js +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/basic.css +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/css/badge_only.css +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/css/fonts/Roboto-Slab-Bold.woff +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/css/fonts/Roboto-Slab-Bold.woff2 +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/css/fonts/Roboto-Slab-Regular.woff +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/css/fonts/Roboto-Slab-Regular.woff2 +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/css/fonts/fontawesome-webfont.eot +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/css/fonts/fontawesome-webfont.svg +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/css/fonts/fontawesome-webfont.ttf +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/css/fonts/fontawesome-webfont.woff +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/css/fonts/fontawesome-webfont.woff2 +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/css/fonts/lato-bold-italic.woff +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/css/fonts/lato-bold-italic.woff2 +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/css/fonts/lato-bold.woff +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/css/fonts/lato-bold.woff2 +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/css/fonts/lato-normal-italic.woff +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/css/fonts/lato-normal-italic.woff2 +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/css/fonts/lato-normal.woff +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/css/fonts/lato-normal.woff2 +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/css/theme.css +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/doctools.js +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/documentation_options.js +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/file.png +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/jquery.js +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/js/badge_only.js +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/js/html5shiv-printshiv.min.js +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/js/html5shiv.min.js +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/js/theme.js +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/language_data.js +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/minus.png +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/plus.png +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/pygments.css +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/searchtools.js +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/sphinx_highlight.js +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/api.html +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/genal.html +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/genindex.html +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/index.html +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/introduction.html +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/modules.html +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/objects.inv +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/py-modindex.html +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/search.html +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/searchindex.js +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/source/genal.html +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/_build/html/source/modules.html +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/make.bat +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/requirements.txt +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/source/api.rst +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/source/conf.py +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/source/genal.rst +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/source/index.rst +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/source/introduction.rst +0 -0
- {genal_python-0.6 → genal_python-0.7}/docs/source/modules.rst +0 -0
- {genal_python-0.6 → genal_python-0.7}/genal/MR.py +0 -0
- {genal_python-0.6 → genal_python-0.7}/genal/MR_tools.py +0 -0
- {genal_python-0.6 → genal_python-0.7}/genal/MRpresso.py +0 -0
- {genal_python-0.6 → genal_python-0.7}/genal/association.py +0 -0
- {genal_python-0.6 → genal_python-0.7}/genal/clump.py +0 -0
- {genal_python-0.6 → genal_python-0.7}/genal/constants.py +0 -0
- {genal_python-0.6 → genal_python-0.7}/genal/extract_prs.py +0 -0
- {genal_python-0.6 → genal_python-0.7}/genal/lift.py +0 -0
- {genal_python-0.6 → genal_python-0.7}/genal/proxy.py +0 -0
- {genal_python-0.6 → genal_python-0.7}/gitignore +0 -0
- {genal_python-0.6 → genal_python-0.7}/readthedocs.yaml +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: genal-python
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.7
|
|
4
4
|
Summary: A python toolkit for polygenic risk scoring and mendelian randomization.
|
|
5
5
|
Author-email: Cyprien Rivier <riviercyprien@gmail.com>
|
|
6
6
|
Requires-Python: >=3.7
|
|
@@ -8,16 +8,19 @@ Description-Content-Type: text/markdown
|
|
|
8
8
|
Classifier: Programming Language :: Python :: 3
|
|
9
9
|
Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
|
|
10
10
|
Classifier: Operating System :: OS Independent
|
|
11
|
-
Requires-Dist:
|
|
11
|
+
Requires-Dist: aiohttp==3.9.5
|
|
12
|
+
Requires-Dist: nest_asyncio==1.5.5
|
|
13
|
+
Requires-Dist: numpy>=1.24.4, <2.0
|
|
12
14
|
Requires-Dist: pandas>=2.0.3
|
|
13
|
-
Requires-Dist: plotnine
|
|
14
|
-
Requires-Dist: psutil
|
|
15
|
-
Requires-Dist: pyliftover
|
|
15
|
+
Requires-Dist: plotnine==0.12.3
|
|
16
|
+
Requires-Dist: psutil==5.9.1
|
|
17
|
+
Requires-Dist: pyliftover==0.4
|
|
16
18
|
Requires-Dist: scikit_learn>=1.3.0
|
|
17
|
-
Requires-Dist: scipy>=1.11.
|
|
18
|
-
Requires-Dist:
|
|
19
|
-
Requires-Dist:
|
|
20
|
-
Requires-Dist:
|
|
19
|
+
Requires-Dist: scipy>=1.11.4
|
|
20
|
+
Requires-Dist: sphinx_rtd_theme==1.3.0
|
|
21
|
+
Requires-Dist: statsmodels==0.14.0
|
|
22
|
+
Requires-Dist: tqdm==4.66.1
|
|
23
|
+
Requires-Dist: wget==3.2
|
|
21
24
|
Project-URL: Home, https://github.com/CypRiv/genal
|
|
22
25
|
|
|
23
26
|
<center><h1> genal: A Python Toolkit for Genetic Risk Scoring and Mendelian Randomization </h1></center>
|
|
@@ -479,7 +482,7 @@ As expected, many MR methods indicate that SBP is strongly associated with strok
|
|
|
479
482
|
To investigate horizontal pleiotropy in more detail, a very useful method is Mendelian Randomization Pleiotropy RESidual Sum and Outlier (MR-PRESSO). MR-PRESSO is a method designed to detect and correct for horizontal pleiotropy. It will identify which instruments are likely to be pleiotropic in their effect on the outcome, and it will rerun an inverse-variance weighted MR after excluding them. It can be run using the `genal.Geno.MRpresso` method:
|
|
480
483
|
|
|
481
484
|
```python
|
|
482
|
-
SBP_clumped.MRpresso(action = 2, n_iterations = 30000)
|
|
485
|
+
mod_table, GlobalTest, OutlierTest, BiasTest = SBP_clumped.MRpresso(action = 2, n_iterations = 30000)
|
|
483
486
|
```
|
|
484
487
|
|
|
485
488
|
As with the `genal.Geno.MR` method, the `action` argument determines how the pleiotropic SNPs will be treated. The output is a tuple containing:
|
|
@@ -457,7 +457,7 @@ As expected, many MR methods indicate that SBP is strongly associated with strok
|
|
|
457
457
|
To investigate horizontal pleiotropy in more detail, a very useful method is Mendelian Randomization Pleiotropy RESidual Sum and Outlier (MR-PRESSO). MR-PRESSO is a method designed to detect and correct for horizontal pleiotropy. It will identify which instruments are likely to be pleiotropic in their effect on the outcome, and it will rerun an inverse-variance weighted MR after excluding them. It can be run using the `genal.Geno.MRpresso` method:
|
|
458
458
|
|
|
459
459
|
```python
|
|
460
|
-
SBP_clumped.MRpresso(action = 2, n_iterations = 30000)
|
|
460
|
+
mod_table, GlobalTest, OutlierTest, BiasTest = SBP_clumped.MRpresso(action = 2, n_iterations = 30000)
|
|
461
461
|
```
|
|
462
462
|
|
|
463
463
|
As with the `genal.Geno.MR` method, the `action` argument determines how the pleiotropic SNPs will be treated. The output is a tuple containing:
|
|
@@ -33,6 +33,7 @@ from .geno_tools import (
|
|
|
33
33
|
)
|
|
34
34
|
from .association import set_phenotype_func, association_test_func
|
|
35
35
|
from .extract_prs import extract_snps_func, prs_func
|
|
36
|
+
from .snp_query import async_query_gwas_catalog
|
|
36
37
|
from .constants import STANDARD_COLUMNS, REF_PANEL_COLUMNS, CHECKS_DICT, MR_METHODS_NAMES
|
|
37
38
|
|
|
38
39
|
# Do all the MR steps (query_outcome, harmonize etc) based on CHR/POS and not SNPs
|
|
@@ -40,7 +41,6 @@ from .constants import STANDARD_COLUMNS, REF_PANEL_COLUMNS, CHECKS_DICT, MR_METH
|
|
|
40
41
|
# Get proxies (simply return a list of proxies)
|
|
41
42
|
# Multi-MR with python MR
|
|
42
43
|
# Warning that users might not have shell (for the .ram attribute)
|
|
43
|
-
# Phenoscanner
|
|
44
44
|
|
|
45
45
|
|
|
46
46
|
|
|
@@ -64,6 +64,7 @@ class Geno:
|
|
|
64
64
|
name (str): ID of the object (for internal reference and debugging purposes).
|
|
65
65
|
reference_panel (pd.DataFrame): Reference population SNP data used for SNP info
|
|
66
66
|
adjustments. Initialized when first needed.
|
|
67
|
+
reference_panel_name (str): string to identify the reference_panel (path or population string)
|
|
67
68
|
|
|
68
69
|
Methods:
|
|
69
70
|
preprocess_data():
|
|
@@ -313,24 +314,27 @@ class Geno:
|
|
|
313
314
|
Raises:
|
|
314
315
|
ValueError: If the provided DataFrame doesn't have the necessary columns.
|
|
315
316
|
"""
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
if not hasattr(self, "reference_panel"):
|
|
317
|
+
# Check if the user provided a dataframe
|
|
318
|
+
if isinstance(reference_panel, pd.DataFrame):
|
|
319
319
|
# If the provided reference_panel is a DataFrame, verify its structure and dtypes
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
)
|
|
320
|
+
for col in REF_PANEL_COLUMNS:
|
|
321
|
+
if col not in reference_panel.columns:
|
|
322
|
+
raise ValueError(
|
|
323
|
+
f"The {col} column is not present in the reference_panel provided and is necessary."
|
|
324
|
+
)
|
|
326
325
|
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
326
|
+
print(
|
|
327
|
+
"Using the provided reference_panel dataframe as the reference panel."
|
|
328
|
+
)
|
|
329
|
+
self.reference_panel = reference_panel.copy()
|
|
330
|
+
self.reference_panel_name = "USER_PROVIDED"
|
|
331
|
+
|
|
332
|
+
# Else, check if there is already a reference_panel with the same ID. If not, load it based on provided string
|
|
333
|
+
elif not (hasattr(self, "reference_panel") and
|
|
334
|
+
hasattr(self, "reference_panel_name") and
|
|
335
|
+
self.reference_panel_name==reference_panel):
|
|
336
|
+
self.reference_panel = load_reference_panel(reference_panel)
|
|
337
|
+
self.reference_panel_name = reference_panel
|
|
334
338
|
|
|
335
339
|
return self.reference_panel
|
|
336
340
|
|
|
@@ -1067,7 +1071,7 @@ class Geno:
|
|
|
1067
1071
|
cpus (int, optional): number of cpu cores to be used for the parallel random data generation.
|
|
1068
1072
|
|
|
1069
1073
|
Returns:
|
|
1070
|
-
|
|
1074
|
+
tuple: Contains the following elements:
|
|
1071
1075
|
- mod_table: DataFrame containing the original (before outlier removal)
|
|
1072
1076
|
and outlier-corrected (after outlier removal) inverse variance-weighted MR results.
|
|
1073
1077
|
- GlobalTest: p-value of the global MR-PRESSO test indicating the presence of horizontal pleiotropy.
|
|
@@ -1163,6 +1167,58 @@ class Geno:
|
|
|
1163
1167
|
)
|
|
1164
1168
|
|
|
1165
1169
|
return data
|
|
1170
|
+
|
|
1171
|
+
def query_gwas_catalog(
    self,
    p_threshold=5e-8,
    return_p=False,
    return_study=False,
    replace=True):
    """
    Query the GWAS Catalog REST API and add an "ASSOC" column containing the traits associated with each SNP.

    Args:
        p_threshold (float, optional): Only associations that are at least as significant are reported. Default is 5e-8.
        return_p (bool, optional): If True, include the p-value in the results. Default is False.
        return_study (bool, optional): If True, include the ID of the study from which the association is derived in the results. Default is False.
        replace (bool, optional): If True, the "ASSOC" column is added to the data attribute in place.
            If False, a copy is annotated and the data attribute is left untouched. Default is True.

    Returns:
        tuple: Contains the following elements:
            - data (pd.DataFrame): The data (or a copy of it when replace=False) with an additional "ASSOC" column.
              Each element of this column is a list of strings or tuples depending on the `return_p` and
              `return_study` flags. If the SNP could not be queried, the value is set to "FAILED_QUERY".
            - errors (list): The second value returned by `async_query_gwas_catalog`
              (the SNPs for which the query did not succeed).

    Raises:
        ValueError: If the data attribute has no "SNP" column.
    """
    # Ensure mandatory column is present in the input data
    if "SNP" not in self.data.columns:
        raise ValueError("The SNP column is necessary for the GWAS query!")

    # Work on the attribute itself or on a copy depending on the replace argument
    data = self.data if replace else self.data.copy()

    print(
        f"Querying the GWAS Catalog and creating the ASSOC column. "
        f"Only associations with a p-value <= {p_threshold} are reported. Use the p_threshold argument to change the threshold. "
        f"To report the p-value of each association, use return_p=True. To report the study ID of the association, use return_study=True. "
        f"The .data attribute will {'be' if replace else 'not be'} modified. "
        f"{'Use replace=False to leave it as is.' if replace else ''}"
    )

    # Send each distinct, non-missing SNP identifier only once: duplicated rows would
    # trigger redundant requests and a missing value would be queried as a literal "nan".
    snp_ids = data["SNP"].dropna().unique().tolist()

    # Call the async function to query all SNPs
    results_snps, errors = async_query_gwas_catalog(
        snp_ids,
        p_threshold=p_threshold,
        return_p=return_p,
        return_study=return_study)

    # Create the column: SNPs absent from the results (failed or missing) are flagged explicitly
    data["ASSOC"] = data["SNP"].map(results_snps).fillna("FAILED_QUERY")

    print("The ASSOC column has been successfully created.")

    return data, errors
|
|
1221
|
+
|
|
1166
1222
|
|
|
1167
1223
|
def standardize(self):
|
|
1168
1224
|
"""
|
|
@@ -213,9 +213,11 @@ def fill_snpids_func(data, reference_panel_df):
|
|
|
213
213
|
+ ":"
|
|
214
214
|
+ data.loc[missing_snp_condition, "POS"].astype(str)
|
|
215
215
|
+ ":"
|
|
216
|
+
+ data.loc[missing_snp_condition, "NEA"].astype(str)
|
|
217
|
+
+ ":"
|
|
216
218
|
+ data.loc[missing_snp_condition, "EA"].astype(str)
|
|
217
219
|
)
|
|
218
|
-
print_statement = f" and their ID set to CHR:POS:EA"
|
|
220
|
+
print_statement = f" and their ID set to CHR:POS:NEA:EA"
|
|
219
221
|
|
|
220
222
|
perc_missing = n_missing / data.shape[0] * 100
|
|
221
223
|
|
|
@@ -239,7 +241,8 @@ def fill_snpids_func(data, reference_panel_df):
|
|
|
239
241
|
def check_int_column(data, int_col):
|
|
240
242
|
"""Set the type of the int_col column to Int32 and non-numeric values to NA."""
|
|
241
243
|
nrows = data.shape[0]
|
|
242
|
-
|
|
244
|
+
if not pd.api.types.is_integer_dtype(data[int_col].dtype):
|
|
245
|
+
data[int_col] = pd.to_numeric(data[int_col].astype(str).str.strip(), errors="coerce")
|
|
243
246
|
data[int_col] = data[int_col].round(0).astype("Int32")
|
|
244
247
|
n_nan = data[int_col].isna().sum()
|
|
245
248
|
if n_nan > 0:
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import aiohttp
|
|
2
|
+
import asyncio
|
|
3
|
+
import numpy as np
|
|
4
|
+
import nest_asyncio
|
|
5
|
+
from tqdm.asyncio import tqdm_asyncio
|
|
6
|
+
|
|
7
|
+
# Using nest_asyncio to allow execution in notebooks
|
|
8
|
+
nest_asyncio.apply()
|
|
9
|
+
|
|
10
|
+
# Function to query GWAS Catalog API for SNP associations
|
|
11
|
+
async def query_gwas_catalog_coroutine(snps, p_threshold=5e-8, return_p=False, return_study=False):
    """
    Query the EBI GWAS Catalog REST API for the associations of each SNP.

    Args:
        snps (iterable): SNP identifiers, inserted as-is in the request URL. Duplicates are queried once.
        p_threshold (float, optional): Only associations with a p-value <= p_threshold are reported. Default is 5e-8.
        return_p (bool, optional): If True, report the p-value (formatted with 4 significant digits). Default is False.
        return_study (bool, optional): If True, fetch and report the study accession ID of each association. Default is False.

    Returns:
        tuple: (results_global, errors)
            - results_global (dict): SNP -> list of results. Each result is a trait string, or a tuple
              (trait, p-value), (trait, study_id) or (trait, p-value, study_id) depending on the flags.
              Without return_study, each trait appears once (with its lowest p-value when return_p is True).
            - errors (list): SNPs for which the GWAS Catalog could not be queried.
    """
    results_global = {}  # Dictionary storing the SNP (keys) and results for each SNP
    errors = []  # List storing SNPs for which the GWAS Catalog could not be queried

    # Query each distinct SNP once (input order preserved); nothing to do for an empty input
    unique_snps = list(dict.fromkeys(snps))
    if not unique_snps:
        return results_global, errors

    async def fetch(session, url):
        """Return the decoded JSON body of url, or None if the request or the decoding fails."""
        try:
            async with session.get(url) as response:
                if response.status == 200:
                    return await response.json()
                return None
        except (aiohttp.ClientError, asyncio.TimeoutError, ValueError):
            # A single failed request must not abort the whole batch: report it as a failed query
            return None

    async def process_snp(session, snp):
        """Fetch the associations of one SNP and store them in results_global (or the SNP in errors)."""
        base_url = f"https://www.ebi.ac.uk/gwas/rest/api/singleNucleotidePolymorphisms/{snp}/associations?projection=associationBySnp"
        base_data = await fetch(session, base_url)

        if not base_data:
            errors.append(snp)
            return

        # Associations passing the threshold, kept as (trait, numeric p-value, study_id)
        # so that p-values are compared as numbers and only formatted at the very end
        hits = []
        for assoc in base_data.get("_embedded", {}).get("associations", []):
            try:
                pvalue = float(assoc.get("pvalue"))
            except (TypeError, ValueError):
                # Missing or non-numeric p-value: the association cannot be assessed
                continue
            # Written so that a NaN p-value is also skipped
            if not pvalue <= p_threshold:
                continue

            # Some associations carry no EFO trait: report an empty trait instead of crashing
            efo_traits = assoc.get("efoTraits") or []
            trait = efo_traits[0].get("trait", "") if efo_traits else ""

            # If the return_study flag is active: query the page containing the GWAS Catalog study ID
            study_id = None
            if return_study:
                study_url = assoc.get("_links", {}).get("study", {}).get("href")
                study_data = await fetch(session, study_url) if study_url else None
                study_id = study_data.get("accessionId", "") if study_data else "Not found"

            hits.append((trait, pvalue, study_id))

        # Shape the results depending on the return flags
        if return_study:
            if return_p:
                results_snp = [(trait, "{:.4g}".format(pvalue), study_id) for trait, pvalue, study_id in hits]
            else:
                results_snp = [(trait, study_id) for trait, _, study_id in hits]
        elif return_p:
            # Each trait once, with its lowest p-value
            lowest = {}
            for trait, pvalue, _ in hits:
                if trait not in lowest or pvalue < lowest[trait]:
                    lowest[trait] = pvalue
            results_snp = [(trait, "{:.4g}".format(pvalue)) for trait, pvalue in lowest.items()]
        else:
            # Each trait once, in order of first appearance
            results_snp = list(dict.fromkeys(trait for trait, _, _ in hits))

        results_global[snp] = results_snp

    async with aiohttp.ClientSession() as session:
        tasks = [process_snp(session, snp) for snp in unique_snps]
        await tqdm_asyncio.gather(*tasks)

    return results_global, errors
|
|
81
|
+
|
|
82
|
+
# Main function to start the event loop and run the asynchronous query
|
|
83
|
+
def async_query_gwas_catalog(snps, p_threshold=5e-8, return_p=False, return_study=False):
    """
    Synchronous entry point: run query_gwas_catalog_coroutine on an event loop and return its result.

    Args:
        snps: SNP identifiers, forwarded to the coroutine.
        p_threshold (float, optional): Forwarded to the coroutine. Default is 5e-8.
        return_p (bool, optional): Forwarded to the coroutine. Default is False.
        return_study (bool, optional): Forwarded to the coroutine. Default is False.

    Returns:
        tuple: (results_global, errors) as returned by query_gwas_catalog_coroutine.
    """
    try:
        loop = asyncio.get_event_loop()
        if loop.is_closed():
            raise RuntimeError("The current event loop is closed.")
    except RuntimeError:
        # No usable loop for this thread (worker thread, closed loop, or an interpreter
        # that no longer creates one implicitly): create and register a fresh one
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)

    results_global, errors = loop.run_until_complete(
        query_gwas_catalog_coroutine(snps, p_threshold, return_p, return_study)
    )
    return results_global, errors
|
|
@@ -98,7 +98,7 @@ def set_reference_folder(path=""):
|
|
|
98
98
|
None: The function prints messages to inform the user of the status and any errors.
|
|
99
99
|
"""
|
|
100
100
|
|
|
101
|
-
# If no path is provided, set default path to
|
|
101
|
+
# If no path is provided, set default path to root/.genal/Reference_files
|
|
102
102
|
if not path:
|
|
103
103
|
path = default_ref_path
|
|
104
104
|
print(f"No path provided, defaulting to {default_ref_path}.")
|
|
@@ -188,9 +188,9 @@ def get_reference_panel_path(reference_panel="eur"):
|
|
|
188
188
|
print(
|
|
189
189
|
"If you have already downloaded it, use genal.set_reference_folder(path) to avoid downloading again."
|
|
190
190
|
)
|
|
191
|
-
url = f"https://storage.googleapis.com/genal_files/
|
|
191
|
+
url = f"https://storage.googleapis.com/genal_files/reference_panels.tgz"
|
|
192
192
|
try:
|
|
193
|
-
wget.download(url, out=os.path.join(ref_path, "
|
|
193
|
+
wget.download(url, out=os.path.join(ref_path, "reference_panels.tgz"))
|
|
194
194
|
except Exception as e:
|
|
195
195
|
print(f"Download unsuccessful: {e}")
|
|
196
196
|
print(
|
|
@@ -199,7 +199,7 @@ def get_reference_panel_path(reference_panel="eur"):
|
|
|
199
199
|
raise FileNotFoundError(f"Reference panel {reference_panel} not found.")
|
|
200
200
|
|
|
201
201
|
print("Download successful. Decompressing...")
|
|
202
|
-
with tarfile.open(os.path.join(ref_path, "
|
|
202
|
+
with tarfile.open(os.path.join(ref_path, "reference_panels.tgz"), "r:gz") as tar_ref:
|
|
203
203
|
tar_ref.extractall(ref_path)
|
|
204
204
|
else:
|
|
205
205
|
print(f"Using the {ref_panel_name} reference panel.")
|
|
@@ -207,7 +207,6 @@ def get_reference_panel_path(reference_panel="eur"):
|
|
|
207
207
|
return ref_panel_path
|
|
208
208
|
|
|
209
209
|
|
|
210
|
-
## Need to do the multi option
|
|
211
210
|
def load_reference_panel(reference_panel="eur"):
|
|
212
211
|
"""Load the bim file from the reference panel specified."""
|
|
213
212
|
|
|
@@ -227,7 +226,7 @@ def load_reference_panel(reference_panel="eur"):
|
|
|
227
226
|
|
|
228
227
|
#Load it and return it
|
|
229
228
|
reference_panel_df = pd.read_csv(
|
|
230
|
-
ref_panel_path + ".bim", sep
|
|
229
|
+
ref_panel_path + ".bim", sep="\t", names=["CHR","SNP","F","POS","A1","A2"]
|
|
231
230
|
)
|
|
232
231
|
return reference_panel_df
|
|
233
232
|
|
|
@@ -4,7 +4,7 @@ build-backend = "flit_core.buildapi"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "genal-python" # Updated name for PyPI
|
|
7
|
-
version = "0.
|
|
7
|
+
version = "0.7"
|
|
8
8
|
authors = [{name = "Cyprien Rivier", email = "riviercyprien@gmail.com"}]
|
|
9
9
|
description = "A python toolkit for polygenic risk scoring and mendelian randomization."
|
|
10
10
|
readme = "README.md"
|
|
@@ -18,16 +18,19 @@ classifiers = [
|
|
|
18
18
|
|
|
19
19
|
# Dependencies section
|
|
20
20
|
dependencies = [
|
|
21
|
-
"
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
21
|
+
"aiohttp==3.9.5",
|
|
22
|
+
"nest_asyncio==1.5.5",
|
|
23
|
+
"numpy>=1.24.4, <2.0",
|
|
24
|
+
"pandas>=2.0.3",
|
|
25
|
+
"plotnine==0.12.3",
|
|
26
|
+
"psutil==5.9.1",
|
|
27
|
+
"pyliftover==0.4",
|
|
28
|
+
"scikit_learn>=1.3.0",
|
|
29
|
+
"scipy>=1.11.4",
|
|
30
|
+
"sphinx_rtd_theme==1.3.0",
|
|
31
|
+
"statsmodels==0.14.0",
|
|
32
|
+
"tqdm==4.66.1",
|
|
33
|
+
"wget==3.2"
|
|
31
34
|
]
|
|
32
35
|
|
|
33
36
|
[tool.setuptools.package-dir]
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
aiohttp==3.9.5
|
|
2
|
+
nest_asyncio==1.5.5
|
|
1
3
|
numpy>=1.24.4, <2.0
|
|
2
4
|
pandas>=2.0.3
|
|
3
5
|
plotnine==0.12.3
|
|
@@ -5,8 +7,7 @@ psutil==5.9.1
|
|
|
5
7
|
pyliftover==0.4
|
|
6
8
|
scikit_learn>=1.3.0
|
|
7
9
|
scipy>=1.11.4
|
|
8
|
-
setuptools==62.3.3
|
|
9
10
|
sphinx_rtd_theme==1.3.0
|
|
10
|
-
statsmodels
|
|
11
|
+
statsmodels==0.14.0
|
|
11
12
|
tqdm==4.66.1
|
|
12
13
|
wget==3.2
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/css/fonts/Roboto-Slab-Bold.woff
RENAMED
|
File without changes
|
{genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/css/fonts/Roboto-Slab-Bold.woff2
RENAMED
|
File without changes
|
{genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/css/fonts/Roboto-Slab-Regular.woff
RENAMED
|
File without changes
|
{genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/css/fonts/Roboto-Slab-Regular.woff2
RENAMED
|
File without changes
|
{genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/css/fonts/fontawesome-webfont.eot
RENAMED
|
File without changes
|
{genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/css/fonts/fontawesome-webfont.svg
RENAMED
|
File without changes
|
{genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/css/fonts/fontawesome-webfont.ttf
RENAMED
|
File without changes
|
{genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/css/fonts/fontawesome-webfont.woff
RENAMED
|
File without changes
|
{genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/css/fonts/fontawesome-webfont.woff2
RENAMED
|
File without changes
|
{genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/css/fonts/lato-bold-italic.woff
RENAMED
|
File without changes
|
{genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/css/fonts/lato-bold-italic.woff2
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/css/fonts/lato-normal-italic.woff
RENAMED
|
File without changes
|
{genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/css/fonts/lato-normal-italic.woff2
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{genal_python-0.6 → genal_python-0.7}/docs/_build/html/_static/js/html5shiv-printshiv.min.js
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|