gwaslab 3.5.7__tar.gz → 3.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gwaslab might be problematic. Click here for more details.
- {gwaslab-3.5.7/src/gwaslab.egg-info → gwaslab-3.6.0}/PKG-INFO +46 -68
- {gwaslab-3.5.7 → gwaslab-3.6.0}/README.md +42 -65
- {gwaslab-3.5.7 → gwaslab-3.6.0}/pyproject.toml +2 -2
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/__init__.py +2 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/bd_common_data.py +1 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/bd_get_hapmap3.py +0 -1
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/data/formatbook.json +78 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/data/reference.json +3 -1
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/g_Sumstats.py +110 -25
- gwaslab-3.6.0/src/gwaslab/g_SumstatsMulti.py +287 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/g_SumstatsPair.py +101 -16
- gwaslab-3.6.0/src/gwaslab/g_Sumstats_polars.py +245 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/g_headers.py +12 -3
- gwaslab-3.6.0/src/gwaslab/g_meta.py +136 -0
- gwaslab-3.6.0/src/gwaslab/g_meta_update.py +48 -0
- gwaslab-3.6.0/src/gwaslab/g_vchange_status_polars.py +44 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/g_version.py +2 -2
- gwaslab-3.6.0/src/gwaslab/hm_casting.py +315 -0
- gwaslab-3.6.0/src/gwaslab/hm_casting_polars.py +202 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/hm_harmonize_sumstats.py +19 -8
- gwaslab-3.6.0/src/gwaslab/io_load_ld.py +529 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/io_preformat_input.py +11 -0
- gwaslab-3.6.0/src/gwaslab/io_preformat_input_polars.py +632 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/io_process_args.py +25 -1
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/io_read_ldsc.py +34 -3
- gwaslab-3.6.0/src/gwaslab/io_read_pipcs.py +79 -0
- gwaslab-3.6.0/src/gwaslab/prscs_gigrnd.py +122 -0
- gwaslab-3.6.0/src/gwaslab/prscs_mcmc_gtb.py +136 -0
- gwaslab-3.6.0/src/gwaslab/prscs_parse_genet.py +98 -0
- gwaslab-3.6.0/src/gwaslab/qc_build.py +53 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/qc_check_datatype.py +10 -8
- gwaslab-3.6.0/src/gwaslab/qc_check_datatype_polars.py +128 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/qc_fix_sumstats.py +25 -23
- gwaslab-3.6.0/src/gwaslab/qc_fix_sumstats_polars.py +193 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/util_ex_calculate_ldmatrix.py +49 -19
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/util_ex_gwascatalog.py +71 -28
- gwaslab-3.6.0/src/gwaslab/util_ex_infer_ancestry.py +65 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/util_ex_ldsc.py +67 -21
- gwaslab-3.6.0/src/gwaslab/util_ex_match_ldmatrix.py +396 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/util_ex_run_2samplemr.py +0 -2
- gwaslab-3.6.0/src/gwaslab/util_ex_run_ccgwas.py +155 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/util_ex_run_coloc.py +1 -1
- gwaslab-3.6.0/src/gwaslab/util_ex_run_hyprcoloc.py +117 -0
- gwaslab-3.6.0/src/gwaslab/util_ex_run_magma.py +74 -0
- gwaslab-3.6.0/src/gwaslab/util_ex_run_mesusie.py +155 -0
- gwaslab-3.6.0/src/gwaslab/util_ex_run_mtag.py +92 -0
- gwaslab-3.6.0/src/gwaslab/util_ex_run_prscs.py +85 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/util_ex_run_susie.py +40 -9
- gwaslab-3.6.0/src/gwaslab/util_in_estimate_ess.py +18 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/util_in_fill_data.py +20 -1
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/util_in_filter_value.py +10 -5
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/util_in_get_sig.py +71 -13
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/util_in_meta.py +168 -4
- gwaslab-3.6.0/src/gwaslab/util_in_meta_polars.py +174 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/viz_aux_annotate_plot.py +13 -2
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/viz_plot_compare_effect.py +87 -23
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/viz_plot_credible_sets.py +55 -11
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/viz_plot_effect.py +22 -12
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/viz_plot_miamiplot2.py +3 -2
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/viz_plot_mqqplot.py +94 -84
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/viz_plot_qqplot.py +9 -7
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/viz_plot_regional2.py +2 -1
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/viz_plot_stackedregional.py +4 -1
- {gwaslab-3.5.7 → gwaslab-3.6.0/src/gwaslab.egg-info}/PKG-INFO +46 -68
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab.egg-info/SOURCES.txt +23 -0
- gwaslab-3.5.7/src/gwaslab/g_meta.py +0 -59
- gwaslab-3.5.7/src/gwaslab/hm_casting.py +0 -256
- gwaslab-3.5.7/src/gwaslab/io_read_pipcs.py +0 -23
- {gwaslab-3.5.7 → gwaslab-3.6.0}/LICENSE +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/LICENSE_before_v3.4.39 +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/setup.cfg +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/bd_config.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/bd_download.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/cache_manager.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/data/chrx_par/chrx_par_hg19.bed.gz +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/data/chrx_par/chrx_par_hg38.bed.gz +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/data/hapmap3_SNPs/hapmap3_db150_hg19.snplist.gz +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/data/hapmap3_SNPs/hapmap3_db151_hg38.snplist.gz +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/data/high_ld/high_ld_hla_hg19.bed.gz +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/data/high_ld/high_ld_hla_hg38.bed.gz +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/g_Log.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/g_Phenotypes.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/g_SumstatsSet.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/g_SumstatsT.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/g_Sumstats_summary.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/g_vchange_status.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/hm_rsid_to_chrpos.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/io_read_tabular.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/io_to_formats.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/io_to_pickle.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/ldsc_irwls.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/ldsc_jackknife.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/ldsc_ldscore.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/ldsc_parse.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/ldsc_regressions.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/ldsc_sumstats.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/run_script.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/util_abf_finemapping.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/util_ex_calculate_prs.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/util_ex_ldproxyfinder.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/util_ex_plink_filter.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/util_ex_process_h5.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/util_ex_process_ref.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/util_ex_run_clumping.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/util_in_calculate_gc.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/util_in_calculate_power.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/util_in_convert_h2.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/util_in_correct_winnerscurse.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/util_in_get_density.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/util_in_merge.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/util_in_snphwe.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/viz_aux_chromatin.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/viz_aux_property.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/viz_aux_quickfix.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/viz_aux_reposition_text.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/viz_aux_save_figure.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/viz_plot_compare_af.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/viz_plot_forestplot.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/viz_plot_miamiplot.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/viz_plot_phe_heatmap.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/viz_plot_regionalplot.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/viz_plot_rg_heatmap.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/viz_plot_scatter_with_reg.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab/viz_plot_trumpetplot.py +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab.egg-info/dependency_links.txt +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab.egg-info/requires.txt +0 -0
- {gwaslab-3.5.7 → gwaslab-3.6.0}/src/gwaslab.egg-info/top_level.txt +0 -0
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: gwaslab
|
|
3
|
-
Version: 3.5.7
|
|
3
|
+
Version: 3.6.0
|
|
4
4
|
Summary: A collection of handy tools for GWAS SumStats
|
|
5
5
|
Author-email: Yunye <yunye@gwaslab.com>
|
|
6
6
|
Project-URL: Homepage, https://cloufield.github.io/gwaslab/
|
|
7
7
|
Project-URL: Github, https://github.com/Cloufield/gwaslab
|
|
8
8
|
Classifier: Programming Language :: Python :: 3
|
|
9
|
-
Classifier: License :: OSI Approved ::
|
|
9
|
+
Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
|
|
10
10
|
Classifier: Operating System :: OS Independent
|
|
11
11
|
Requires-Python: <3.13,>=3.9
|
|
12
12
|
Description-Content-Type: text/markdown
|
|
@@ -25,52 +25,74 @@ Requires-Dist: scikit-allel>=1.3.5
|
|
|
25
25
|
Requires-Dist: pyensembl==2.2.3
|
|
26
26
|
Requires-Dist: gtfparse==1.3.0
|
|
27
27
|
Requires-Dist: h5py>=3.10.0
|
|
28
|
+
Dynamic: license-file
|
|
28
29
|
|
|
29
30
|
# GWASLab
|
|
30
31
|
|
|
31
|
-
<img width="600" alt="image" src="https://
|
|
32
|
+
<img width="600" alt="image" src="https://github.com/user-attachments/assets/109262c6-c870-4078-94b5-66cf8c6b13c4" />
|
|
32
33
|
|
|
33
34
|

|
|
34
35
|
[](https://pepy.tech/project/gwaslab)
|
|
35
36
|

|
|
36
|
-
[](https://hits.seeyoufarm.com)
|
|
37
37
|

|
|
38
38
|
|
|
39
|
-
* A handy Python toolkit for handling GWAS summary statistics (sumstats).
|
|
39
|
+
* A handy Python-based toolkit for handling GWAS summary statistics (sumstats).
|
|
40
40
|
* Each process is modularized and can be customized to your needs.
|
|
41
41
|
* Sumstats-specific manipulations are designed as methods of a Python object, `gwaslab.Sumstats`.
|
|
42
42
|
|
|
43
|
-
|
|
43
|
+
## Installation
|
|
44
44
|
|
|
45
|
-
|
|
45
|
+
### install via pip
|
|
46
46
|
|
|
47
|
-
|
|
47
|
+
The latest version of GWASLab now supports Python 3.9, 3.10, 3.11, and 3.12.
|
|
48
48
|
|
|
49
|
-
|
|
49
|
+
```bash
|
|
50
|
+
pip install gwaslab
|
|
51
|
+
```
|
|
50
52
|
|
|
51
|
-
### install
|
|
53
|
+
### install in conda environment
|
|
54
|
+
|
|
55
|
+
Create a Python 3.9, 3.10, 3.11 or 3.12 environment and install gwaslab using pip:
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
conda create -n gwaslab -c conda-forge python=3.12
|
|
59
|
+
|
|
60
|
+
conda activate gwaslab
|
|
52
61
|
|
|
62
|
+
pip install gwaslab
|
|
53
63
|
```
|
|
54
|
-
|
|
64
|
+
|
|
65
|
+
or create a new environment using yml file [environment.yml](https://github.com/Cloufield/gwaslab/blob/main/environment.yml)
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
conda env create -n gwaslab -f environment.yml
|
|
55
69
|
```
|
|
56
70
|
|
|
71
|
+
### install using docker (deprecated)
|
|
72
|
+
|
|
73
|
+
A docker file is available [here](https://github.com/Cloufield/gwaslab/blob/main/docker/Dockerfile) for building local images.
|
|
74
|
+
|
|
75
|
+
## Quick start
|
|
76
|
+
|
|
57
77
|
```python
|
|
78
|
+
|
|
58
79
|
import gwaslab as gl
|
|
80
|
+
|
|
59
81
|
# load plink2 output
|
|
60
|
-
mysumstats = gl.Sumstats("
|
|
82
|
+
mysumstats = gl.Sumstats("sumstats.txt.gz", fmt="plink2")
|
|
61
83
|
|
|
62
|
-
# load sumstats with auto mode (auto-detecting
|
|
84
|
+
# or load sumstats with auto mode (auto-detecting commonly used headers)
|
|
63
85
|
# assuming ALT/A1 is EA, and frq is EAF
|
|
64
|
-
mysumstats = gl.Sumstats("
|
|
86
|
+
mysumstats = gl.Sumstats("sumstats.txt.gz", fmt="auto")
|
|
65
87
|
|
|
66
88
|
# or you can specify the columns:
|
|
67
|
-
mysumstats = gl.Sumstats("
|
|
89
|
+
mysumstats = gl.Sumstats("sumstats.txt.gz",
|
|
68
90
|
snpid="SNP",
|
|
69
91
|
chrom="CHR",
|
|
70
92
|
pos="POS",
|
|
71
93
|
ea="ALT",
|
|
72
94
|
nea="REF",
|
|
73
|
-
|
|
95
|
+
eaf="Frq",
|
|
74
96
|
beta="BETA",
|
|
75
97
|
se="SE",
|
|
76
98
|
p="P",
|
|
@@ -83,26 +105,9 @@ mysumstats.plot_mqq()
|
|
|
83
105
|
...
|
|
84
106
|
```
|
|
85
107
|
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
Create a Python 3.9 environment and install gwaslab using pip:
|
|
89
|
-
|
|
90
|
-
```
|
|
91
|
-
conda env create -n gwaslab_test -c conda-forge python=3.9
|
|
92
|
-
conda activate gwaslab
|
|
93
|
-
pip install gwaslab==3.4.45
|
|
94
|
-
```
|
|
95
|
-
|
|
96
|
-
or create a new environment using yml file [environment_3.4.40.yml](https://github.com/Cloufield/gwaslab/blob/main/environment_3.4.40.yml)
|
|
97
|
-
|
|
98
|
-
```
|
|
99
|
-
conda env create -n gwaslab -f environment_3.4.40.yml
|
|
100
|
-
```
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
### install using docker
|
|
108
|
+
## Documentation and tutorials
|
|
104
109
|
|
|
105
|
-
|
|
110
|
+
Documentation and tutorials for GWASLab are available [here](https://cloufield.github.io/gwaslab/).
|
|
106
111
|
|
|
107
112
|
## Functions
|
|
108
113
|
|
|
@@ -149,7 +154,7 @@ A docker file is available [here](https://github.com/Cloufield/gwaslab/blob/main
|
|
|
149
154
|
- Scatter plot: allele frequency comparison
|
|
150
155
|
- Scatter plot: trumpet plot (plot of MAF and effect size with power lines)
|
|
151
156
|
|
|
152
|
-
|
|
157
|
+
#### Visualization Examples
|
|
153
158
|
|
|
154
159
|
<img width="600" alt="image" src="https://user-images.githubusercontent.com/40289485/233836639-34b03c47-5a59-4fd4-9677-5e13b02aab15.png">
|
|
155
160
|
<img width="600" alt="image" src="https://user-images.githubusercontent.com/40289485/197393168-e3e7076f-2801-4d66-9526-80778d44f3da.png">
|
|
@@ -165,42 +170,15 @@ A docker file is available [here](https://github.com/Cloufield/gwaslab/blob/main
|
|
|
165
170
|
- Sumstats summary: give you a quick overview of the sumstats.
|
|
166
171
|
- ...
|
|
167
172
|
|
|
168
|
-
##
|
|
169
|
-
|
|
170
|
-
environment.yml
|
|
173
|
+
## Issues
|
|
171
174
|
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
channels:
|
|
175
|
-
- conda-forge
|
|
176
|
-
- defaults
|
|
177
|
-
dependencies:
|
|
178
|
-
- python=3.8.16=h7a1cb2a_3
|
|
179
|
-
- jupyter==1.0.0
|
|
180
|
-
- pip==23.1.2
|
|
181
|
-
- pip:
|
|
182
|
-
- adjusttext==0.8
|
|
183
|
-
- biopython==1.81
|
|
184
|
-
- gwaslab==3.4.16
|
|
185
|
-
- liftover==1.1.16
|
|
186
|
-
- matplotlib==3.7.1
|
|
187
|
-
- numpy==1.24.2
|
|
188
|
-
- pandas==1.4.4
|
|
189
|
-
- scikit-allel==1.3.5
|
|
190
|
-
- scikit-learn==1.2.2
|
|
191
|
-
- scipy==1.10.1
|
|
192
|
-
- seaborn==0.11.2
|
|
193
|
-
- statsmodels==0.13
|
|
194
|
-
- adjustText==0.8
|
|
195
|
-
- pysam==0.19
|
|
196
|
-
- pyensembl==2.2.3
|
|
197
|
-
- h5py==3.10.0
|
|
198
|
-
```
|
|
175
|
+
- GWASLab is currently under active development, with frequent updates.
|
|
176
|
+
- Note: Known issues are documented at https://cloufield.github.io/gwaslab/KnownIssues/.
|
|
199
177
|
|
|
200
178
|
## How to cite
|
|
201
179
|
- GWASLab preprint: He, Y., Koido, M., Shimmori, Y., Kamatani, Y. (2023). GWASLab: a Python package for processing and visualizing GWAS summary statistics. Preprint at Jxiv, 2023-5. https://doi.org/10.51094/jxiv.370
|
|
202
180
|
|
|
203
|
-
## Sample
|
|
181
|
+
## Sample data used for tutorial
|
|
204
182
|
- Sample GWAS data used in GWASLab is obtained from: http://jenger.riken.jp/ (Suzuki, Ken, et al. "Identification of 28 new susceptibility loci for type 2 diabetes in the Japanese population." Nature genetics 51.3 (2019): 379-386.).
|
|
205
183
|
|
|
206
184
|
## Acknowledgement
|
|
@@ -1,48 +1,69 @@
|
|
|
1
1
|
# GWASLab
|
|
2
2
|
|
|
3
|
-
<img width="600" alt="image" src="https://
|
|
3
|
+
<img width="600" alt="image" src="https://github.com/user-attachments/assets/109262c6-c870-4078-94b5-66cf8c6b13c4" />
|
|
4
4
|
|
|
5
5
|

|
|
6
6
|
[](https://pepy.tech/project/gwaslab)
|
|
7
7
|

|
|
8
|
-
[](https://hits.seeyoufarm.com)
|
|
9
8
|

|
|
10
9
|
|
|
11
|
-
* A handy Python toolkit for handling GWAS summary statistics (sumstats).
|
|
10
|
+
* A handy Python-based toolkit for handling GWAS summary statistics (sumstats).
|
|
12
11
|
* Each process is modularized and can be customized to your needs.
|
|
13
12
|
* Sumstats-specific manipulations are designed as methods of a Python object, `gwaslab.Sumstats`.
|
|
14
13
|
|
|
15
|
-
|
|
14
|
+
## Installation
|
|
16
15
|
|
|
17
|
-
|
|
16
|
+
### install via pip
|
|
18
17
|
|
|
19
|
-
|
|
18
|
+
The latest version of GWASLab now supports Python 3.9, 3.10, 3.11, and 3.12.
|
|
20
19
|
|
|
21
|
-
|
|
20
|
+
```bash
|
|
21
|
+
pip install gwaslab
|
|
22
|
+
```
|
|
22
23
|
|
|
23
|
-
### install
|
|
24
|
+
### install in conda environment
|
|
25
|
+
|
|
26
|
+
Create a Python 3.9, 3.10, 3.11 or 3.12 environment and install gwaslab using pip:
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
conda create -n gwaslab -c conda-forge python=3.12
|
|
30
|
+
|
|
31
|
+
conda activate gwaslab
|
|
24
32
|
|
|
33
|
+
pip install gwaslab
|
|
25
34
|
```
|
|
26
|
-
|
|
35
|
+
|
|
36
|
+
or create a new environment using yml file [environment.yml](https://github.com/Cloufield/gwaslab/blob/main/environment.yml)
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
conda env create -n gwaslab -f environment.yml
|
|
27
40
|
```
|
|
28
41
|
|
|
42
|
+
### install using docker (deprecated)
|
|
43
|
+
|
|
44
|
+
A docker file is available [here](https://github.com/Cloufield/gwaslab/blob/main/docker/Dockerfile) for building local images.
|
|
45
|
+
|
|
46
|
+
## Quick start
|
|
47
|
+
|
|
29
48
|
```python
|
|
49
|
+
|
|
30
50
|
import gwaslab as gl
|
|
51
|
+
|
|
31
52
|
# load plink2 output
|
|
32
|
-
mysumstats = gl.Sumstats("
|
|
53
|
+
mysumstats = gl.Sumstats("sumstats.txt.gz", fmt="plink2")
|
|
33
54
|
|
|
34
|
-
# load sumstats with auto mode (auto-detecting
|
|
55
|
+
# or load sumstats with auto mode (auto-detecting commonly used headers)
|
|
35
56
|
# assuming ALT/A1 is EA, and frq is EAF
|
|
36
|
-
mysumstats = gl.Sumstats("
|
|
57
|
+
mysumstats = gl.Sumstats("sumstats.txt.gz", fmt="auto")
|
|
37
58
|
|
|
38
59
|
# or you can specify the columns:
|
|
39
|
-
mysumstats = gl.Sumstats("
|
|
60
|
+
mysumstats = gl.Sumstats("sumstats.txt.gz",
|
|
40
61
|
snpid="SNP",
|
|
41
62
|
chrom="CHR",
|
|
42
63
|
pos="POS",
|
|
43
64
|
ea="ALT",
|
|
44
65
|
nea="REF",
|
|
45
|
-
|
|
66
|
+
eaf="Frq",
|
|
46
67
|
beta="BETA",
|
|
47
68
|
se="SE",
|
|
48
69
|
p="P",
|
|
@@ -55,26 +76,9 @@ mysumstats.plot_mqq()
|
|
|
55
76
|
...
|
|
56
77
|
```
|
|
57
78
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
Create a Python 3.9 environment and install gwaslab using pip:
|
|
61
|
-
|
|
62
|
-
```
|
|
63
|
-
conda env create -n gwaslab_test -c conda-forge python=3.9
|
|
64
|
-
conda activate gwaslab
|
|
65
|
-
pip install gwaslab==3.4.45
|
|
66
|
-
```
|
|
67
|
-
|
|
68
|
-
or create a new environment using yml file [environment_3.4.40.yml](https://github.com/Cloufield/gwaslab/blob/main/environment_3.4.40.yml)
|
|
69
|
-
|
|
70
|
-
```
|
|
71
|
-
conda env create -n gwaslab -f environment_3.4.40.yml
|
|
72
|
-
```
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
### install using docker
|
|
79
|
+
## Documentation and tutorials
|
|
76
80
|
|
|
77
|
-
|
|
81
|
+
Documentation and tutorials for GWASLab are available [here](https://cloufield.github.io/gwaslab/).
|
|
78
82
|
|
|
79
83
|
## Functions
|
|
80
84
|
|
|
@@ -121,7 +125,7 @@ A docker file is available [here](https://github.com/Cloufield/gwaslab/blob/main
|
|
|
121
125
|
- Scatter plot: allele frequency comparison
|
|
122
126
|
- Scatter plot: trumpet plot (plot of MAF and effect size with power lines)
|
|
123
127
|
|
|
124
|
-
|
|
128
|
+
#### Visualization Examples
|
|
125
129
|
|
|
126
130
|
<img width="600" alt="image" src="https://user-images.githubusercontent.com/40289485/233836639-34b03c47-5a59-4fd4-9677-5e13b02aab15.png">
|
|
127
131
|
<img width="600" alt="image" src="https://user-images.githubusercontent.com/40289485/197393168-e3e7076f-2801-4d66-9526-80778d44f3da.png">
|
|
@@ -137,42 +141,15 @@ A docker file is available [here](https://github.com/Cloufield/gwaslab/blob/main
|
|
|
137
141
|
- Sumstats summary: give you a quick overview of the sumstats.
|
|
138
142
|
- ...
|
|
139
143
|
|
|
140
|
-
##
|
|
141
|
-
|
|
142
|
-
environment.yml
|
|
144
|
+
## Issues
|
|
143
145
|
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
channels:
|
|
147
|
-
- conda-forge
|
|
148
|
-
- defaults
|
|
149
|
-
dependencies:
|
|
150
|
-
- python=3.8.16=h7a1cb2a_3
|
|
151
|
-
- jupyter==1.0.0
|
|
152
|
-
- pip==23.1.2
|
|
153
|
-
- pip:
|
|
154
|
-
- adjusttext==0.8
|
|
155
|
-
- biopython==1.81
|
|
156
|
-
- gwaslab==3.4.16
|
|
157
|
-
- liftover==1.1.16
|
|
158
|
-
- matplotlib==3.7.1
|
|
159
|
-
- numpy==1.24.2
|
|
160
|
-
- pandas==1.4.4
|
|
161
|
-
- scikit-allel==1.3.5
|
|
162
|
-
- scikit-learn==1.2.2
|
|
163
|
-
- scipy==1.10.1
|
|
164
|
-
- seaborn==0.11.2
|
|
165
|
-
- statsmodels==0.13
|
|
166
|
-
- adjustText==0.8
|
|
167
|
-
- pysam==0.19
|
|
168
|
-
- pyensembl==2.2.3
|
|
169
|
-
- h5py==3.10.0
|
|
170
|
-
```
|
|
146
|
+
- GWASLab is currently under active development, with frequent updates.
|
|
147
|
+
- Note: Known issues are documented at https://cloufield.github.io/gwaslab/KnownIssues/.
|
|
171
148
|
|
|
172
149
|
## How to cite
|
|
173
150
|
- GWASLab preprint: He, Y., Koido, M., Shimmori, Y., Kamatani, Y. (2023). GWASLab: a Python package for processing and visualizing GWAS summary statistics. Preprint at Jxiv, 2023-5. https://doi.org/10.51094/jxiv.370
|
|
174
151
|
|
|
175
|
-
## Sample
|
|
152
|
+
## Sample data used for tutorial
|
|
176
153
|
- Sample GWAS data used in GWASLab is obtained from: http://jenger.riken.jp/ (Suzuki, Ken, et al. "Identification of 28 new susceptibility loci for type 2 diabetes in the Japanese population." Nature genetics 51.3 (2019): 379-386.).
|
|
177
154
|
|
|
178
155
|
## Acknowledgement
|
|
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
|
|
|
7
7
|
|
|
8
8
|
[project]
|
|
9
9
|
name = "gwaslab"
|
|
10
|
-
version = "3.5.7"
|
|
10
|
+
version = "3.6.0"
|
|
11
11
|
authors = [
|
|
12
12
|
{ name="Yunye", email="yunye@gwaslab.com" },
|
|
13
13
|
]
|
|
@@ -34,7 +34,7 @@ dependencies = [
|
|
|
34
34
|
requires-python = ">=3.9,<3.13"
|
|
35
35
|
classifiers = [
|
|
36
36
|
"Programming Language :: Python :: 3",
|
|
37
|
-
"License :: OSI Approved ::
|
|
37
|
+
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
|
|
38
38
|
"Operating System :: OS Independent",
|
|
39
39
|
]
|
|
40
40
|
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
from gwaslab.g_Sumstats import Sumstats
|
|
2
|
+
from gwaslab.g_Sumstats_polars import Sumstatsp
|
|
2
3
|
from gwaslab.g_SumstatsT import SumstatsT
|
|
3
4
|
from gwaslab.g_SumstatsPair import SumstatsPair
|
|
5
|
+
from gwaslab.g_SumstatsMulti import SumstatsMulti
|
|
4
6
|
from gwaslab.util_in_convert_h2 import h2_obs_to_liab
|
|
5
7
|
from gwaslab.util_in_convert_h2 import _get_per_snp_r2
|
|
6
8
|
from gwaslab.util_in_convert_h2 import h2_se_to_p
|
|
@@ -337,6 +337,7 @@ def _maketrans(complement_mapping):
|
|
|
337
337
|
"""
|
|
338
338
|
keys = "".join(complement_mapping.keys()).encode("ASCII")
|
|
339
339
|
values = "".join(complement_mapping.values()).encode("ASCII")
|
|
340
|
+
|
|
340
341
|
return bytes.maketrans(keys + keys.lower(), values + values.lower())
|
|
341
342
|
|
|
342
343
|
####################################################################################################################
|
|
@@ -43,7 +43,6 @@ def gethapmap3(sumstats,rsid="rsID",chrom="CHR", pos="POS", ea="EA", nea="NEA",b
|
|
|
43
43
|
else:
|
|
44
44
|
additional_cols=[]
|
|
45
45
|
hapmap3_ref = pd.read_csv(data_path,sep="\s+",usecols=["#CHROM","POS","rsid"]+additional_cols, dtype={"#CHROM":"string","POS":"string"})
|
|
46
|
-
|
|
47
46
|
#rsid A1 A2 #CHROM POS
|
|
48
47
|
#rs3094315 G A 1 752566
|
|
49
48
|
|
|
@@ -237,6 +237,16 @@
|
|
|
237
237
|
"TotalSampleSize": "N",
|
|
238
238
|
"Nsample": "N",
|
|
239
239
|
"num_samples": "N",
|
|
240
|
+
"Neff": "N_EFF",
|
|
241
|
+
"N_EFF": "N_EFF",
|
|
242
|
+
"N_CASE": "N_CASE",
|
|
243
|
+
"Ncase": "N_CASE",
|
|
244
|
+
"ncase": "N_CASE",
|
|
245
|
+
"n_case": "N_CASE",
|
|
246
|
+
"Ncontrol": "N_CONTROL",
|
|
247
|
+
"N_control": "N_CONTROL",
|
|
248
|
+
"N_Control": "N_CONTROL",
|
|
249
|
+
"NCONTROL": "N_CONTROL",
|
|
240
250
|
"beta": "BETA",
|
|
241
251
|
"BETA": "BETA",
|
|
242
252
|
"Beta": "BETA",
|
|
@@ -461,6 +471,31 @@
|
|
|
461
471
|
"Direction": "DIRECTION"
|
|
462
472
|
}
|
|
463
473
|
},
|
|
474
|
+
"genomicsem": {
|
|
475
|
+
"meta_data": {
|
|
476
|
+
"format_name": "genomicSEM",
|
|
477
|
+
"format_source": "https://github.com/GenomicSEM/GenomicSEM/wiki/4.-Common-Factor-GWAS",
|
|
478
|
+
"format_source2": "https://github.com/GenomicSEM/GenomicSEM/wiki/5.-Multivariate-GWAS",
|
|
479
|
+
"format_version": 20241210
|
|
480
|
+
},
|
|
481
|
+
"format_dict": {
|
|
482
|
+
"SNP": "rsID",
|
|
483
|
+
"A2": "NEA",
|
|
484
|
+
"A1": "EA",
|
|
485
|
+
"Frq": "EAF",
|
|
486
|
+
"MAF": "MAF",
|
|
487
|
+
"N": "N",
|
|
488
|
+
"est": "BETA",
|
|
489
|
+
"se_c": "SE",
|
|
490
|
+
"Pval_Estimate": "P",
|
|
491
|
+
"Z_Estimate": "Z",
|
|
492
|
+
"Q": "Q",
|
|
493
|
+
"Q_df": "DOF",
|
|
494
|
+
"Q_pval": "P_HET",
|
|
495
|
+
"CHR": "CHR",
|
|
496
|
+
"BP": "POS"
|
|
497
|
+
}
|
|
498
|
+
},
|
|
464
499
|
"plink_fam": {
|
|
465
500
|
"meta_data": {
|
|
466
501
|
"format_name": "plink_fam",
|
|
@@ -880,6 +915,21 @@
|
|
|
880
915
|
"POS": "POS"
|
|
881
916
|
}
|
|
882
917
|
},
|
|
918
|
+
"mesusie": {
|
|
919
|
+
"meta_data": {
|
|
920
|
+
"format_name": "MESuSiE",
|
|
921
|
+
"format_source": "https://borangao.github.io/meSuSie_Analysis/installation.html",
|
|
922
|
+
"format_version": 20221109
|
|
923
|
+
},
|
|
924
|
+
"format_dict": {
|
|
925
|
+
"SNP": "SNPID",
|
|
926
|
+
"N": "N",
|
|
927
|
+
"Beta": "BETA",
|
|
928
|
+
"Se": "SE",
|
|
929
|
+
"Z": "Z",
|
|
930
|
+
"POS": "POS"
|
|
931
|
+
}
|
|
932
|
+
},
|
|
883
933
|
"plink2_linear": {
|
|
884
934
|
"meta_data": {
|
|
885
935
|
"format_name": "PLINK2 .glm.linear",
|
|
@@ -1032,6 +1082,34 @@
|
|
|
1032
1082
|
"ci_upper": "OR_95U"
|
|
1033
1083
|
}
|
|
1034
1084
|
},
|
|
1085
|
+
"ccgwas": {
|
|
1086
|
+
"meta_data": {
|
|
1087
|
+
"format_name": "CCGWAS",
|
|
1088
|
+
"format_source": "https://github.com/wouterpeyrot/CCGWAS",
|
|
1089
|
+
"format_version": "20220901",
|
|
1090
|
+
"last_check_date": "20250416",
|
|
1091
|
+
"format_separator": "\t",
|
|
1092
|
+
"format_citation": "Peyrot, W. J., & Price, A. L. (2021). Identifying loci with different allele frequencies among cases of eight psychiatric disorders using CC-GWAS. Nature genetics, 53(4), 445-454.",
|
|
1093
|
+
"format_comment": null,
|
|
1094
|
+
"format_na": null,
|
|
1095
|
+
"format_other_cols": [
|
|
1096
|
+
"Exact_beta",
|
|
1097
|
+
"Exact_se",
|
|
1098
|
+
"Exact_pval",
|
|
1099
|
+
"CCGWAS_signif"
|
|
1100
|
+
]
|
|
1101
|
+
},
|
|
1102
|
+
"format_dict": {
|
|
1103
|
+
"SNP": "SNPID",
|
|
1104
|
+
"CHR": "CHR",
|
|
1105
|
+
"BP": "POS",
|
|
1106
|
+
"A2": "NEA",
|
|
1107
|
+
"A1": "EA",
|
|
1108
|
+
"OLS_beta": "BETA",
|
|
1109
|
+
"OLS_se": "SE",
|
|
1110
|
+
"OLS_pval": "P"
|
|
1111
|
+
}
|
|
1112
|
+
},
|
|
1035
1113
|
"fastgwa": {
|
|
1036
1114
|
"meta_data": {
|
|
1037
1115
|
"format_name": "fastgwa",
|
|
@@ -103,7 +103,9 @@
|
|
|
103
103
|
"13to19":"https://s3-us-west-2.amazonaws.com/human-pangenomics/T2T/CHM13/assemblies/chain/v1_nflo/chm13v2-hg19.chain",
|
|
104
104
|
"13to38":"https://s3-us-west-2.amazonaws.com/human-pangenomics/T2T/CHM13/assemblies/chain/v1_nflo/chm13v2-grch38.chain",
|
|
105
105
|
"18to19":"https://hgdownload.soe.ucsc.edu/goldenPath/hg18/liftOver/hg18ToHg19.over.chain.gz",
|
|
106
|
-
"18to38":"https://hgdownload.soe.ucsc.edu/goldenPath/hg18/liftOver/hg18ToHg38.over.chain.gz"
|
|
106
|
+
"18to38":"https://hgdownload.soe.ucsc.edu/goldenPath/hg18/liftOver/hg18ToHg38.over.chain.gz",
|
|
107
|
+
"1kg_hm3_hg38_eaf":"https://www.dropbox.com/scl/fi/ymkqfsaec6mwjzlvxsm45/PAN.hapmap3.hg38.EAF.tsv.gz?rlkey=p1auef5y1kk7ui41k6j3s8b0z&dl=1",
|
|
108
|
+
"1kg_hm3_hg19_eaf":"https://www.dropbox.com/scl/fi/dmv9wtfchv6ahim86d49r/PAN.hapmap3.hg19.EAF.tsv.gz?rlkey=ywne2gj1rlm2nj42q9lt2d99n&dl=1"
|
|
107
109
|
}
|
|
108
110
|
|
|
109
111
|
|