gwaslab 3.5.8__py3-none-any.whl → 3.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gwaslab might be problematic. Click here for more details.
- gwaslab/data/reference.json +3 -1
- gwaslab/g_Sumstats.py +13 -2
- gwaslab/g_meta.py +2 -1
- gwaslab/g_version.py +2 -2
- gwaslab/util_ex_infer_ancestry.py +65 -0
- gwaslab/util_ex_run_magma.py +74 -0
- gwaslab/viz_aux_annotate_plot.py +13 -2
- gwaslab/viz_plot_mqqplot.py +10 -3
- gwaslab/viz_plot_qqplot.py +3 -1
- {gwaslab-3.5.8.dist-info → gwaslab-3.6.0.dist-info}/METADATA +42 -66
- {gwaslab-3.5.8.dist-info → gwaslab-3.6.0.dist-info}/RECORD +15 -13
- {gwaslab-3.5.8.dist-info → gwaslab-3.6.0.dist-info}/WHEEL +1 -1
- {gwaslab-3.5.8.dist-info → gwaslab-3.6.0.dist-info}/licenses/LICENSE +0 -0
- {gwaslab-3.5.8.dist-info → gwaslab-3.6.0.dist-info}/licenses/LICENSE_before_v3.4.39 +0 -0
- {gwaslab-3.5.8.dist-info → gwaslab-3.6.0.dist-info}/top_level.txt +0 -0
gwaslab/data/reference.json
CHANGED
|
@@ -103,7 +103,9 @@
|
|
|
103
103
|
"13to19":"https://s3-us-west-2.amazonaws.com/human-pangenomics/T2T/CHM13/assemblies/chain/v1_nflo/chm13v2-hg19.chain",
|
|
104
104
|
"13to38":"https://s3-us-west-2.amazonaws.com/human-pangenomics/T2T/CHM13/assemblies/chain/v1_nflo/chm13v2-grch38.chain",
|
|
105
105
|
"18to19":"https://hgdownload.soe.ucsc.edu/goldenPath/hg18/liftOver/hg18ToHg19.over.chain.gz",
|
|
106
|
-
"18to38":"https://hgdownload.soe.ucsc.edu/goldenPath/hg18/liftOver/hg18ToHg38.over.chain.gz"
|
|
106
|
+
"18to38":"https://hgdownload.soe.ucsc.edu/goldenPath/hg18/liftOver/hg18ToHg38.over.chain.gz",
|
|
107
|
+
"1kg_hm3_hg38_eaf":"https://www.dropbox.com/scl/fi/ymkqfsaec6mwjzlvxsm45/PAN.hapmap3.hg38.EAF.tsv.gz?rlkey=p1auef5y1kk7ui41k6j3s8b0z&dl=1",
|
|
108
|
+
"1kg_hm3_hg19_eaf":"https://www.dropbox.com/scl/fi/dmv9wtfchv6ahim86d49r/PAN.hapmap3.hg19.EAF.tsv.gz?rlkey=ywne2gj1rlm2nj42q9lt2d99n&dl=1"
|
|
107
109
|
}
|
|
108
110
|
|
|
109
111
|
|
gwaslab/g_Sumstats.py
CHANGED
|
@@ -84,6 +84,8 @@ from gwaslab.util_ex_ldsc import _estimate_rg_by_ldsc
|
|
|
84
84
|
from gwaslab.util_ex_ldsc import _estimate_h2_cts_by_ldsc
|
|
85
85
|
from gwaslab.util_ex_ldsc import _estimate_partitioned_h2_by_ldsc
|
|
86
86
|
from gwaslab.util_ex_ldproxyfinder import _extract_ld_proxy
|
|
87
|
+
from gwaslab.util_ex_run_magma import _run_magma
|
|
88
|
+
from gwaslab.util_ex_infer_ancestry import _infer_ancestry
|
|
87
89
|
from gwaslab.bd_get_hapmap3 import gethapmap3
|
|
88
90
|
from gwaslab.util_abf_finemapping import abf_finemapping
|
|
89
91
|
from gwaslab.util_abf_finemapping import make_cs
|
|
@@ -674,6 +676,9 @@ class Sumstats():
|
|
|
674
676
|
fig,outliers = plotdaf(self.data, **kwargs)
|
|
675
677
|
return fig, outliers
|
|
676
678
|
|
|
679
|
+
def infer_ancestry(self, **kwargs):
    """Infer the closest reference ancestry and store it in the metadata.

    All keyword arguments are forwarded to ``_infer_ancestry`` (for example
    ``ancestry_af``, ``build`` and ``verbose``).

    The result is written to ``self.meta["gwaslab"]["inferred_ancestry"]``
    and also returned for convenience.
    """
    # Write to this object's own log unless the caller explicitly supplies
    # another one; otherwise the helper would fall back to its default log
    # and the messages would not be attached to this Sumstats object.
    kwargs.setdefault("log", self.log)
    inferred = _infer_ancestry(self.data, **kwargs)
    self.meta["gwaslab"]["inferred_ancestry"] = inferred
    return inferred
|
|
681
|
+
|
|
677
682
|
def plot_gwheatmap(self, **kwargs):
|
|
678
683
|
fig = _gwheatmap(self.data, **kwargs)
|
|
679
684
|
return fig
|
|
@@ -882,8 +887,14 @@ class Sumstats():
|
|
|
882
887
|
if build is None:
|
|
883
888
|
build = self.meta["gwaslab"]["genome_build"]
|
|
884
889
|
insumstats = gethapmap3(self.data.copy(), build=build, verbose=verbose , match_allele=match_allele, how=how )
|
|
885
|
-
_run_prscs(sst_file = insumstats[["rsID","CHR","POS","EA","NEA","BETA","SE"]],
|
|
886
|
-
|
|
890
|
+
_run_prscs(sst_file = insumstats[["rsID","CHR","POS","EA","NEA","BETA","SE"]],
|
|
891
|
+
log=self.log,
|
|
892
|
+
**kwargs)
|
|
893
|
+
|
|
894
|
+
def run_magma(self, build=None, verbose=True, **kwargs):
    """Run MAGMA on this object's summary statistics.

    Parameters
    ----------
    build : str or None
        Genome build forwarded to ``_run_magma``. When ``None``, the build
        recorded in ``self.meta["gwaslab"]["genome_build"]`` is used if it
        has been set.
    verbose : bool
        Forwarded to ``_run_magma`` to control log output.
    **kwargs
        Any other ``_run_magma`` option (``magma``, ``ref``, ``ncbi``,
        ``set_annot``, ``out``, ...).
    """
    if build is None:
        recorded_build = self.meta["gwaslab"]["genome_build"]
        # "99" is the placeholder the metadata is initialised with
        # (presumably meaning "unknown"); keep the previous behaviour of
        # forwarding None in that case rather than passing the placeholder.
        if recorded_build != "99":
            build = recorded_build
    _run_magma(self.data,
               study=self.meta["gwaslab"]["study_name"],
               build=build, verbose=verbose, log=self.log, **kwargs)
|
|
887
898
|
## LDSC ##############################################################################################
|
|
888
899
|
def estimate_h2_by_ldsc(self, build=None, verbose=True, match_allele=True, how="right", **kwargs):
|
|
889
900
|
if build is None:
|
gwaslab/g_meta.py
CHANGED
|
@@ -6,7 +6,7 @@ def _init_meta(object="Sumstats"):
|
|
|
6
6
|
"gwas_id":"Unknown",
|
|
7
7
|
"samples":{
|
|
8
8
|
"sample_size":"Unknown",
|
|
9
|
-
"sample_ancestry":"
|
|
9
|
+
"sample_ancestry":"Unknown",
|
|
10
10
|
"ancestry_method":"self-reported|genetically determined",
|
|
11
11
|
} ,
|
|
12
12
|
"trait_description":"Unknown",
|
|
@@ -37,6 +37,7 @@ def _init_meta(object="Sumstats"):
|
|
|
37
37
|
"species":"homo sapiens",
|
|
38
38
|
"genome_build":"99",
|
|
39
39
|
"sample_prevalence":"Unknown",
|
|
40
|
+
"inferred_ancestry":"Unknown",
|
|
40
41
|
"population_prevalence":"Unknown",
|
|
41
42
|
"variants":{
|
|
42
43
|
"variant_number":"Unknown",
|
gwaslab/g_version.py
CHANGED
|
gwaslab/util_ex_infer_ancestry.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
|
|
2
|
+
import pandas as pd
|
|
3
|
+
from gwaslab.g_Log import Log
|
|
4
|
+
|
|
5
|
+
def _infer_ancestry(sumstats,
|
|
6
|
+
ancestry_af=None,
|
|
7
|
+
build="19",
|
|
8
|
+
log=Log(),
|
|
9
|
+
verbose=True):
|
|
10
|
+
log.write("Start to infer ancestry based on Fst...", verbose=verbose)
|
|
11
|
+
ref_af = pd.read_csv(ancestry_af, sep="\t")
|
|
12
|
+
|
|
13
|
+
data_af = pd.merge(sumstats[["CHR","POS","EA","NEA","EAF"]] ,ref_af,on=["CHR","POS"],how="inner")
|
|
14
|
+
|
|
15
|
+
log.write(f" -Estimating Fst using {len(data_af)} variants...", verbose=verbose)
|
|
16
|
+
|
|
17
|
+
is_filp = data_af["EA"] == data_af["ALT"]
|
|
18
|
+
data_af.loc[is_filp, ["EA","NEA"]] = data_af.loc[is_filp, ["NEA","EA"]]
|
|
19
|
+
data_af.loc[is_filp, "EAF"] = 1 - data_af.loc[is_filp, "EAF"]
|
|
20
|
+
|
|
21
|
+
headers = []
|
|
22
|
+
for i in ['GBR', 'FIN', 'CHS', 'PUR', 'CDX',
|
|
23
|
+
'CLM', 'IBS', 'PEL', 'PJL', 'KHV', 'ACB', 'GWD', 'ESN', 'BEB', 'MSL',
|
|
24
|
+
'STU', 'ITU', 'CEU', 'YRI', 'CHB', 'JPT', 'LWK', 'ASW', 'MXL', 'TSI',
|
|
25
|
+
'GIH', 'EUR', 'EAS', 'AMR', 'SAS', 'AFR']:
|
|
26
|
+
headers.append(f"FST_{i}")
|
|
27
|
+
data_af[f"FST_{i}"] = data_af.apply(lambda x: calculate_fst(x["EAF"], x[i]), axis=1)
|
|
28
|
+
|
|
29
|
+
for i,value in data_af[headers].mean().sort_values().items():
|
|
30
|
+
log.write( f" -{i} : {value}", verbose=verbose)
|
|
31
|
+
|
|
32
|
+
closest_ancestry = data_af[headers].mean().sort_values().idxmin()
|
|
33
|
+
|
|
34
|
+
log.write(f" -Closest Ancestry: {closest_ancestry.split('_')[1]}", verbose=verbose)
|
|
35
|
+
log.write("Finished inferring ancestry.", verbose=verbose)
|
|
36
|
+
return closest_ancestry.split("_")[1]
|
|
37
|
+
|
|
38
|
+
def calculate_fst(p_1, p_2):
|
|
39
|
+
# https://bios1140.github.io/understanding-fst-the-fixation-index.html
|
|
40
|
+
# calculate q1 and q2
|
|
41
|
+
q_1 = 1 - p_1
|
|
42
|
+
q_2 = 1 - p_2
|
|
43
|
+
|
|
44
|
+
# calculate total allele frequency
|
|
45
|
+
p_t = (p_1 + p_2)/2
|
|
46
|
+
q_t = 1 - p_t
|
|
47
|
+
|
|
48
|
+
# calculate expected heterozygosity
|
|
49
|
+
# first calculate expected heterozygosity for the two populations
|
|
50
|
+
# pop1
|
|
51
|
+
hs_1 = 2*p_1*q_1
|
|
52
|
+
# pop2
|
|
53
|
+
hs_2 = 2*p_2*q_2
|
|
54
|
+
# then take the mean of this
|
|
55
|
+
hs = (hs_1 + hs_2)/2
|
|
56
|
+
|
|
57
|
+
# next calculate expected heterozygosity for the metapopulations
|
|
58
|
+
ht = 2*p_t*q_t
|
|
59
|
+
|
|
60
|
+
# calculate fst
|
|
61
|
+
fst = (ht - hs)/ht
|
|
62
|
+
|
|
63
|
+
# return output
|
|
64
|
+
return fst
|
|
65
|
+
|
|
gwaslab/util_ex_run_magma.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import subprocess
|
|
2
|
+
import os
|
|
3
|
+
import gc
|
|
4
|
+
import pandas as pd
|
|
5
|
+
import numpy as np
|
|
6
|
+
from gwaslab.g_Log import Log
|
|
7
|
+
from gwaslab.util_in_filter_value import _exclude_hla
|
|
8
|
+
|
|
9
|
+
def _run_magma(sumstats,
|
|
10
|
+
magma="magma",
|
|
11
|
+
study="Study1",
|
|
12
|
+
exclude_hla=True,
|
|
13
|
+
window="35,10",
|
|
14
|
+
id_to_use="rsID",
|
|
15
|
+
ref=None,
|
|
16
|
+
ncbi=None,
|
|
17
|
+
set_annot=None,
|
|
18
|
+
out="./",
|
|
19
|
+
delete=True,
|
|
20
|
+
ncol="N",
|
|
21
|
+
build="19",
|
|
22
|
+
log=Log(),
|
|
23
|
+
verbose=True):
|
|
24
|
+
|
|
25
|
+
log.write(" Start to run magma from command line:", verbose=verbose)
|
|
26
|
+
|
|
27
|
+
if exclude_hla==True:
|
|
28
|
+
sumstats = _exclude_hla(sumstats, build =build)
|
|
29
|
+
|
|
30
|
+
snploc="{}{}.rsid.chr.pos.tsv".format(out,study)
|
|
31
|
+
pval="{}{}.rsid.p.n.tsv".format(out, study)
|
|
32
|
+
|
|
33
|
+
log.write(f" -writing temp file for --snp-loc:{snploc}", verbose=verbose)
|
|
34
|
+
sumstats.dropna()[[id_to_use,"CHR","POS"]].rename(columns={id_to_use:"SNP"}).to_csv("{}{}.rsid.chr.pos.tsv".format(out,study),index=None, sep="\t")
|
|
35
|
+
|
|
36
|
+
log.write(f" -writing temp file for --pval:{pval}", verbose=verbose)
|
|
37
|
+
sumstats.dropna()[[id_to_use,"P","N"]].rename(columns={id_to_use:"SNP"}).to_csv("{}{}.rsid.p.n.tsv".format(out,study),index=None, sep="\t")
|
|
38
|
+
|
|
39
|
+
log.write(f" --annotate window: {window}", verbose=verbose)
|
|
40
|
+
log.write(f" --gene-loc: {ncbi}", verbose=verbose)
|
|
41
|
+
log.write(f" --bfile: {ref}", verbose=verbose)
|
|
42
|
+
log.write(f" Output prefix: {out}", verbose=verbose)
|
|
43
|
+
|
|
44
|
+
bash_script=f'''
|
|
45
|
+
|
|
46
|
+
#!/bin/bash
|
|
47
|
+
|
|
48
|
+
{magma} --annotate window={window} --snp-loc {snploc} --gene-loc {ncbi} --out {study}
|
|
49
|
+
|
|
50
|
+
{magma} --bfile {ref} --pval {pval} ncol={ncol} --gene-annot {study}.genes.annot --out {study}
|
|
51
|
+
|
|
52
|
+
'''
|
|
53
|
+
|
|
54
|
+
if set_annot is not None:
|
|
55
|
+
bash_script+=f'''
|
|
56
|
+
{magma} --gene-results {study}.genes.raw --set-annot {set_annot} --out {study}
|
|
57
|
+
'''
|
|
58
|
+
log.write(f"Script: {bash_script}")
|
|
59
|
+
|
|
60
|
+
try:
|
|
61
|
+
log.write(" Running magma from command line...", verbose=verbose)
|
|
62
|
+
output = subprocess.check_output(bash_script, stderr=subprocess.STDOUT, shell=True,text=True)
|
|
63
|
+
output = output + "\n"
|
|
64
|
+
|
|
65
|
+
if delete == True:
|
|
66
|
+
os.remove(snploc)
|
|
67
|
+
os.remove(pval)
|
|
68
|
+
|
|
69
|
+
except subprocess.CalledProcessError as e:
|
|
70
|
+
log.warning("ERROR!")
|
|
71
|
+
log.write(e.output)
|
|
72
|
+
|
|
73
|
+
log.write("Finished running magma.", verbose=verbose)
|
|
74
|
+
|
gwaslab/viz_aux_annotate_plot.py
CHANGED
|
@@ -26,6 +26,7 @@ def annotate_single(
|
|
|
26
26
|
anno_alias,
|
|
27
27
|
anno_style,
|
|
28
28
|
anno_args,
|
|
29
|
+
anno_args_single,
|
|
29
30
|
arm_scale,
|
|
30
31
|
anno_max_iter,
|
|
31
32
|
arm_scale_d,
|
|
@@ -216,13 +217,23 @@ def annotate_single(
|
|
|
216
217
|
if anno_style == "tight" :
|
|
217
218
|
anno_default["rotation"] = 90
|
|
218
219
|
################################################################################################################################
|
|
219
|
-
|
|
220
|
+
# anno args for all
|
|
220
221
|
for key,value in anno_args.items():
|
|
221
222
|
anno_default[key]=value
|
|
222
|
-
|
|
223
|
+
|
|
224
|
+
# anno args for highlight group
|
|
225
|
+
if len(highlight_i) >0:
|
|
223
226
|
if row["i"] in highlight_i:
|
|
224
227
|
for key,value in highlight_anno_args.items():
|
|
225
228
|
anno_default[key]=value
|
|
229
|
+
|
|
230
|
+
# anno args for specific variants
|
|
231
|
+
#try:
|
|
232
|
+
if row[snpid] in anno_args_single.keys():
|
|
233
|
+
for key,value in anno_args_single[row[snpid]].items():
|
|
234
|
+
anno_default[key]=value
|
|
235
|
+
#except:
|
|
236
|
+
# pass
|
|
226
237
|
################################################################################################################################
|
|
227
238
|
if anno_adjust==True:
|
|
228
239
|
if _invert==False:
|
gwaslab/viz_plot_mqqplot.py
CHANGED
|
@@ -154,6 +154,7 @@ def mqqplot(insumstats,
|
|
|
154
154
|
anno_alias=None,
|
|
155
155
|
anno_d=None,
|
|
156
156
|
anno_args=None,
|
|
157
|
+
anno_args_single=None,
|
|
157
158
|
anno_style="right",
|
|
158
159
|
anno_fixed_arm_length=None,
|
|
159
160
|
anno_source = "ensembl",
|
|
@@ -209,7 +210,9 @@ def mqqplot(insumstats,
|
|
|
209
210
|
drop_chr_start=False,
|
|
210
211
|
title =None,
|
|
211
212
|
mtitle=None,
|
|
213
|
+
mtitle_pad=1.08,
|
|
212
214
|
qtitle=None,
|
|
215
|
+
qtitle_pad=1.08,
|
|
213
216
|
ylabel=None,
|
|
214
217
|
xlabel=None,
|
|
215
218
|
title_pad=1.08,
|
|
@@ -256,6 +259,7 @@ def mqqplot(insumstats,
|
|
|
256
259
|
anno_alias = _update_args(anno_alias, dict())
|
|
257
260
|
anno_d = _update_args(anno_d,dict())
|
|
258
261
|
anno_args = _update_args(anno_args,dict())
|
|
262
|
+
anno_args_single = _update_args(anno_args_single,dict())
|
|
259
263
|
arrow_kwargs = _update_args(arrow_kwargs,dict())
|
|
260
264
|
|
|
261
265
|
colors = _update_arg(colors, ["#597FBD","#74BAD3"])
|
|
@@ -1002,7 +1006,7 @@ def mqqplot(insumstats,
|
|
|
1002
1006
|
|
|
1003
1007
|
|
|
1004
1008
|
if mtitle and anno and len(to_annotate)>0:
|
|
1005
|
-
pad=(ax1.transData.transform((skip,
|
|
1009
|
+
pad=(ax1.transData.transform((skip, mtitle_pad*maxy))[1]-ax1.transData.transform((skip, maxy)))[1]
|
|
1006
1010
|
ax1.set_title(mtitle,pad=pad,fontsize=title_fontsize,family=font_family)
|
|
1007
1011
|
elif mtitle:
|
|
1008
1012
|
ax1.set_title(mtitle,fontsize=title_fontsize,family=font_family)
|
|
@@ -1023,6 +1027,7 @@ def mqqplot(insumstats,
|
|
|
1023
1027
|
anno_alias=anno_alias,
|
|
1024
1028
|
anno_style=anno_style,
|
|
1025
1029
|
anno_args=anno_args,
|
|
1030
|
+
anno_args_single=anno_args_single,
|
|
1026
1031
|
arm_scale=arm_scale,
|
|
1027
1032
|
anno_max_iter=anno_max_iter,
|
|
1028
1033
|
arm_scale_d=arm_scale_d,
|
|
@@ -1074,6 +1079,7 @@ def mqqplot(insumstats,
|
|
|
1074
1079
|
fontsize=fontsize,
|
|
1075
1080
|
font_family=font_family,
|
|
1076
1081
|
qtitle=qtitle,
|
|
1082
|
+
qtitle_pad=qtitle_pad,
|
|
1077
1083
|
title_fontsize=title_fontsize,
|
|
1078
1084
|
include_chrXYMT=include_chrXYMT,
|
|
1079
1085
|
cut_line_color=cut_line_color,
|
|
@@ -1106,9 +1112,10 @@ def mqqplot(insumstats,
|
|
|
1106
1112
|
# Titles
|
|
1107
1113
|
if title and anno and len(to_annotate)>0:
|
|
1108
1114
|
# increase height if annotation
|
|
1109
|
-
fig.suptitle(title , fontsize = title_fontsize ,x=0.5, y=
|
|
1115
|
+
fig.suptitle(title , fontsize = title_fontsize ,x=0.5, y=title_pad)
|
|
1110
1116
|
else:
|
|
1111
|
-
|
|
1117
|
+
title_pad = title_pad -0.05
|
|
1118
|
+
fig.suptitle(title , fontsize = title_fontsize, x=0.5,y=title_pad)
|
|
1112
1119
|
## Add annotation arrows and texts
|
|
1113
1120
|
|
|
1114
1121
|
# Saving figure
|
gwaslab/viz_plot_qqplot.py
CHANGED
|
@@ -30,6 +30,7 @@ def _plot_qq(
|
|
|
30
30
|
fontsize,
|
|
31
31
|
font_family,
|
|
32
32
|
qtitle,
|
|
33
|
+
qtitle_pad,
|
|
33
34
|
title_fontsize,
|
|
34
35
|
include_chrXYMT,
|
|
35
36
|
cut_line_color,
|
|
@@ -182,7 +183,8 @@ def _plot_qq(
|
|
|
182
183
|
ax2.tick_params(axis='both', which='both', labelsize=fontsize,labelfontfamily=font_family)
|
|
183
184
|
#
|
|
184
185
|
if qtitle:
|
|
185
|
-
ax2.
|
|
186
|
+
pad=(ax2.transData.transform((skip, qtitle_pad*maxy))[1]-ax2.transData.transform((skip, maxy)))[1]
|
|
187
|
+
ax2.set_title(qtitle,fontsize=title_fontsize,pad=pad,family=font_family)
|
|
186
188
|
|
|
187
189
|
log.write("Finished creating QQ plot successfully!",verbose=verbose)
|
|
188
190
|
|
|
{gwaslab-3.5.8.dist-info → gwaslab-3.6.0.dist-info}/METADATA
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: gwaslab
|
|
3
|
-
Version: 3.
|
|
3
|
+
Version: 3.6.0
|
|
4
4
|
Summary: A collection of handy tools for GWAS SumStats
|
|
5
5
|
Author-email: Yunye <yunye@gwaslab.com>
|
|
6
6
|
Project-URL: Homepage, https://cloufield.github.io/gwaslab/
|
|
7
7
|
Project-URL: Github, https://github.com/Cloufield/gwaslab
|
|
8
8
|
Classifier: Programming Language :: Python :: 3
|
|
9
|
-
Classifier: License :: OSI Approved ::
|
|
9
|
+
Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
|
|
10
10
|
Classifier: Operating System :: OS Independent
|
|
11
11
|
Requires-Python: <3.13,>=3.9
|
|
12
12
|
Description-Content-Type: text/markdown
|
|
@@ -36,43 +36,63 @@ Dynamic: license-file
|
|
|
36
36
|

|
|
37
37
|

|
|
38
38
|
|
|
39
|
-
* A handy Python toolkit for handling GWAS summary statistics (sumstats).
|
|
39
|
+
* A handy Python-based toolkit for handling GWAS summary statistics (sumstats).
|
|
40
40
|
* Each process is modularized and can be customized to your needs.
|
|
41
41
|
* Sumstats-specific manipulations are designed as methods of a Python object, `gwaslab.Sumstats`.
|
|
42
42
|
|
|
43
|
-
|
|
43
|
+
## Installation
|
|
44
44
|
|
|
45
|
-
|
|
45
|
+
### install via pip
|
|
46
46
|
|
|
47
|
-
|
|
47
|
+
The latest version of GWASLab now supports Python 3.9, 3.10, 3.11, and 3.12.
|
|
48
48
|
|
|
49
|
-
|
|
49
|
+
```bash
|
|
50
|
+
pip install gwaslab
|
|
51
|
+
```
|
|
50
52
|
|
|
51
|
-
### install
|
|
53
|
+
### install in conda environment
|
|
52
54
|
|
|
53
|
-
|
|
55
|
+
Create a Python 3.9, 3.10, 3.11 or 3.12 environment and install gwaslab using pip:
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
conda create -n gwaslab -c conda-forge python=3.12
|
|
59
|
+
|
|
60
|
+
conda activate gwaslab
|
|
54
61
|
|
|
62
|
+
pip install gwaslab
|
|
55
63
|
```
|
|
56
|
-
|
|
64
|
+
|
|
65
|
+
or create a new environment using yml file [environment.yml](https://github.com/Cloufield/gwaslab/blob/main/environment.yml)
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
conda env create -n gwaslab -f environment.yml
|
|
57
69
|
```
|
|
58
70
|
|
|
71
|
+
### install using docker (deprecated)
|
|
72
|
+
|
|
73
|
+
A docker file is available [here](https://github.com/Cloufield/gwaslab/blob/main/docker/Dockerfile) for building local images.
|
|
74
|
+
|
|
75
|
+
## Quick start
|
|
76
|
+
|
|
59
77
|
```python
|
|
78
|
+
|
|
60
79
|
import gwaslab as gl
|
|
80
|
+
|
|
61
81
|
# load plink2 output
|
|
62
|
-
mysumstats = gl.Sumstats("
|
|
82
|
+
mysumstats = gl.Sumstats("sumstats.txt.gz", fmt="plink2")
|
|
63
83
|
|
|
64
|
-
# load sumstats with auto mode (auto-detecting
|
|
84
|
+
# or load sumstats with auto mode (auto-detecting commonly used headers)
|
|
65
85
|
# assuming ALT/A1 is EA, and frq is EAF
|
|
66
|
-
mysumstats = gl.Sumstats("
|
|
86
|
+
mysumstats = gl.Sumstats("sumstats.txt.gz", fmt="auto")
|
|
67
87
|
|
|
68
88
|
# or you can specify the columns:
|
|
69
|
-
mysumstats = gl.Sumstats("
|
|
89
|
+
mysumstats = gl.Sumstats("sumstats.txt.gz",
|
|
70
90
|
snpid="SNP",
|
|
71
91
|
chrom="CHR",
|
|
72
92
|
pos="POS",
|
|
73
93
|
ea="ALT",
|
|
74
94
|
nea="REF",
|
|
75
|
-
|
|
95
|
+
eaf="Frq",
|
|
76
96
|
beta="BETA",
|
|
77
97
|
se="SE",
|
|
78
98
|
p="P",
|
|
@@ -85,26 +105,9 @@ mysumstats.plot_mqq()
|
|
|
85
105
|
...
|
|
86
106
|
```
|
|
87
107
|
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
Create a Python 3.9 environment and install gwaslab using pip:
|
|
91
|
-
|
|
92
|
-
```
|
|
93
|
-
conda env create -n gwaslab_test -c conda-forge python=3.9
|
|
94
|
-
conda activate gwaslab
|
|
95
|
-
pip install gwaslab==3.4.45
|
|
96
|
-
```
|
|
97
|
-
|
|
98
|
-
or create a new environment using yml file [environment_3.4.40.yml](https://github.com/Cloufield/gwaslab/blob/main/environment_3.4.40.yml)
|
|
99
|
-
|
|
100
|
-
```
|
|
101
|
-
conda env create -n gwaslab -f environment_3.4.40.yml
|
|
102
|
-
```
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
### install using docker
|
|
108
|
+
## Documentation and tutorials
|
|
106
109
|
|
|
107
|
-
|
|
110
|
+
Documentation and tutorials for GWASLab are available [here](https://cloufield.github.io/gwaslab/).
|
|
108
111
|
|
|
109
112
|
## Functions
|
|
110
113
|
|
|
@@ -151,7 +154,7 @@ A docker file is available [here](https://github.com/Cloufield/gwaslab/blob/main
|
|
|
151
154
|
- Scatter plot: allele frequency comparison
|
|
152
155
|
- Scatter plot: trumpet plot (plot of MAF and effect size with power lines)
|
|
153
156
|
|
|
154
|
-
|
|
157
|
+
#### Visualization Examples
|
|
155
158
|
|
|
156
159
|
<img width="600" alt="image" src="https://user-images.githubusercontent.com/40289485/233836639-34b03c47-5a59-4fd4-9677-5e13b02aab15.png">
|
|
157
160
|
<img width="600" alt="image" src="https://user-images.githubusercontent.com/40289485/197393168-e3e7076f-2801-4d66-9526-80778d44f3da.png">
|
|
@@ -167,42 +170,15 @@ A docker file is available [here](https://github.com/Cloufield/gwaslab/blob/main
|
|
|
167
170
|
- Sumstats summary: give you a quick overview of the sumstats.
|
|
168
171
|
- ...
|
|
169
172
|
|
|
170
|
-
##
|
|
171
|
-
|
|
172
|
-
environment.yml
|
|
173
|
+
## Issues
|
|
173
174
|
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
channels:
|
|
177
|
-
- conda-forge
|
|
178
|
-
- defaults
|
|
179
|
-
dependencies:
|
|
180
|
-
- python=3.8.16=h7a1cb2a_3
|
|
181
|
-
- jupyter==1.0.0
|
|
182
|
-
- pip==23.1.2
|
|
183
|
-
- pip:
|
|
184
|
-
- adjusttext==0.8
|
|
185
|
-
- biopython==1.81
|
|
186
|
-
- gwaslab==3.4.16
|
|
187
|
-
- liftover==1.1.16
|
|
188
|
-
- matplotlib==3.7.1
|
|
189
|
-
- numpy==1.24.2
|
|
190
|
-
- pandas==1.4.4
|
|
191
|
-
- scikit-allel==1.3.5
|
|
192
|
-
- scikit-learn==1.2.2
|
|
193
|
-
- scipy==1.10.1
|
|
194
|
-
- seaborn==0.11.2
|
|
195
|
-
- statsmodels==0.13
|
|
196
|
-
- adjustText==0.8
|
|
197
|
-
- pysam==0.19
|
|
198
|
-
- pyensembl==2.2.3
|
|
199
|
-
- h5py==3.10.0
|
|
200
|
-
```
|
|
175
|
+
- GWASLab is currently under active development, with frequent updates.
|
|
176
|
+
- Note: Known issues are documented at https://cloufield.github.io/gwaslab/KnownIssues/.
|
|
201
177
|
|
|
202
178
|
## How to cite
|
|
203
179
|
- GWASLab preprint: He, Y., Koido, M., Shimmori, Y., Kamatani, Y. (2023). GWASLab: a Python package for processing and visualizing GWAS summary statistics. Preprint at Jxiv, 2023-5. https://doi.org/10.51094/jxiv.370
|
|
204
180
|
|
|
205
|
-
## Sample
|
|
181
|
+
## Sample data used for tutorial
|
|
206
182
|
- Sample GWAS data used in GWASLab is obtained from: http://jenger.riken.jp/ (Suzuki, Ken, et al. "Identification of 28 new susceptibility loci for type 2 diabetes in the Japanese population." Nature genetics 51.3 (2019): 379-386.).
|
|
207
183
|
|
|
208
184
|
## Acknowledgement
|
|
{gwaslab-3.5.8.dist-info → gwaslab-3.6.0.dist-info}/RECORD
CHANGED
|
@@ -6,7 +6,7 @@ gwaslab/bd_get_hapmap3.py,sha256=qWTvIRZsd7F3nT9sN2NSXUsxZJRf5k4HLgJ6kN0qaUc,410
|
|
|
6
6
|
gwaslab/cache_manager.py,sha256=HOTnSkCOyGEPLRl90WT8D_6pAdI8d8AzenMIDGuCeWc,28113
|
|
7
7
|
gwaslab/g_Log.py,sha256=C3Zv-_6c3C9ms8bgQ-ytplz22sjk7euqXYkWr9zNeAs,1573
|
|
8
8
|
gwaslab/g_Phenotypes.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
-
gwaslab/g_Sumstats.py,sha256=
|
|
9
|
+
gwaslab/g_Sumstats.py,sha256=l1zmEOifAZvnv7BoLzyLcTV6Yp1KX5n5HH930GQ9xlE,43502
|
|
10
10
|
gwaslab/g_SumstatsMulti.py,sha256=2y8brnKAs5I9tUexQJiFwOsFLC4MyACn7IybvvXsRDU,13828
|
|
11
11
|
gwaslab/g_SumstatsPair.py,sha256=PmVPGU32degDuU5RDG9igyT1sFAbEDh6alrRulUphyk,13181
|
|
12
12
|
gwaslab/g_SumstatsSet.py,sha256=AiTISWPfmu8NTGa8j9Yuts8DNw1pEUENYyPoS0HXp5I,29866
|
|
@@ -14,11 +14,11 @@ gwaslab/g_SumstatsT.py,sha256=u_DighLMnMxwTLnqm-B58pA0G6WXRj6pudPyKMVKjSU,2133
|
|
|
14
14
|
gwaslab/g_Sumstats_polars.py,sha256=F9d8y-6SJLRibrZrvWtwE2XpxZdG3A9DqqTT8TW4sS0,8528
|
|
15
15
|
gwaslab/g_Sumstats_summary.py,sha256=FECvvFXJVKaCX5dggBvvk9YvJ6AbdbcLfjltysX7wEE,6380
|
|
16
16
|
gwaslab/g_headers.py,sha256=B0bTVqzf2pdUwAX1U8MoYDO7nQEKAtgVnn-9bVXsJGE,6766
|
|
17
|
-
gwaslab/g_meta.py,sha256=
|
|
17
|
+
gwaslab/g_meta.py,sha256=pXtSsQfFPv2UJmOxKMmLtcsp_Ku_H73YP7PnlfMd0sg,6472
|
|
18
18
|
gwaslab/g_meta_update.py,sha256=dWgz4kcq9bvsXycCjfhoYtcJHlEP4HRdYoQ8KFqMyTQ,2490
|
|
19
19
|
gwaslab/g_vchange_status.py,sha256=w3zsYYOcCaI3PTeboonvkQjudzUAfVIgATzRdiPViZs,1939
|
|
20
20
|
gwaslab/g_vchange_status_polars.py,sha256=kxyGQCur0ibVFBCyZghA-XNf_kLDXKK-l7VC-Om2IdA,1839
|
|
21
|
-
gwaslab/g_version.py,sha256
|
|
21
|
+
gwaslab/g_version.py,sha256=o8J0x9TeG_k6wUFeFHrx1HQA4JK3mxYQakjlpBACIX8,1889
|
|
22
22
|
gwaslab/hm_casting.py,sha256=xoq1E4Tp5VC4aLWfq9-_AfiQzb1WZAHrnZG33W4sCOE,14178
|
|
23
23
|
gwaslab/hm_casting_polars.py,sha256=_3ZeMg3mRsa7vHpxObFErOledpaydhC_PxStcwlYO2c,8371
|
|
24
24
|
gwaslab/hm_harmonize_sumstats.py,sha256=2MeWnWmMHpLWjUMTVqExpg_3mE1VIaBsh6Mz3ffJCMc,84761
|
|
@@ -51,6 +51,7 @@ gwaslab/util_abf_finemapping.py,sha256=LRcopjtkT-iXtKPAJIzR4qjPdhD7nrS_BGit4EW89
|
|
|
51
51
|
gwaslab/util_ex_calculate_ldmatrix.py,sha256=TeXazNXInGYrBR58Q3-f51yrHz5-_A3tXNieYJA2-i8,16924
|
|
52
52
|
gwaslab/util_ex_calculate_prs.py,sha256=9uJ588Sdj4V0vw3OZ9NeLECwOvW67f0IdLandVPS5RY,9442
|
|
53
53
|
gwaslab/util_ex_gwascatalog.py,sha256=RZ_qIsfjONUbF3AGMp-0feTBubOEEZUufYGdvv0Sk7g,9211
|
|
54
|
+
gwaslab/util_ex_infer_ancestry.py,sha256=AXLEhuJUBT6XIYjADB-envFRGIZ-misnegGdtK8SsFc,2192
|
|
54
55
|
gwaslab/util_ex_ldproxyfinder.py,sha256=HadmnjEv5EvZCHR5SFSAbqxZWJryEErJftCS0WhZUbs,16898
|
|
55
56
|
gwaslab/util_ex_ldsc.py,sha256=uEVrsGrT5A-ni7Vw7esvqQ7NwiR1P10Q_N1_uZNRzRY,21600
|
|
56
57
|
gwaslab/util_ex_match_ldmatrix.py,sha256=8nFziC8LiUy83bG9vW_u_CzbZek-9R9eVIfFdRBDkrA,17495
|
|
@@ -62,6 +63,7 @@ gwaslab/util_ex_run_ccgwas.py,sha256=8uOAaJ44zAhlxj_A2vth5AzRLk2_BYlYBXEccFbB6T0
|
|
|
62
63
|
gwaslab/util_ex_run_clumping.py,sha256=yvUhKi83KhXfE5yPy2i47B58BMACZ_r5gt3-uN8Znbo,7908
|
|
63
64
|
gwaslab/util_ex_run_coloc.py,sha256=evLJ_ct_RV7UKLnYGJ_VfT6MeuGeYDtj8M8xqqk-X-U,6297
|
|
64
65
|
gwaslab/util_ex_run_hyprcoloc.py,sha256=sElGLXsBKx3PpDTOET7ulvjBShtDOJ3OyDuOp3Yl_y0,4722
|
|
66
|
+
gwaslab/util_ex_run_magma.py,sha256=0vUac7iVySnRx8QoYBncSfIEke5MwTfmYzDLcSmw1Us,2439
|
|
65
67
|
gwaslab/util_ex_run_mesusie.py,sha256=g0x7-zwoN9fbHYJQAue3vAMIeFu3x8H9KjgiYI1u9Mk,5532
|
|
66
68
|
gwaslab/util_ex_run_mtag.py,sha256=TxfDwjbGy5C1SiTVDMNopcSjl9p5mgCjJVBfeLq__bg,2952
|
|
67
69
|
gwaslab/util_ex_run_prscs.py,sha256=_BOUqX3fgALdGx-xDyWlREXCafZZddhcK8ufoESUaac,3396
|
|
@@ -79,7 +81,7 @@ gwaslab/util_in_merge.py,sha256=KB5VKRTUUZ1XGYUxgCP_l3cWYanjthMdwiZ_DincCZQ,2020
|
|
|
79
81
|
gwaslab/util_in_meta.py,sha256=3OxpnigfaShV7995-1HCbGCwdIjSbsDhC2X7hnT28jo,18712
|
|
80
82
|
gwaslab/util_in_meta_polars.py,sha256=i_2DGSDX8KlR3gtuDPXw88a_rbGYWIb58J1MGDMbJx8,8426
|
|
81
83
|
gwaslab/util_in_snphwe.py,sha256=-KpIDx6vn_nah6H55IkV2OyjXQVXV13XyBL069WE1wM,1751
|
|
82
|
-
gwaslab/viz_aux_annotate_plot.py,sha256=
|
|
84
|
+
gwaslab/viz_aux_annotate_plot.py,sha256=IyGhqFQofrVW9A8UxJ5oNVH96czQ-2_i4s1lVvf2hww,25696
|
|
83
85
|
gwaslab/viz_aux_chromatin.py,sha256=aWZaXOSvGyZY7wQcoFDaqHRYCSHZbi_K4Q70HruN9ts,4125
|
|
84
86
|
gwaslab/viz_aux_property.py,sha256=UIaivghnLXYpTwkKnXRK0F28Jbn9L6OaICk3K73WZaU,33
|
|
85
87
|
gwaslab/viz_aux_quickfix.py,sha256=cGX5i3WBmvKIiqck8V00caDg-pvKOO709Ux3DBXsUrM,18693
|
|
@@ -92,9 +94,9 @@ gwaslab/viz_plot_effect.py,sha256=qbM6c1IB2HlUlMNgFZlJ5G8ODQJ8-oSWD8t0Q8DDuz8,10
|
|
|
92
94
|
gwaslab/viz_plot_forestplot.py,sha256=xgOnefh737CgdQxu5naVyRNBX1NQXPFKzf51fbh6afs,6771
|
|
93
95
|
gwaslab/viz_plot_miamiplot.py,sha256=rCFEp7VNuVqeBBG3WRkmFAtFklbF79BvIQQYiSY70VY,31238
|
|
94
96
|
gwaslab/viz_plot_miamiplot2.py,sha256=eVS2v1YFEO0xZ-2zs_DCCvEx-Hqbt0wQkbgCHiRfqeE,16247
|
|
95
|
-
gwaslab/viz_plot_mqqplot.py,sha256=
|
|
97
|
+
gwaslab/viz_plot_mqqplot.py,sha256=mlcsA_wEnv7N9mP6CVL15WXWHJ0-8TwqHuznJXgPwCE,71103
|
|
96
98
|
gwaslab/viz_plot_phe_heatmap.py,sha256=qoXVeFTIm-n8IinNbDdPFVBSz2yGCGK6QzTstXv6aj4,9532
|
|
97
|
-
gwaslab/viz_plot_qqplot.py,sha256=
|
|
99
|
+
gwaslab/viz_plot_qqplot.py,sha256=cB4vRlFv69zWY9NMLfSkfAbirYp3_EEW2kQiBTEMDoc,7483
|
|
98
100
|
gwaslab/viz_plot_regional2.py,sha256=BoL1V56ww9B2_vFkABgln_f6OrzobiFjUISI5X6XXMM,43146
|
|
99
101
|
gwaslab/viz_plot_regionalplot.py,sha256=8u-5-yfy-UaXhaxVVz3Y5k2kBAoqzczUw1hyyD450iI,37983
|
|
100
102
|
gwaslab/viz_plot_rg_heatmap.py,sha256=z-G4gxK5-H_e13jV8RQnNzXPrKSQ0c7q41-KpMrA-cs,13861
|
|
@@ -102,16 +104,16 @@ gwaslab/viz_plot_scatter_with_reg.py,sha256=PmUZDQl2q4Dme3HLPXEwf_TrMjwJADA-uFXN
|
|
|
102
104
|
gwaslab/viz_plot_stackedregional.py,sha256=Jzu5NvX45L4iTLfzXjNkxeaA9SGggtGGxEJdGUzSsuU,19163
|
|
103
105
|
gwaslab/viz_plot_trumpetplot.py,sha256=y4sAFjzMaSLuWrdr9_ao-wPYCK5DlP2ykiqulWsoN_k,42680
|
|
104
106
|
gwaslab/data/formatbook.json,sha256=j6XRTlJvqex4HLBtGLhiaU8JV4_cD1LWl9uqB3Oou4s,40718
|
|
105
|
-
gwaslab/data/reference.json,sha256=
|
|
107
|
+
gwaslab/data/reference.json,sha256=oaE_HLFeYuEpcZ_rCGY3GwiAJWsDxRtjJOAc188WG28,12620
|
|
106
108
|
gwaslab/data/chrx_par/chrx_par_hg19.bed.gz,sha256=LocZg_ozhZjQiIpgWCO4EYCW9xgkEKpRy1m-YdIpzQs,83
|
|
107
109
|
gwaslab/data/chrx_par/chrx_par_hg38.bed.gz,sha256=VFW11MnQVC-Iu-ZGvUDcEhVpb-HVRsVTg-W-GNJyxP4,82
|
|
108
110
|
gwaslab/data/hapmap3_SNPs/hapmap3_db150_hg19.snplist.gz,sha256=qD9RsC5S2h6l-OdpW09DWXHIi2kcPebctMnhxt8mzfU,10282886
|
|
109
111
|
gwaslab/data/hapmap3_SNPs/hapmap3_db151_hg38.snplist.gz,sha256=Y8ZT2FIAhbhlgCJdE9qQVAiwnV_fcsPt72usBa7RSBM,10225828
|
|
110
112
|
gwaslab/data/high_ld/high_ld_hla_hg19.bed.gz,sha256=R7IkssKu0L4WwkU9SrS84xCMdrkkKL0gnTNO_OKbG0Y,219
|
|
111
113
|
gwaslab/data/high_ld/high_ld_hla_hg38.bed.gz,sha256=76CIU0pibDJ72Y6UY-TbIKE9gEPwTELAaIbCXyjm80Q,470
|
|
112
|
-
gwaslab-3.
|
|
113
|
-
gwaslab-3.
|
|
114
|
-
gwaslab-3.
|
|
115
|
-
gwaslab-3.
|
|
116
|
-
gwaslab-3.
|
|
117
|
-
gwaslab-3.
|
|
114
|
+
gwaslab-3.6.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
115
|
+
gwaslab-3.6.0.dist-info/licenses/LICENSE_before_v3.4.39,sha256=GhLOU_1UDEKeOacYhsRN_m9u-eIuVTazSndZPeNcTZA,1066
|
|
116
|
+
gwaslab-3.6.0.dist-info/METADATA,sha256=1zEw4VZiB6U6BVKjPqj2NkOSZtq1K9Q6WhpUcVlwigk,7020
|
|
117
|
+
gwaslab-3.6.0.dist-info/WHEEL,sha256=GHB6lJx2juba1wDgXDNlMTyM13ckjBMKf-OnwgKOCtA,91
|
|
118
|
+
gwaslab-3.6.0.dist-info/top_level.txt,sha256=PyY6hWtrALpv2MAN3kjkIAzJNmmBTH5a2risz9KwH08,8
|
|
119
|
+
gwaslab-3.6.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|