gwaslab 3.4.46__py3-none-any.whl → 3.4.48__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gwaslab might be problematic. Click here for more details.
- gwaslab/g_Sumstats.py +12 -2
- gwaslab/g_version.py +7 -7
- gwaslab/io_read_ldsc.py +16 -2
- gwaslab/io_to_formats.py +8 -3
- gwaslab/qc_fix_sumstats.py +5 -2
- gwaslab/util_abf_finemapping.py +67 -0
- gwaslab/util_ex_calculate_ldmatrix.py +20 -7
- gwaslab/util_ex_calculate_prs.py +13 -7
- gwaslab/util_ex_ldsc.py +8 -1
- gwaslab/util_ex_process_ref.py +22 -11
- gwaslab/util_ex_run_clumping.py +6 -6
- gwaslab/viz_aux_annotate_plot.py +2 -1
- gwaslab/viz_aux_chromatin.py +4 -3
- gwaslab/viz_aux_quickfix.py +2 -1
- gwaslab/viz_plot_compare_effect.py +4 -2
- gwaslab/viz_plot_miamiplot2.py +5 -8
- gwaslab/viz_plot_mqqplot.py +121 -62
- gwaslab/viz_plot_regional2.py +838 -0
- gwaslab/viz_plot_stackedregional.py +81 -48
- {gwaslab-3.4.46.dist-info → gwaslab-3.4.48.dist-info}/METADATA +15 -15
- {gwaslab-3.4.46.dist-info → gwaslab-3.4.48.dist-info}/RECORD +25 -23
- {gwaslab-3.4.46.dist-info → gwaslab-3.4.48.dist-info}/WHEEL +1 -1
- {gwaslab-3.4.46.dist-info → gwaslab-3.4.48.dist-info}/LICENSE +0 -0
- {gwaslab-3.4.46.dist-info → gwaslab-3.4.48.dist-info}/LICENSE_before_v3.4.39 +0 -0
- {gwaslab-3.4.46.dist-info → gwaslab-3.4.48.dist-info}/top_level.txt +0 -0
gwaslab/g_Sumstats.py
CHANGED
|
@@ -76,6 +76,8 @@ from gwaslab.util_ex_ldsc import _estimate_rg_by_ldsc
|
|
|
76
76
|
from gwaslab.util_ex_ldsc import _estimate_h2_cts_by_ldsc
|
|
77
77
|
from gwaslab.util_ex_ldsc import _estimate_partitioned_h2_by_ldsc
|
|
78
78
|
from gwaslab.bd_get_hapmap3 import gethapmap3
|
|
79
|
+
from gwaslab.util_abf_finemapping import abf_finemapping
|
|
80
|
+
from gwaslab.util_abf_finemapping import make_cs
|
|
79
81
|
import gc
|
|
80
82
|
|
|
81
83
|
#20220309
|
|
@@ -135,6 +137,7 @@ class Sumstats():
|
|
|
135
137
|
self.data = pd.DataFrame()
|
|
136
138
|
self.log = Log()
|
|
137
139
|
self.ldsc_h2 = None
|
|
140
|
+
self.ldsc_h2_results = None
|
|
138
141
|
self.ldsc_rg = None
|
|
139
142
|
self.ldsc_h2_cts = None
|
|
140
143
|
self.ldsc_partitioned_h2_summary = None
|
|
@@ -756,13 +759,20 @@ class Sumstats():
|
|
|
756
759
|
else:
|
|
757
760
|
output = lambdaGC(self.data[["CHR",mode]],mode=mode,**kwargs)
|
|
758
761
|
self.meta["Genomic inflation factor"] = output
|
|
759
|
-
return output
|
|
762
|
+
return output
|
|
763
|
+
|
|
764
|
+
def abf_finemapping(self, region=None, chrpos=None, snpid=None, threshold=0.95, **kwargs):
    """Fine-map one locus with approximate Bayes factors and build a credible set.

    Parameters
    ----------
    region, chrpos, snpid :
        Locus selectors, forwarded unchanged to the module-level
        ``abf_finemapping`` helper.
    threshold : float, default 0.95
        Cumulative PIP that the credible set has to exceed. It was
        previously hard-coded to 0.95; the default keeps that behaviour.
    **kwargs :
        Forwarded to the module-level ``abf_finemapping`` helper.
        ``verbose`` (if present) is also applied to the credible-set log line.

    Returns
    -------
    tuple
        ``(region_data, credible_sets)`` as returned by the module-level
        ``abf_finemapping`` and ``make_cs`` helpers respectively.
    """
    # Work on a copy so the helper never touches self.data.
    region_data = abf_finemapping(
        self.data.copy(),
        region=region,
        chrpos=chrpos,
        snpid=snpid,
        log=self.log,
        **kwargs
    )
    credible_sets = make_cs(
        region_data,
        threshold=threshold,
        log=self.log,
        verbose=kwargs.get("verbose", True),
    )
    return region_data, credible_sets
|
|
768
|
+
|
|
769
|
+
|
|
760
770
|
## LDSC ##############################################################################################
|
|
761
771
|
def estimate_h2_by_ldsc(self, build=None, verbose=True, match_allele=True, **kwargs):
|
|
762
772
|
if build is None:
|
|
763
773
|
build = self.meta["gwaslab"]["genome_build"]
|
|
764
774
|
insumstats = gethapmap3(self.data.copy(), build=build, verbose=verbose , match_allele=True, how="right" )
|
|
765
|
-
self.ldsc_h2 = _estimate_h2_by_ldsc(insumstats=insumstats, log=self.log, verbose=verbose, **kwargs)
|
|
775
|
+
self.ldsc_h2, self.ldsc_h2_results = _estimate_h2_by_ldsc(insumstats=insumstats, log=self.log, verbose=verbose, **kwargs)
|
|
766
776
|
|
|
767
777
|
def estimate_rg_by_ldsc(self, build=None, verbose=True, match_allele=True, **kwargs):
|
|
768
778
|
if build is None:
|
gwaslab/g_version.py
CHANGED
|
@@ -15,16 +15,16 @@ def _get_version():
|
|
|
15
15
|
def gwaslab_info():
|
|
16
16
|
# version meta information
|
|
17
17
|
dic={
|
|
18
|
-
"version":"3.4.
|
|
19
|
-
"release_date":"
|
|
18
|
+
"version":"3.4.48",
|
|
19
|
+
"release_date":"20240822"
|
|
20
20
|
}
|
|
21
21
|
return dic
|
|
22
22
|
|
|
23
|
-
def _checking_plink_version(
|
|
24
|
-
if
|
|
25
|
-
which_plink_script = "
|
|
26
|
-
elif
|
|
27
|
-
which_plink_script = "
|
|
23
|
+
def _checking_plink_version(plink=None,plink2=None,log=Log(), verbose=True):
|
|
24
|
+
if plink is not None:
|
|
25
|
+
which_plink_script = "{} --version".format(plink)
|
|
26
|
+
elif plink2 is not None:
|
|
27
|
+
which_plink_script = "{} --version".format(plink2)
|
|
28
28
|
output = subprocess.check_output(which_plink_script, stderr=subprocess.STDOUT, shell=True,text=True)
|
|
29
29
|
log.write(" -PLINK version: {}".format(output.strip()))
|
|
30
30
|
return log
|
gwaslab/io_read_ldsc.py
CHANGED
|
@@ -198,16 +198,29 @@ def read_greml(filelist=[]):
|
|
|
198
198
|
return summary
|
|
199
199
|
|
|
200
200
|
def parse_ldsc_summary(ldsc_summary):
|
|
201
|
-
|
|
201
|
+
|
|
202
202
|
lines = ldsc_summary.split("\n")
|
|
203
|
+
|
|
204
|
+
columns = ['h2_obs', 'h2_se','Lambda_gc','Mean_chi2','Intercept','Intercept_se',"Ratio","Ratio_se","Catagories"]
|
|
205
|
+
|
|
206
|
+
summary = pd.DataFrame(columns = columns)
|
|
207
|
+
|
|
203
208
|
row={}
|
|
209
|
+
|
|
204
210
|
try:
|
|
205
211
|
objects = re.compile('[a-zA-Z\s\d]+:|[-0-9.]+[e]?[-0-9.]+|NA').findall(lines[0])
|
|
206
212
|
row["h2_obs"]=objects[1]
|
|
207
213
|
row["h2_se"]=objects[2]
|
|
208
214
|
|
|
209
|
-
##
|
|
215
|
+
##check categories
|
|
216
|
+
if len(lines) == 6:
|
|
217
|
+
objects = re.compile(' -Categories:(.+)').findall(lines[1])
|
|
218
|
+
row["Catagories"] = objects[0].strip()
|
|
219
|
+
lines.pop(1)
|
|
220
|
+
else:
|
|
221
|
+
row["Catagories"] = "NA"
|
|
210
222
|
|
|
223
|
+
##next line lambda gc
|
|
211
224
|
objects = re.compile('[a-zA-Z\s\d]+:|[-0-9.]+[e]?[-0-9.]+|NA').findall(lines[1])
|
|
212
225
|
row["Lambda_gc"] = objects[1]
|
|
213
226
|
##next line Mean_chi2
|
|
@@ -240,6 +253,7 @@ def parse_ldsc_summary(ldsc_summary):
|
|
|
240
253
|
row["Intercept_se"]="NA"
|
|
241
254
|
row["Ratio"]="NA"
|
|
242
255
|
row["Ratio_se"]="NA"
|
|
256
|
+
row["Catagories"] = "NA"
|
|
243
257
|
|
|
244
258
|
#summary = summary.append(row,ignore_index=True)
|
|
245
259
|
row = pd.DataFrame([row], columns = summary.columns)
|
gwaslab/io_to_formats.py
CHANGED
|
@@ -212,8 +212,10 @@ def tofmt(sumstats,
|
|
|
212
212
|
log.write(" -Start outputting sumstats in "+fmt+" format...")
|
|
213
213
|
|
|
214
214
|
if "CHR" in sumstats.columns:
|
|
215
|
+
# output X,Y,MT instead of 23,24,25
|
|
215
216
|
if xymt_number is False and pd.api.types.is_integer_dtype(sumstats["CHR"]):
|
|
216
217
|
sumstats["CHR"]= sumstats["CHR"].map(get_number_to_chr(xymt=xymt,prefix=chr_prefix))
|
|
218
|
+
# add prefix to CHR
|
|
217
219
|
elif chr_prefix is not None:
|
|
218
220
|
sumstats["CHR"]= chr_prefix + sumstats["CHR"].astype("string")
|
|
219
221
|
|
|
@@ -437,17 +439,20 @@ def _configure_output_cols_and_args(sumstats, rename_dictionary, cols, no_status
|
|
|
437
439
|
ouput_cols.append(i)
|
|
438
440
|
|
|
439
441
|
# + additional cols and remove duplicated
|
|
440
|
-
|
|
442
|
+
ouput_cols_final = []
|
|
443
|
+
for i in ouput_cols + cols:
|
|
444
|
+
if i not in ouput_cols_final:
|
|
445
|
+
ouput_cols_final.append(i)
|
|
441
446
|
|
|
442
447
|
# remove STATUS
|
|
443
448
|
try:
|
|
444
449
|
if no_status == True:
|
|
445
|
-
|
|
450
|
+
ouput_cols_final.remove("STATUS")
|
|
446
451
|
except:
|
|
447
452
|
pass
|
|
448
453
|
|
|
449
454
|
#filter and rename to target fromat headers
|
|
450
|
-
sumstats = sumstats[
|
|
455
|
+
sumstats = sumstats[ouput_cols_final]
|
|
451
456
|
sumstats = sumstats.rename(columns=rename_dictionary)
|
|
452
457
|
|
|
453
458
|
# configure target format args and reorder columns
|
gwaslab/qc_fix_sumstats.py
CHANGED
|
@@ -1497,7 +1497,11 @@ def liftover_variant(sumstats,
|
|
|
1497
1497
|
status="STATUS",
|
|
1498
1498
|
from_build="19",
|
|
1499
1499
|
to_build="38"):
|
|
1500
|
-
|
|
1500
|
+
try:
|
|
1501
|
+
converter = get_lifter("hg"+from_build,"hg"+to_build,one_based=True)
|
|
1502
|
+
except:
|
|
1503
|
+
converter = get_lifter("hg"+from_build,"hg"+to_build)
|
|
1504
|
+
|
|
1501
1505
|
dic= get_number_to_chr(in_chr=False,xymt=["X","Y","M"])
|
|
1502
1506
|
dic2= get_chr_to_number(out_chr=False)
|
|
1503
1507
|
for i in sumstats[chrom].unique():
|
|
@@ -1549,7 +1553,6 @@ def parallelizeliftovervariant(sumstats,n_cores=1,chrom="CHR", pos="POS", from_b
|
|
|
1549
1553
|
pool.close()
|
|
1550
1554
|
pool.join()
|
|
1551
1555
|
############################################################################
|
|
1552
|
-
|
|
1553
1556
|
unmap_num = len(sumstats.loc[sumstats[pos].isna(),:])
|
|
1554
1557
|
|
|
1555
1558
|
if remove is True:
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
import numpy as np
|
|
3
|
+
from gwaslab.g_Log import Log
|
|
4
|
+
from gwaslab.util_in_filter_value import _get_flanking_by_chrpos
|
|
5
|
+
from gwaslab.util_in_filter_value import _get_flanking_by_id
|
|
6
|
+
|
|
7
|
+
# Calculate posterior inclusion probabilities (PIP) based on the approximate Bayes factor (ABF)
|
|
8
|
+
# Wakefield, J. A Bayesian measure of the probability of false discovery in genetic epidemiology studies. Am J Hum Genet 81, 208–227 (2007).
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def calc_abf(insumstats, w=0.2, log=None, verbose=True, **kwargs):
    """Add a ``log_ABF`` column (log approximate Bayes factor) to a copy of the sumstats.

    With ``omega = w**2``, ``v = SE**2``, ``r = omega / (omega + v)`` and
    ``z = BETA / SE`` the value stored per variant is
    ``0.5 * (log(1 - r) + r * z**2)``.

    Parameters
    ----------
    insumstats : pandas.DataFrame
        Must provide the columns ``BETA`` and ``SE``. It is not modified.
    w : float, default 0.2
        Prior standard deviation W of the effect size. The original comments
        suggest 0.2 for binary and 0.15 for quantitative traits.
    log : Log, optional
        Logger exposing ``write``. A fresh ``Log()`` is created when omitted,
        so separate calls do not share one default instance.
    verbose : bool, default True
        Passed on to ``log.write``.
    **kwargs :
        Accepted and ignored, so callers can forward a shared keyword set.

    Returns
    -------
    pandas.DataFrame
        Copy of ``insumstats`` with the extra column ``log_ABF``.
    """
    if log is None:
        log = Log()

    log.write("Start to calculate approximate Bayesian factor for {} variants".format(len(insumstats)),verbose=verbose)
    # The author's name was misspelled ("akefield") in the emitted reference line.
    log.write(" - Reference: Wakefield, J. A bayesian measure of the probability of false discovery in genetic epidemiology studies. Am J Hum Genet 81, 208–227 (2007).",verbose=verbose)
    log.write(" - Priors for the standard deviation W of the effect size parameter β : {} ".format(w),verbose=verbose)

    # binary -> w=0.2
    # quant -> w=0.15
    omega = w**2
    se = insumstats["SE"]
    v = se**2
    r = omega / (omega + v)
    z = insumstats["BETA"] / se

    result = insumstats.copy()
    # (6) ABF -> reciprocal, kept on the log scale.
    result["log_ABF"] = 1/2 * (np.log(1 - r) + (r * z**2))
    return result
|
|
32
|
+
|
|
33
|
+
def calc_PIP(insumstats, log=None, verbose=True, **kwargs):
    """Convert ``log_ABF`` values into ``log_PIP`` and ``PIP`` columns on a copy of the input.

    Each PIP is the variant's ABF divided by the sum of all ABFs in
    ``insumstats``. The sum is evaluated on the log scale after subtracting
    the maximum ``log_ABF``, so large values do not overflow ``exp``.

    Parameters
    ----------
    insumstats : pandas.DataFrame
        Must provide the column ``log_ABF``. It is not modified.
    log : Log, optional
        Logger exposing ``write``. A fresh ``Log()`` is created when omitted,
        so separate calls do not share one default instance.
    verbose : bool, default True
        Passed on to ``log.write``.
    **kwargs :
        Accepted and ignored, so callers can forward a shared keyword set.

    Returns
    -------
    pandas.DataFrame
        Copy of ``insumstats`` with the extra columns ``log_PIP`` and ``PIP``.
    """
    if log is None:
        log = Log()

    # Calculate the logarithmic sum of each ABF to find the logarithm of total_abf
    # (the maximum is computed once and reused as the shift).
    log_abf = insumstats["log_ABF"]
    max_log_abf = np.max(log_abf)
    log_total_abf = np.log(np.sum(np.exp(log_abf - max_log_abf))) + max_log_abf

    result = insumstats.copy()
    log.write("Start to calculate PIP for {} variants".format(len(result)),verbose=verbose)
    # Calculate PIP on a logarithmic scale by subtracting log_total_abf from each log_abf
    result["log_PIP"] = result["log_ABF"] - log_total_abf
    # Convert PIP from the logarithmic scale back to the normal scale
    result["PIP"] = np.exp(result["log_PIP"])
    return result
|
|
43
|
+
|
|
44
|
+
def abf_finemapping(insumstats, region=None, chrpos=None, snpid=None, log=None, **kwargs):
    """Select one locus from the sumstats and compute ABF-based PIPs for it.

    Exactly one selector is used, checked in the order ``region``,
    ``chrpos``, ``snpid``.

    Parameters
    ----------
    insumstats : pandas.DataFrame
        Summary statistics. The ``region`` selector reads the columns
        ``CHR`` and ``POS``.
    region : sequence, optional
        ``(chromosome, start, end)``; variants with ``start <= POS <= end``
        on that chromosome are kept.
    chrpos : optional
        Forwarded to ``_get_flanking_by_chrpos`` together with ``**kwargs``.
    snpid : optional
        Forwarded to ``_get_flanking_by_id`` together with ``**kwargs``.
    log : Log, optional
        Logger exposing ``write``. A fresh ``Log()`` is created when omitted,
        so separate calls do not share one default instance.
    **kwargs :
        Forwarded to the flanking helpers, ``calc_abf`` and ``calc_PIP``.

    Returns
    -------
    pandas.DataFrame
        The selected variants with the columns added by ``calc_abf`` and
        ``calc_PIP``.

    Raises
    ------
    ValueError
        If none of ``region``, ``chrpos`` and ``snpid`` is given (this used
        to surface as an UnboundLocalError).
    """
    if region is None and chrpos is None and snpid is None:
        raise ValueError("Please specify the locus using region, chrpos or snpid.")

    if log is None:
        log = Log()

    if region is not None:
        chrom, start, end = region[0], region[1], region[2]
        in_region = (
            (insumstats["CHR"] == chrom)
            & (insumstats["POS"] >= start)
            & (insumstats["POS"] <= end)
        )
        region_data = insumstats[in_region]
    elif chrpos is not None:
        region_data = _get_flanking_by_chrpos(insumstats, chrpos=chrpos, **kwargs)
    else:
        region_data = _get_flanking_by_id(insumstats, snpid=snpid, **kwargs)

    region_data = calc_abf(region_data, log=log, **kwargs)
    region_data = calc_PIP(region_data, log=log, **kwargs)
    return region_data
|
|
56
|
+
|
|
57
|
+
def make_cs(insumstats, threshold=0.95, log=None, verbose=True):
    """Build a credible set: the top-PIP variants whose cumulative PIP exceeds ``threshold``.

    Variants are sorted by ``PIP`` in descending order and taken from the top
    until the running sum is strictly greater than ``threshold``. The variant
    that crosses the threshold is included. If the sum never exceeds the
    threshold, all variants are returned.

    The rows are taken as one slice of the sorted frame. The earlier
    row-by-row ``pd.concat`` was quadratic in the set size and turned every
    column into ``object`` dtype; the slice keeps the original dtypes.

    Parameters
    ----------
    insumstats : pandas.DataFrame
        Must provide the column ``PIP``. It is not modified.
    threshold : float, default 0.95
        Cumulative PIP to exceed.
    log : Log, optional
        Logger exposing ``write``. A fresh ``Log()`` is created when omitted,
        so separate calls do not share one default instance.
    verbose : bool, default True
        Passed on to ``log.write``.

    Returns
    -------
    pandas.DataFrame
        The credible-set rows in descending PIP order, with their original
        index labels.
    """
    if log is None:
        log = Log()

    ordered = insumstats.sort_values(by="PIP", ascending=False)
    running_total = np.cumsum(ordered["PIP"].to_numpy(dtype=float))
    crossed = np.flatnonzero(running_total > threshold)
    # Include the variant that pushes the running sum over the threshold.
    n_variants = int(crossed[0]) + 1 if crossed.size > 0 else len(ordered)
    cs = ordered.iloc[:n_variants].copy()

    log.write("Finished constructing a {}% credible set with {} variant(s)".format(str(threshold * 100),str(len(cs))),verbose=verbose)
    return cs
|
|
@@ -17,6 +17,8 @@ def tofinemapping(sumstats,
|
|
|
17
17
|
vcf=None,
|
|
18
18
|
loci=None,
|
|
19
19
|
out="./",
|
|
20
|
+
plink="plink",
|
|
21
|
+
plink2="plink2",
|
|
20
22
|
windowsizekb=1000,
|
|
21
23
|
n_cores=1,
|
|
22
24
|
mode="r",
|
|
@@ -56,6 +58,9 @@ def tofinemapping(sumstats,
|
|
|
56
58
|
else:
|
|
57
59
|
sig_df = sumstats.loc[sumstats["SNPID"].isin(loci),:]
|
|
58
60
|
|
|
61
|
+
log.write(" -plink1.9 path: {}".format(plink),verbose=verbose)
|
|
62
|
+
log.write(" -plink2 path: {}".format(plink2),verbose=verbose)
|
|
63
|
+
|
|
59
64
|
# Drop duplicate!!!!
|
|
60
65
|
log.write(" -Dropping duplicated SNPIDs...",verbose=verbose)
|
|
61
66
|
sumstats = sumstats.drop_duplicates(subset=["SNPID"]).copy()
|
|
@@ -68,11 +73,13 @@ def tofinemapping(sumstats,
|
|
|
68
73
|
if exclude_hla==True:
|
|
69
74
|
sig_df = _exclude_hla(sig_df, log=log, verbose=verbose)
|
|
70
75
|
|
|
76
|
+
sig_df = sig_df.reset_index()
|
|
77
|
+
|
|
71
78
|
## for each lead variant
|
|
72
79
|
for index, row in sig_df.iterrows():
|
|
73
80
|
# extract snplist in each locus
|
|
74
81
|
gc.collect()
|
|
75
|
-
|
|
82
|
+
log.write(" -Locus #{}---------------------------------------------------------------".format(index+1))
|
|
76
83
|
log.write(" -Processing locus with lead variant {} at CHR {} POS {} ...".format(row["SNPID"],row["CHR"],row["POS"]))
|
|
77
84
|
locus_sumstats = _extract_variants_in_locus(sumstats, windowsizekb, locus = (row["CHR"],row["POS"]))
|
|
78
85
|
|
|
@@ -84,7 +91,10 @@ def tofinemapping(sumstats,
|
|
|
84
91
|
n_cores=n_cores,
|
|
85
92
|
log=log,
|
|
86
93
|
load_bim=True,
|
|
87
|
-
overwrite=overwrite
|
|
94
|
+
overwrite=overwrite,
|
|
95
|
+
plink=plink,
|
|
96
|
+
plink2=plink2,
|
|
97
|
+
**kwargs)
|
|
88
98
|
|
|
89
99
|
## check available snps with reference file
|
|
90
100
|
matched_sumstats = _align_sumstats_with_bim(row=row,
|
|
@@ -114,7 +124,10 @@ def tofinemapping(sumstats,
|
|
|
114
124
|
windowsizekb=windowsizekb,
|
|
115
125
|
out=out,
|
|
116
126
|
plink_log=plink_log,
|
|
117
|
-
log=log,
|
|
127
|
+
log=log,
|
|
128
|
+
filetype=filetype,
|
|
129
|
+
plink=plink,
|
|
130
|
+
plink2=plink2,
|
|
118
131
|
verbose=verbose)
|
|
119
132
|
|
|
120
133
|
|
|
@@ -143,12 +156,12 @@ def tofinemapping(sumstats,
|
|
|
143
156
|
|
|
144
157
|
|
|
145
158
|
|
|
146
|
-
def _calculate_ld_r(study, matched_sumstats_snpid, row, bfile_prefix, n_cores, windowsizekb,out,plink_log,log,memory,mode,filetype,verbose=True):
|
|
159
|
+
def _calculate_ld_r(study, matched_sumstats_snpid, row, bfile_prefix, n_cores, windowsizekb,out,plink_log,log,memory,mode,filetype,plink,plink2,verbose=True):
|
|
147
160
|
'''
|
|
148
161
|
Calculate LD r matrix by calling PLINK; return file name and log
|
|
149
162
|
'''
|
|
150
163
|
log.write(" -Start to calculate LD r matrix...",verbose=verbose)
|
|
151
|
-
log = _checking_plink_version(
|
|
164
|
+
log = _checking_plink_version(plink=plink, log=log)
|
|
152
165
|
if "@" in bfile_prefix:
|
|
153
166
|
bfile_to_use = bfile_prefix.replace("@",str(row["CHR"]))
|
|
154
167
|
else:
|
|
@@ -165,7 +178,7 @@ def _calculate_ld_r(study, matched_sumstats_snpid, row, bfile_prefix, n_cores, w
|
|
|
165
178
|
raise ValueError("Please use bfile instead of pfile for PLINK1.")
|
|
166
179
|
|
|
167
180
|
script_vcf_to_bfile = """
|
|
168
|
-
|
|
181
|
+
{} \
|
|
169
182
|
--bfile {} \
|
|
170
183
|
--keep-allele-order \
|
|
171
184
|
--extract {} \
|
|
@@ -175,7 +188,7 @@ def _calculate_ld_r(study, matched_sumstats_snpid, row, bfile_prefix, n_cores, w
|
|
|
175
188
|
--threads {} {}\
|
|
176
189
|
--write-snplist \
|
|
177
190
|
--out {}
|
|
178
|
-
""".format(bfile_to_use, snplist_path , row["CHR"], mode, n_cores, memory_flag if memory is not None else "", output_prefix)
|
|
191
|
+
""".format(plink, bfile_to_use, snplist_path , row["CHR"], mode, n_cores, memory_flag if memory is not None else "", output_prefix)
|
|
179
192
|
|
|
180
193
|
try:
|
|
181
194
|
output = subprocess.check_output(script_vcf_to_bfile, stderr=subprocess.STDOUT, shell=True,text=True)
|
gwaslab/util_ex_calculate_prs.py
CHANGED
|
@@ -18,6 +18,8 @@ def _calculate_prs(sumstats,
|
|
|
18
18
|
memory=None,
|
|
19
19
|
overwrite=False,
|
|
20
20
|
mode=None,delete=True,
|
|
21
|
+
plink="plink",
|
|
22
|
+
plink2="plink2",
|
|
21
23
|
log=Log(),**kwargs):
|
|
22
24
|
|
|
23
25
|
#matching_alleles
|
|
@@ -30,14 +32,18 @@ def _calculate_prs(sumstats,
|
|
|
30
32
|
chrlist.sort()
|
|
31
33
|
plink_log = ""
|
|
32
34
|
#process reference fileWWW
|
|
33
|
-
bfile_prefix, plink_log, ref_bim, filetype = _process_plink_input_files(
|
|
35
|
+
bfile_prefix, plink_log, ref_bim, filetype = _process_plink_input_files(
|
|
36
|
+
chrlist=chrlist,
|
|
34
37
|
bfile=bfile,
|
|
35
38
|
vcf=vcf,
|
|
36
39
|
plink_log=plink_log,
|
|
37
40
|
n_cores=n_cores,
|
|
38
41
|
log=log,
|
|
39
42
|
load_bim=False,
|
|
40
|
-
overwrite=overwrite
|
|
43
|
+
overwrite=overwrite,
|
|
44
|
+
plink=plink,
|
|
45
|
+
plink2=plink2,
|
|
46
|
+
**kwargs)
|
|
41
47
|
score_file_path_list =[]
|
|
42
48
|
for index, chrom in enumerate(chrlist):
|
|
43
49
|
chr_sumstats = sumstats.loc[sumstats["CHR"]==chrom,:].copy()
|
|
@@ -61,7 +67,7 @@ def _calculate_prs(sumstats,
|
|
|
61
67
|
plink_log=plink_log,
|
|
62
68
|
log=log,
|
|
63
69
|
memory=memory,
|
|
64
|
-
mode=mode,filetype=filetype)
|
|
70
|
+
mode=mode,filetype=filetype,plink2=plink2)
|
|
65
71
|
score_file_path_list.append(score_file_path)
|
|
66
72
|
if delete == True:
|
|
67
73
|
os.remove(model_path)
|
|
@@ -71,10 +77,10 @@ def _calculate_prs(sumstats,
|
|
|
71
77
|
|
|
72
78
|
|
|
73
79
|
|
|
74
|
-
def _run_calculate_prs(study, chrom , model_path, bfile_prefix, n_cores, out, plink_log, log, memory,filetype, mode=None):
|
|
80
|
+
def _run_calculate_prs(study, chrom , model_path, bfile_prefix, n_cores, out, plink_log, log, memory,filetype, plink2,mode=None):
|
|
75
81
|
|
|
76
82
|
log.write(" -Start to calculate PRS for Chr {}...".format(chrom))
|
|
77
|
-
_checking_plink_version(
|
|
83
|
+
_checking_plink_version(plink2=plink2, log=log)
|
|
78
84
|
|
|
79
85
|
if "@" in bfile_prefix:
|
|
80
86
|
bpfile_to_use = bfile_prefix.replace("@",str(chrom))
|
|
@@ -92,13 +98,13 @@ def _run_calculate_prs(study, chrom , model_path, bfile_prefix, n_cores, out, pl
|
|
|
92
98
|
memory_flag = "--memory {}".format(memory)
|
|
93
99
|
|
|
94
100
|
script_vcf_to_bfile = """
|
|
95
|
-
|
|
101
|
+
{} \
|
|
96
102
|
{} \
|
|
97
103
|
--score {} 1 2 3 header {} cols=+scoresums,+denom ignore-dup-ids \
|
|
98
104
|
--chr {} \
|
|
99
105
|
--threads {} {}\
|
|
100
106
|
--out {}
|
|
101
|
-
""".format(file_flag, model_path , mode if mode is not None else "", chrom, n_cores, memory_flag if memory is not None else "", output_prefix)
|
|
107
|
+
""".format(plink2, file_flag, model_path , mode if mode is not None else "", chrom, n_cores, memory_flag if memory is not None else "", output_prefix)
|
|
102
108
|
|
|
103
109
|
try:
|
|
104
110
|
output = subprocess.check_output(script_vcf_to_bfile, stderr=subprocess.STDOUT, shell=True,text=True)
|
gwaslab/util_ex_ldsc.py
CHANGED
|
@@ -304,9 +304,16 @@ def _estimate_h2_by_ldsc(insumstats, log, verbose=True, munge=False, munge_args=
|
|
|
304
304
|
log.write(" -LDSC log:", verbose=verbose)
|
|
305
305
|
summary = estimate_h2(sumstats, default_args, log)
|
|
306
306
|
|
|
307
|
+
results_table = None
|
|
308
|
+
if type(summary) is tuple:
|
|
309
|
+
results_table = summary[1]
|
|
310
|
+
summary = summary[0]
|
|
311
|
+
log.write(" -Coefficient results have been stored in .ldsc_h2_results", verbose=verbose)
|
|
312
|
+
|
|
313
|
+
|
|
307
314
|
log.write(" -Results have been stored in .ldsc_h2", verbose=verbose)
|
|
308
315
|
finished(log=log,verbose=verbose,end_line=_end_line)
|
|
309
|
-
return parse_ldsc_summary(summary)
|
|
316
|
+
return parse_ldsc_summary(summary), results_table
|
|
310
317
|
|
|
311
318
|
|
|
312
319
|
####################################################################################################################
|
gwaslab/util_ex_process_ref.py
CHANGED
|
@@ -20,7 +20,9 @@ def _process_plink_input_files(chrlist,
|
|
|
20
20
|
bgen_mode="ref-first",
|
|
21
21
|
convert="bfile",
|
|
22
22
|
memory=None,
|
|
23
|
-
load_bim=False
|
|
23
|
+
load_bim=False,
|
|
24
|
+
plink="plink",
|
|
25
|
+
plink2="plink2"):
|
|
24
26
|
"""
|
|
25
27
|
Process input files (bfile,pfile,vcf,bgen) to either PLINK1 bed/bim/fam or PLINK2 pgen/psam/pvar.
|
|
26
28
|
|
|
@@ -66,7 +68,9 @@ def _process_plink_input_files(chrlist,
|
|
|
66
68
|
convert=convert,
|
|
67
69
|
memory=memory,
|
|
68
70
|
overwrite=overwrite,
|
|
69
|
-
load_bim=load_bim
|
|
71
|
+
load_bim=load_bim,
|
|
72
|
+
plink=plink,
|
|
73
|
+
plink2=plink2)
|
|
70
74
|
filetype = convert
|
|
71
75
|
elif filetype == "bgen":
|
|
72
76
|
ref_file_prefix, plink_log, ref_bims = _process_bgen(ref_file_prefix=ref_file_prefix,
|
|
@@ -81,7 +85,9 @@ def _process_plink_input_files(chrlist,
|
|
|
81
85
|
convert=convert,
|
|
82
86
|
memory=memory,
|
|
83
87
|
overwrite=overwrite,
|
|
84
|
-
load_bim=load_bim
|
|
88
|
+
load_bim=load_bim,
|
|
89
|
+
plink=plink,
|
|
90
|
+
plink2=plink2)
|
|
85
91
|
filetype = convert
|
|
86
92
|
return ref_file_prefix, plink_log, ref_bims, filetype
|
|
87
93
|
|
|
@@ -199,11 +205,13 @@ def _process_vcf(ref_file_prefix,
|
|
|
199
205
|
convert="bfile",
|
|
200
206
|
memory=None,
|
|
201
207
|
overwrite=False,
|
|
202
|
-
load_bim=False
|
|
208
|
+
load_bim=False,
|
|
209
|
+
plink="plink",
|
|
210
|
+
plink2="plink2"):
|
|
203
211
|
log.write(" -Processing VCF : {}...".format(ref_file_prefix))
|
|
204
212
|
|
|
205
213
|
#check plink version
|
|
206
|
-
log = _checking_plink_version(
|
|
214
|
+
log = _checking_plink_version(plink2=plink2,log=log)
|
|
207
215
|
|
|
208
216
|
# file path prefix to return
|
|
209
217
|
if is_wild_card==True:
|
|
@@ -243,14 +251,15 @@ def _process_vcf(ref_file_prefix,
|
|
|
243
251
|
#if not existing or overwrite is True
|
|
244
252
|
if (not is_file_exist) or overwrite:
|
|
245
253
|
script_vcf_to_bfile = """
|
|
246
|
-
|
|
254
|
+
{} \
|
|
247
255
|
--vcf {} \
|
|
248
256
|
--chr {} \
|
|
249
257
|
{} \
|
|
250
258
|
--rm-dup force-first \
|
|
251
259
|
--threads {}{}\
|
|
252
260
|
--out {}
|
|
253
|
-
""".format(
|
|
261
|
+
""".format(plink2,
|
|
262
|
+
vcf_to_load,
|
|
254
263
|
i,
|
|
255
264
|
make_flag,
|
|
256
265
|
n_cores, memory_flag,
|
|
@@ -288,11 +297,13 @@ def _process_bgen(ref_file_prefix,
|
|
|
288
297
|
convert="bfile",
|
|
289
298
|
memory=None,
|
|
290
299
|
overwrite=False,
|
|
291
|
-
load_bim=False
|
|
300
|
+
load_bim=False,
|
|
301
|
+
plink="plink",
|
|
302
|
+
plink2="plink2"):
|
|
292
303
|
log.write(" -Processing BGEN files : {}...".format(ref_file_prefix))
|
|
293
304
|
|
|
294
305
|
#check plink version
|
|
295
|
-
log = _checking_plink_version(
|
|
306
|
+
log = _checking_plink_version(log=log,plink2=plink2)
|
|
296
307
|
|
|
297
308
|
# file path prefix to return
|
|
298
309
|
if is_wild_card==True:
|
|
@@ -338,14 +349,14 @@ def _process_bgen(ref_file_prefix,
|
|
|
338
349
|
#if not existing or overwrite is True
|
|
339
350
|
if (not is_file_exist) or overwrite:
|
|
340
351
|
script_vcf_to_bfile = """
|
|
341
|
-
|
|
352
|
+
{} \
|
|
342
353
|
--bgen {} {} {}\
|
|
343
354
|
--chr {} \
|
|
344
355
|
{} \
|
|
345
356
|
--rm-dup force-first \
|
|
346
357
|
--threads {}{}\
|
|
347
358
|
--out {}
|
|
348
|
-
""".format(bgen_to_load, bgen_mode, sample_flag,
|
|
359
|
+
""".format(plink2,bgen_to_load, bgen_mode, sample_flag,
|
|
349
360
|
i,
|
|
350
361
|
make_flag,
|
|
351
362
|
n_cores, memory_flag,
|
gwaslab/util_ex_run_clumping.py
CHANGED
|
@@ -11,7 +11,7 @@ from gwaslab.g_version import _checking_plink_version
|
|
|
11
11
|
def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
|
|
12
12
|
p="P",mlog10p="MLOG10P", overwrite=False, study=None, bfile=None,
|
|
13
13
|
n_cores=1, memory=None, chrom=None, clump_p1=5e-8, clump_p2=5e-8, clump_r2=0.01, clump_kb=250,
|
|
14
|
-
log=Log(),verbose=True):
|
|
14
|
+
log=Log(),verbose=True,plink="plink",plink2="plink2"):
|
|
15
15
|
##start function with col checking##########################################################
|
|
16
16
|
_start_line = "perfrom clumping"
|
|
17
17
|
_end_line = "clumping"
|
|
@@ -111,7 +111,7 @@ def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
|
|
|
111
111
|
bfile_to_use = bfile
|
|
112
112
|
|
|
113
113
|
log.write(" -Performing clumping for CHR {}...".format(i),verbose=verbose)
|
|
114
|
-
log = _checking_plink_version(
|
|
114
|
+
log = _checking_plink_version(plink2=plink2, log=log)
|
|
115
115
|
if memory is not None:
|
|
116
116
|
memory_flag = "--memory {}".format(memory)
|
|
117
117
|
|
|
@@ -123,7 +123,7 @@ def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
|
|
|
123
123
|
if scaled == True:
|
|
124
124
|
# clumping using LOG10P
|
|
125
125
|
script = """
|
|
126
|
-
|
|
126
|
+
{} \
|
|
127
127
|
{}\
|
|
128
128
|
--chr {} \
|
|
129
129
|
--clump {} \
|
|
@@ -136,11 +136,11 @@ def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
|
|
|
136
136
|
--clump-kb {} \
|
|
137
137
|
--threads {} {}\
|
|
138
138
|
--out {}
|
|
139
|
-
""".format(file_flag, chrom, clump, mlog10p,clump_log10_p1, clump_log10_p2, clump_r2, clump_kb, n_cores, memory_flag if memory is not None else "", out_single_chr)
|
|
139
|
+
""".format(plink2, file_flag, chrom, clump, mlog10p,clump_log10_p1, clump_log10_p2, clump_r2, clump_kb, n_cores, memory_flag if memory is not None else "", out_single_chr)
|
|
140
140
|
else:
|
|
141
141
|
# clumping using P
|
|
142
142
|
script = """
|
|
143
|
-
|
|
143
|
+
{} \
|
|
144
144
|
{}\
|
|
145
145
|
--chr {} \
|
|
146
146
|
--clump {} \
|
|
@@ -152,7 +152,7 @@ def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
|
|
|
152
152
|
--clump-kb {} \
|
|
153
153
|
--threads {} {}\
|
|
154
154
|
--out {}
|
|
155
|
-
""".format(file_flag, chrom, clump, p, clump_p1, clump_p2, clump_r2, clump_kb, n_cores,memory_flag if memory is not None else "", out_single_chr)
|
|
155
|
+
""".format(plink2,file_flag, chrom, clump, p, clump_p1, clump_p2, clump_r2, clump_kb, n_cores,memory_flag if memory is not None else "", out_single_chr)
|
|
156
156
|
|
|
157
157
|
try:
|
|
158
158
|
output = subprocess.check_output(script, stderr=subprocess.STDOUT, shell=True,text=True)
|
gwaslab/viz_aux_annotate_plot.py
CHANGED
|
@@ -38,6 +38,7 @@ def annotate_single(
|
|
|
38
38
|
region,
|
|
39
39
|
region_anno_bbox_args,
|
|
40
40
|
skip,
|
|
41
|
+
anno_height=1,
|
|
41
42
|
amode="int",
|
|
42
43
|
snpid="SNPID",
|
|
43
44
|
chrom="CHR",
|
|
@@ -131,7 +132,7 @@ def annotate_single(
|
|
|
131
132
|
|
|
132
133
|
#xy=(row["i"],row["scaled_P"]+0.2)
|
|
133
134
|
xy=(row["i"],row["scaled_P"]+0.01*maxy)
|
|
134
|
-
xytext=(last_pos,1.15*maxy*arm_scale)
|
|
135
|
+
xytext=(last_pos,1.15*maxy*arm_scale*anno_height)
|
|
135
136
|
|
|
136
137
|
if anno_fixed_arm_length is not None:
|
|
137
138
|
armB_length_in_point = anno_fixed_arm_length
|
gwaslab/viz_aux_chromatin.py
CHANGED
|
@@ -63,6 +63,8 @@ def _plot_chromatin_state(region_chromatin_files,
|
|
|
63
63
|
fig,
|
|
64
64
|
ax,
|
|
65
65
|
xlim_i,
|
|
66
|
+
fontsize = 12,
|
|
67
|
+
font_family = "Arial",
|
|
66
68
|
log=Log(),
|
|
67
69
|
verbose=True):
|
|
68
70
|
'''
|
|
@@ -101,11 +103,10 @@ def _plot_chromatin_state(region_chromatin_files,
|
|
|
101
103
|
|
|
102
104
|
## add stripe label
|
|
103
105
|
if len(region_chromatin_labels) == len(region_chromatin_files):
|
|
104
|
-
ax.set_yticks([i*0.1 for i in range(len(region_chromatin_labels))],
|
|
105
|
-
region_chromatin_labels)
|
|
106
|
+
ax.set_yticks([i*0.1 for i in range(len(region_chromatin_labels))], region_chromatin_labels, fontsize=fontsize, family=font_family)
|
|
106
107
|
else:
|
|
107
108
|
ax.set_yticks(ticks=[])
|
|
108
109
|
|
|
109
|
-
ax.set_xticks(ticks=[])
|
|
110
|
+
#ax.set_xticks(ticks=[])
|
|
110
111
|
ax.invert_yaxis()
|
|
111
112
|
return fig
|
gwaslab/viz_aux_quickfix.py
CHANGED
|
@@ -286,8 +286,9 @@ def _cut(series, mode,cutfactor,cut,skip, ylabels, cut_log, verbose, lines_to_pl
|
|
|
286
286
|
log.write(" -Converting data above cut line...",verbose=verbose)
|
|
287
287
|
if ylabels is not None:
|
|
288
288
|
ylabels = pd.Series(ylabels)
|
|
289
|
-
maxy = series.max()
|
|
290
289
|
series = series.copy()
|
|
290
|
+
|
|
291
|
+
maxy = series.max()
|
|
291
292
|
if "b" not in mode:
|
|
292
293
|
log.write(" -Maximum -log10(P) value is "+str(maxy) +" .", verbose=verbose)
|
|
293
294
|
elif "b" in mode:
|
|
@@ -77,8 +77,10 @@ def compare_effect(path1,
|
|
|
77
77
|
scaled2 = True
|
|
78
78
|
if is_q_mc=="fdr" or is_q_mc=="bon":
|
|
79
79
|
is_q = True
|
|
80
|
-
|
|
81
|
-
|
|
80
|
+
|
|
81
|
+
if is_q == True:
|
|
82
|
+
if is_q_mc not in [False,"fdr","bon","non"]:
|
|
83
|
+
raise ValueError("Please select either fdr or bon or non for is_q_mc.")
|
|
82
84
|
if save_args is None:
|
|
83
85
|
save_args = {"dpi":300,"facecolor":"white"}
|
|
84
86
|
if reg_box is None:
|
gwaslab/viz_plot_miamiplot2.py
CHANGED
|
@@ -247,7 +247,10 @@ def plot_miami2(
|
|
|
247
247
|
plt.subplots_adjust(hspace=region_hspace)
|
|
248
248
|
else:
|
|
249
249
|
fig, ax1, ax5 = figax
|
|
250
|
-
|
|
250
|
+
|
|
251
|
+
#if same_ylim==True:
|
|
252
|
+
#maxy = merged_sumstats[["scaled_P_1","scaled_P_2"]].max().max()
|
|
253
|
+
|
|
251
254
|
log.write("Start to create Manhattan plot for sumstats1...", verbose=verbose)
|
|
252
255
|
fig,log = mqqplot(merged_sumstats,
|
|
253
256
|
chrom="CHR",
|
|
@@ -284,14 +287,8 @@ def plot_miami2(
|
|
|
284
287
|
_if_quick_qc=False,
|
|
285
288
|
**mqq_args2)
|
|
286
289
|
log.write("Finished creating Manhattan plot for sumstats2".format(_get_version()), verbose=verbose)
|
|
290
|
+
|
|
287
291
|
|
|
288
|
-
if same_ylim==True:
|
|
289
|
-
ylim1_converted = ax1.get_ylim()
|
|
290
|
-
ylim2_converted = ax5.get_ylim()
|
|
291
|
-
if ylim1_converted > ylim2_converted:
|
|
292
|
-
ax5.set_ylim(ylim1_converted)
|
|
293
|
-
else:
|
|
294
|
-
ax1.set_ylim(ylim2_converted)
|
|
295
292
|
#####################################################################################################################
|
|
296
293
|
|
|
297
294
|
ax5.set_xlabel("")
|