gwaslab 3.4.47__py3-none-any.whl → 3.4.48__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gwaslab might be problematic. Click here for more details.
- gwaslab/g_Sumstats.py +12 -2
- gwaslab/g_version.py +2 -2
- gwaslab/io_read_ldsc.py +16 -2
- gwaslab/qc_fix_sumstats.py +5 -2
- gwaslab/util_abf_finemapping.py +67 -0
- gwaslab/util_ex_ldsc.py +8 -1
- gwaslab/util_ex_run_clumping.py +6 -6
- gwaslab/viz_aux_annotate_plot.py +2 -1
- gwaslab/viz_aux_quickfix.py +2 -1
- gwaslab/viz_plot_compare_effect.py +4 -2
- gwaslab/viz_plot_miamiplot2.py +5 -8
- gwaslab/viz_plot_mqqplot.py +42 -21
- gwaslab/viz_plot_regional2.py +75 -29
- gwaslab/viz_plot_stackedregional.py +26 -12
- {gwaslab-3.4.47.dist-info → gwaslab-3.4.48.dist-info}/METADATA +15 -15
- {gwaslab-3.4.47.dist-info → gwaslab-3.4.48.dist-info}/RECORD +20 -19
- {gwaslab-3.4.47.dist-info → gwaslab-3.4.48.dist-info}/WHEEL +1 -1
- {gwaslab-3.4.47.dist-info → gwaslab-3.4.48.dist-info}/LICENSE +0 -0
- {gwaslab-3.4.47.dist-info → gwaslab-3.4.48.dist-info}/LICENSE_before_v3.4.39 +0 -0
- {gwaslab-3.4.47.dist-info → gwaslab-3.4.48.dist-info}/top_level.txt +0 -0
gwaslab/g_Sumstats.py
CHANGED
|
@@ -76,6 +76,8 @@ from gwaslab.util_ex_ldsc import _estimate_rg_by_ldsc
|
|
|
76
76
|
from gwaslab.util_ex_ldsc import _estimate_h2_cts_by_ldsc
|
|
77
77
|
from gwaslab.util_ex_ldsc import _estimate_partitioned_h2_by_ldsc
|
|
78
78
|
from gwaslab.bd_get_hapmap3 import gethapmap3
|
|
79
|
+
from gwaslab.util_abf_finemapping import abf_finemapping
|
|
80
|
+
from gwaslab.util_abf_finemapping import make_cs
|
|
79
81
|
import gc
|
|
80
82
|
|
|
81
83
|
#20220309
|
|
@@ -135,6 +137,7 @@ class Sumstats():
|
|
|
135
137
|
self.data = pd.DataFrame()
|
|
136
138
|
self.log = Log()
|
|
137
139
|
self.ldsc_h2 = None
|
|
140
|
+
self.ldsc_h2_results = None
|
|
138
141
|
self.ldsc_rg = None
|
|
139
142
|
self.ldsc_h2_cts = None
|
|
140
143
|
self.ldsc_partitioned_h2_summary = None
|
|
@@ -756,13 +759,20 @@ class Sumstats():
|
|
|
756
759
|
else:
|
|
757
760
|
output = lambdaGC(self.data[["CHR",mode]],mode=mode,**kwargs)
|
|
758
761
|
self.meta["Genomic inflation factor"] = output
|
|
759
|
-
return output
|
|
762
|
+
return output
|
|
763
|
+
|
|
764
|
+
def abf_finemapping(self, region=None, chrpos=None, snpid=None, threshold=0.95, **kwargs):
    """Run ABF fine-mapping on one locus and build a credible set from it.

    Inside this method body the bare name ``abf_finemapping`` resolves to the
    module-level function imported from ``gwaslab.util_abf_finemapping``, not
    to this method, so the call below is not recursive.

    Parameters
    ----------
    region, chrpos, snpid :
        Locus selectors, forwarded unchanged to the module-level
        ``abf_finemapping``.
    threshold : float, default 0.95
        Cumulative PIP the credible set has to exceed. It was previously
        hard-coded to 0.95; the default keeps the old behaviour.
    **kwargs :
        Forwarded to the module-level ``abf_finemapping``. A ``verbose`` entry,
        if present, is also honoured when the credible set is logged.

    Returns
    -------
    tuple
        ``(region_data, credible_sets)`` as returned by ``abf_finemapping`` and
        ``make_cs`` respectively.
    """
    # Work on a copy so the fine-mapping columns never leak into self.data.
    region_data = abf_finemapping(self.data.copy(),region=region,chrpos=chrpos,snpid=snpid,log=self.log, **kwargs)
    # Forward verbose so that verbose=False silences the credible-set message too.
    credible_sets = make_cs(region_data,threshold=threshold,log=self.log,verbose=kwargs.get("verbose", True))
    return region_data, credible_sets
|
|
768
|
+
|
|
769
|
+
|
|
760
770
|
## LDSC ##############################################################################################
|
|
761
771
|
def estimate_h2_by_ldsc(self, build=None, verbose=True, match_allele=True, **kwargs):
|
|
762
772
|
if build is None:
|
|
763
773
|
build = self.meta["gwaslab"]["genome_build"]
|
|
764
774
|
insumstats = gethapmap3(self.data.copy(), build=build, verbose=verbose , match_allele=True, how="right" )
|
|
765
|
-
self.ldsc_h2 = _estimate_h2_by_ldsc(insumstats=insumstats, log=self.log, verbose=verbose, **kwargs)
|
|
775
|
+
self.ldsc_h2, self.ldsc_h2_results = _estimate_h2_by_ldsc(insumstats=insumstats, log=self.log, verbose=verbose, **kwargs)
|
|
766
776
|
|
|
767
777
|
def estimate_rg_by_ldsc(self, build=None, verbose=True, match_allele=True, **kwargs):
|
|
768
778
|
if build is None:
|
gwaslab/g_version.py
CHANGED
gwaslab/io_read_ldsc.py
CHANGED
|
@@ -198,16 +198,29 @@ def read_greml(filelist=[]):
|
|
|
198
198
|
return summary
|
|
199
199
|
|
|
200
200
|
def parse_ldsc_summary(ldsc_summary):
|
|
201
|
-
|
|
201
|
+
|
|
202
202
|
lines = ldsc_summary.split("\n")
|
|
203
|
+
|
|
204
|
+
columns = ['h2_obs', 'h2_se','Lambda_gc','Mean_chi2','Intercept','Intercept_se',"Ratio","Ratio_se","Catagories"]
|
|
205
|
+
|
|
206
|
+
summary = pd.DataFrame(columns = columns)
|
|
207
|
+
|
|
203
208
|
row={}
|
|
209
|
+
|
|
204
210
|
try:
|
|
205
211
|
objects = re.compile('[a-zA-Z\s\d]+:|[-0-9.]+[e]?[-0-9.]+|NA').findall(lines[0])
|
|
206
212
|
row["h2_obs"]=objects[1]
|
|
207
213
|
row["h2_se"]=objects[2]
|
|
208
214
|
|
|
209
|
-
##
|
|
215
|
+
##check categories
|
|
216
|
+
if len(lines) == 6:
|
|
217
|
+
objects = re.compile(' -Categories:(.+)').findall(lines[1])
|
|
218
|
+
row["Catagories"] = objects[0].strip()
|
|
219
|
+
lines.pop(1)
|
|
220
|
+
else:
|
|
221
|
+
row["Catagories"] = "NA"
|
|
210
222
|
|
|
223
|
+
##next line lambda gc
|
|
211
224
|
objects = re.compile('[a-zA-Z\s\d]+:|[-0-9.]+[e]?[-0-9.]+|NA').findall(lines[1])
|
|
212
225
|
row["Lambda_gc"] = objects[1]
|
|
213
226
|
##next line Mean_chi2
|
|
@@ -240,6 +253,7 @@ def parse_ldsc_summary(ldsc_summary):
|
|
|
240
253
|
row["Intercept_se"]="NA"
|
|
241
254
|
row["Ratio"]="NA"
|
|
242
255
|
row["Ratio_se"]="NA"
|
|
256
|
+
row["Catagories"] = "NA"
|
|
243
257
|
|
|
244
258
|
#summary = summary.append(row,ignore_index=True)
|
|
245
259
|
row = pd.DataFrame([row], columns = summary.columns)
|
gwaslab/qc_fix_sumstats.py
CHANGED
|
@@ -1497,7 +1497,11 @@ def liftover_variant(sumstats,
|
|
|
1497
1497
|
status="STATUS",
|
|
1498
1498
|
from_build="19",
|
|
1499
1499
|
to_build="38"):
|
|
1500
|
-
|
|
1500
|
+
try:
|
|
1501
|
+
converter = get_lifter("hg"+from_build,"hg"+to_build,one_based=True)
|
|
1502
|
+
except:
|
|
1503
|
+
converter = get_lifter("hg"+from_build,"hg"+to_build)
|
|
1504
|
+
|
|
1501
1505
|
dic= get_number_to_chr(in_chr=False,xymt=["X","Y","M"])
|
|
1502
1506
|
dic2= get_chr_to_number(out_chr=False)
|
|
1503
1507
|
for i in sumstats[chrom].unique():
|
|
@@ -1549,7 +1553,6 @@ def parallelizeliftovervariant(sumstats,n_cores=1,chrom="CHR", pos="POS", from_b
|
|
|
1549
1553
|
pool.close()
|
|
1550
1554
|
pool.join()
|
|
1551
1555
|
############################################################################
|
|
1552
|
-
|
|
1553
1556
|
unmap_num = len(sumstats.loc[sumstats[pos].isna(),:])
|
|
1554
1557
|
|
|
1555
1558
|
if remove is True:
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
import numpy as np
|
|
3
|
+
from gwaslab.g_Log import Log
|
|
4
|
+
from gwaslab.util_in_filter_value import _get_flanking_by_chrpos
|
|
5
|
+
from gwaslab.util_in_filter_value import _get_flanking_by_id
|
|
6
|
+
|
|
7
|
+
# Calculate PIP based on approximate Bayesian factor (ABF)
|
|
8
|
+
# Wakefield, J. A bayesian measure of the probability of false discovery in genetic epidemiology studies. Am J Hum Genet 81, 208–227 (2007).
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def calc_abf(insumstats, w=0.2, log=None, verbose=True, **kwargs):
    """Return a copy of ``insumstats`` with a ``log_ABF`` column added.

    ``log_ABF`` is the natural log of the reciprocal of Wakefield's approximate
    Bayes factor, computed per row from the ``BETA`` and ``SE`` columns as
    ``0.5 * (log(1 - r) + r * z**2)`` with ``z = BETA / SE`` and
    ``r = w**2 / (w**2 + SE**2)``.

    Parameters
    ----------
    insumstats : pandas.DataFrame
        Must contain ``BETA`` and ``SE`` columns. It is not modified.
    w : float, default 0.2
        Prior standard deviation of the effect size. The original comments
        suggest 0.2 for binary traits and 0.15 for quantitative traits.
    log : object with a ``write(message, verbose=...)`` method, optional
        Logger. A fresh ``Log()`` is created per call when omitted, instead of
        one instance being built at import time and shared by every call.
    verbose : bool, default True
        Passed through to ``log.write``.
    **kwargs :
        Accepted and ignored so callers can pass a shared option dict.

    Returns
    -------
    pandas.DataFrame
        Copy of the input with the extra ``log_ABF`` column.
    """
    if log is None:
        log = Log()

    log.write("Start to calculate approximate Bayesian factor for {} variants".format(len(insumstats)),verbose=verbose)
    # Fixed citation typo in the message ("akefield" -> "Wakefield").
    log.write(" - Reference: Wakefield, J. A bayesian measure of the probability of false discovery in genetic epidemiology studies. Am J Hum Genet 81, 208–227 (2007).",verbose=verbose)
    log.write(" - Priors for the standard deviation W of the effect size parameter β : {} ".format(w),verbose=verbose)

    # binary -> w=0.2
    # quant -> w=0.15
    omega = w**2
    se = insumstats["SE"]
    v = se**2
    # Shrinkage factor: share of total variance attributed to the prior.
    r = omega / (omega+v)
    z = insumstats["BETA"]/se

    # Copy before writing so the caller's frame is left untouched.
    insumstats = insumstats.copy()

    # (6) ABF -> reciprocal, kept on the log scale
    insumstats.loc[:, "log_ABF"] = 1/2* (np.log(1-r) + (r * z**2))

    return insumstats
|
|
32
|
+
|
|
33
|
+
def calc_PIP(insumstats, log=None, verbose=True, **kwargs):
    """Return a copy of ``insumstats`` with ``log_PIP`` and ``PIP`` columns added.

    The posterior inclusion probability of each row is its Bayes factor divided
    by the sum of all Bayes factors in the frame. The sum is evaluated with the
    log-sum-exp trick (the maximum ``log_ABF`` is subtracted before
    exponentiating) so that large ``log_ABF`` values do not overflow.

    Parameters
    ----------
    insumstats : pandas.DataFrame
        Must contain a ``log_ABF`` column. It is not modified.
    log : object with a ``write(message, verbose=...)`` method, optional
        Logger. A fresh ``Log()`` is created per call when omitted, instead of
        one instance being built at import time and shared by every call.
    verbose : bool, default True
        Passed through to ``log.write``.
    **kwargs :
        Accepted and ignored so callers can pass a shared option dict.

    Returns
    -------
    pandas.DataFrame
        Copy of the input with ``log_PIP`` and ``PIP`` columns.
    """
    if log is None:
        log = Log()

    # Log of the total ABF via log-sum-exp; the maximum is computed once and reused.
    log_abf = insumstats["log_ABF"]
    max_log_abf = np.max(log_abf)
    log_total_abf = np.log(np.sum(np.exp(log_abf - max_log_abf))) + max_log_abf

    insumstats = insumstats.copy()
    log.write("Start to calculate PIP for {} variants".format(len(insumstats)),verbose=verbose)

    # Normalise on the log scale, then map back to the probability scale.
    insumstats.loc[:, "log_PIP"] = insumstats['log_ABF'] - log_total_abf
    insumstats.loc[:, "PIP"] = np.exp(insumstats['log_PIP'])
    return insumstats
|
|
43
|
+
|
|
44
|
+
def abf_finemapping(insumstats, region=None, chrpos=None, snpid=None, log=None, **kwargs):
    """Select one locus from ``insumstats`` and compute ABF-based PIPs for it.

    Exactly one selector is used, checked in the order ``region``, ``chrpos``,
    ``snpid``.

    Parameters
    ----------
    insumstats : pandas.DataFrame
        Summary statistics. The ``region`` path reads the ``CHR`` and ``POS``
        columns; ``calc_abf`` then reads ``BETA`` and ``SE``.
    region : sequence, optional
        ``(chromosome, start, end)``; rows with ``CHR == region[0]`` and
        ``region[1] <= POS <= region[2]`` are kept.
    chrpos : optional
        Forwarded to ``_get_flanking_by_chrpos`` together with ``**kwargs``.
        NOTE(review): presumably a (CHR, POS) anchor plus a flanking window;
        confirm against that helper.
    snpid : optional
        Forwarded to ``_get_flanking_by_id`` together with ``**kwargs``.
    log : object with a ``write(message, verbose=...)`` method, optional
        Logger. A fresh ``Log()`` is created per call when omitted, instead of
        one instance being built at import time and shared by every call.
    **kwargs :
        Forwarded to the flanking helper (if used), ``calc_abf`` and
        ``calc_PIP``.

    Returns
    -------
    pandas.DataFrame
        The selected rows with ``log_ABF``, ``log_PIP`` and ``PIP`` columns.

    Raises
    ------
    ValueError
        If none of ``region``, ``chrpos`` or ``snpid`` is given. This used to
        surface as an ``UnboundLocalError`` on ``region_data``.
    """
    # Fail early with a clear message instead of an unbound local further down.
    if region is None and chrpos is None and snpid is None:
        raise ValueError("Please specify one of region, chrpos or snpid to define the locus for fine-mapping.")

    # Resolve the logger here so the helpers below always receive a real one.
    if log is None:
        log = Log()

    if region is not None:
        in_region = (insumstats["CHR"] == region[0]) & (insumstats["POS"] >= region[1]) & (insumstats["POS"] <= region[2])
        region_data = insumstats[in_region]
    elif chrpos is not None:
        region_data = _get_flanking_by_chrpos(insumstats, chrpos=chrpos,**kwargs)
    else:
        region_data = _get_flanking_by_id(insumstats, snpid=snpid,**kwargs)

    region_data = calc_abf(region_data,log=log,**kwargs)
    region_data = calc_PIP(region_data,log=log,**kwargs)
    return region_data
|
|
56
|
+
|
|
57
|
+
def make_cs(insumstats, threshold=0.95, log=None, verbose=True):
    """Build a credible set: the top-PIP rows whose cumulative PIP exceeds ``threshold``.

    Rows are sorted by ``PIP`` in descending order and taken from the top until
    the running sum is strictly greater than ``threshold``; the row that crosses
    the threshold is included. If the threshold is never exceeded, every row is
    returned.

    The selection is done with a cumulative sum rather than by concatenating one
    row per iteration. That removes the quadratic cost and keeps the original
    column dtypes (the row-wise rebuild turned every column into ``object``).
    Empty input now yields an empty frame that still has the input columns.

    Parameters
    ----------
    insumstats : pandas.DataFrame
        Must contain a ``PIP`` column. It is not modified.
    threshold : float, default 0.95
        Cumulative PIP to exceed.
    log : object with a ``write(message, verbose=...)`` method, optional
        Logger. A fresh ``Log()`` is created per call when omitted, instead of
        one instance being built at import time and shared by every call.
    verbose : bool, default True
        Passed through to ``log.write``.

    Returns
    -------
    pandas.DataFrame
        The credible set, highest PIP first, with the original index labels.
    """
    if log is None:
        log = Log()

    ordered = insumstats.sort_values(by="PIP",ascending=False)

    # First position at which the running PIP total is strictly above the threshold.
    exceeded = (ordered["PIP"].cumsum() > threshold).to_numpy()
    if exceeded.any():
        n_keep = int(exceeded.argmax()) + 1
    else:
        # Threshold never reached (or no rows at all): keep everything.
        n_keep = len(ordered)

    cs = ordered.iloc[:n_keep].copy()
    log.write("Finished constructing a {}% credible set with {} variant(s)".format(str(threshold * 100),str(len(cs))),verbose=verbose)
    return cs
|
gwaslab/util_ex_ldsc.py
CHANGED
|
@@ -304,9 +304,16 @@ def _estimate_h2_by_ldsc(insumstats, log, verbose=True, munge=False, munge_args=
|
|
|
304
304
|
log.write(" -LDSC log:", verbose=verbose)
|
|
305
305
|
summary = estimate_h2(sumstats, default_args, log)
|
|
306
306
|
|
|
307
|
+
results_table = None
|
|
308
|
+
if type(summary) is tuple:
|
|
309
|
+
results_table = summary[1]
|
|
310
|
+
summary = summary[0]
|
|
311
|
+
log.write(" -Coefficient results have been stored in .ldsc_h2_results", verbose=verbose)
|
|
312
|
+
|
|
313
|
+
|
|
307
314
|
log.write(" -Results have been stored in .ldsc_h2", verbose=verbose)
|
|
308
315
|
finished(log=log,verbose=verbose,end_line=_end_line)
|
|
309
|
-
return parse_ldsc_summary(summary)
|
|
316
|
+
return parse_ldsc_summary(summary), results_table
|
|
310
317
|
|
|
311
318
|
|
|
312
319
|
####################################################################################################################
|
gwaslab/util_ex_run_clumping.py
CHANGED
|
@@ -11,7 +11,7 @@ from gwaslab.g_version import _checking_plink_version
|
|
|
11
11
|
def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
|
|
12
12
|
p="P",mlog10p="MLOG10P", overwrite=False, study=None, bfile=None,
|
|
13
13
|
n_cores=1, memory=None, chrom=None, clump_p1=5e-8, clump_p2=5e-8, clump_r2=0.01, clump_kb=250,
|
|
14
|
-
log=Log(),verbose=True):
|
|
14
|
+
log=Log(),verbose=True,plink="plink",plink2="plink2"):
|
|
15
15
|
##start function with col checking##########################################################
|
|
16
16
|
_start_line = "perfrom clumping"
|
|
17
17
|
_end_line = "clumping"
|
|
@@ -111,7 +111,7 @@ def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
|
|
|
111
111
|
bfile_to_use = bfile
|
|
112
112
|
|
|
113
113
|
log.write(" -Performing clumping for CHR {}...".format(i),verbose=verbose)
|
|
114
|
-
log = _checking_plink_version(
|
|
114
|
+
log = _checking_plink_version(plink2=plink2, log=log)
|
|
115
115
|
if memory is not None:
|
|
116
116
|
memory_flag = "--memory {}".format(memory)
|
|
117
117
|
|
|
@@ -123,7 +123,7 @@ def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
|
|
|
123
123
|
if scaled == True:
|
|
124
124
|
# clumping using LOG10P
|
|
125
125
|
script = """
|
|
126
|
-
|
|
126
|
+
{} \
|
|
127
127
|
{}\
|
|
128
128
|
--chr {} \
|
|
129
129
|
--clump {} \
|
|
@@ -136,11 +136,11 @@ def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
|
|
|
136
136
|
--clump-kb {} \
|
|
137
137
|
--threads {} {}\
|
|
138
138
|
--out {}
|
|
139
|
-
""".format(file_flag, chrom, clump, mlog10p,clump_log10_p1, clump_log10_p2, clump_r2, clump_kb, n_cores, memory_flag if memory is not None else "", out_single_chr)
|
|
139
|
+
""".format(plink2, file_flag, chrom, clump, mlog10p,clump_log10_p1, clump_log10_p2, clump_r2, clump_kb, n_cores, memory_flag if memory is not None else "", out_single_chr)
|
|
140
140
|
else:
|
|
141
141
|
# clumping using P
|
|
142
142
|
script = """
|
|
143
|
-
|
|
143
|
+
{} \
|
|
144
144
|
{}\
|
|
145
145
|
--chr {} \
|
|
146
146
|
--clump {} \
|
|
@@ -152,7 +152,7 @@ def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
|
|
|
152
152
|
--clump-kb {} \
|
|
153
153
|
--threads {} {}\
|
|
154
154
|
--out {}
|
|
155
|
-
""".format(file_flag, chrom, clump, p, clump_p1, clump_p2, clump_r2, clump_kb, n_cores,memory_flag if memory is not None else "", out_single_chr)
|
|
155
|
+
""".format(plink2,file_flag, chrom, clump, p, clump_p1, clump_p2, clump_r2, clump_kb, n_cores,memory_flag if memory is not None else "", out_single_chr)
|
|
156
156
|
|
|
157
157
|
try:
|
|
158
158
|
output = subprocess.check_output(script, stderr=subprocess.STDOUT, shell=True,text=True)
|
gwaslab/viz_aux_annotate_plot.py
CHANGED
|
@@ -38,6 +38,7 @@ def annotate_single(
|
|
|
38
38
|
region,
|
|
39
39
|
region_anno_bbox_args,
|
|
40
40
|
skip,
|
|
41
|
+
anno_height=1,
|
|
41
42
|
amode="int",
|
|
42
43
|
snpid="SNPID",
|
|
43
44
|
chrom="CHR",
|
|
@@ -131,7 +132,7 @@ def annotate_single(
|
|
|
131
132
|
|
|
132
133
|
#xy=(row["i"],row["scaled_P"]+0.2)
|
|
133
134
|
xy=(row["i"],row["scaled_P"]+0.01*maxy)
|
|
134
|
-
xytext=(last_pos,1.15*maxy*arm_scale)
|
|
135
|
+
xytext=(last_pos,1.15*maxy*arm_scale*anno_height)
|
|
135
136
|
|
|
136
137
|
if anno_fixed_arm_length is not None:
|
|
137
138
|
armB_length_in_point = anno_fixed_arm_length
|
gwaslab/viz_aux_quickfix.py
CHANGED
|
@@ -286,8 +286,9 @@ def _cut(series, mode,cutfactor,cut,skip, ylabels, cut_log, verbose, lines_to_pl
|
|
|
286
286
|
log.write(" -Converting data above cut line...",verbose=verbose)
|
|
287
287
|
if ylabels is not None:
|
|
288
288
|
ylabels = pd.Series(ylabels)
|
|
289
|
-
maxy = series.max()
|
|
290
289
|
series = series.copy()
|
|
290
|
+
|
|
291
|
+
maxy = series.max()
|
|
291
292
|
if "b" not in mode:
|
|
292
293
|
log.write(" -Maximum -log10(P) value is "+str(maxy) +" .", verbose=verbose)
|
|
293
294
|
elif "b" in mode:
|
|
@@ -77,8 +77,10 @@ def compare_effect(path1,
|
|
|
77
77
|
scaled2 = True
|
|
78
78
|
if is_q_mc=="fdr" or is_q_mc=="bon":
|
|
79
79
|
is_q = True
|
|
80
|
-
|
|
81
|
-
|
|
80
|
+
|
|
81
|
+
if is_q == True:
|
|
82
|
+
if is_q_mc not in [False,"fdr","bon","non"]:
|
|
83
|
+
raise ValueError("Please select either fdr or bon or non for is_q_mc.")
|
|
82
84
|
if save_args is None:
|
|
83
85
|
save_args = {"dpi":300,"facecolor":"white"}
|
|
84
86
|
if reg_box is None:
|
gwaslab/viz_plot_miamiplot2.py
CHANGED
|
@@ -247,7 +247,10 @@ def plot_miami2(
|
|
|
247
247
|
plt.subplots_adjust(hspace=region_hspace)
|
|
248
248
|
else:
|
|
249
249
|
fig, ax1, ax5 = figax
|
|
250
|
-
|
|
250
|
+
|
|
251
|
+
#if same_ylim==True:
|
|
252
|
+
#maxy = merged_sumstats[["scaled_P_1","scaled_P_2"]].max().max()
|
|
253
|
+
|
|
251
254
|
log.write("Start to create Manhattan plot for sumstats1...", verbose=verbose)
|
|
252
255
|
fig,log = mqqplot(merged_sumstats,
|
|
253
256
|
chrom="CHR",
|
|
@@ -284,14 +287,8 @@ def plot_miami2(
|
|
|
284
287
|
_if_quick_qc=False,
|
|
285
288
|
**mqq_args2)
|
|
286
289
|
log.write("Finished creating Manhattan plot for sumstats2".format(_get_version()), verbose=verbose)
|
|
290
|
+
|
|
287
291
|
|
|
288
|
-
if same_ylim==True:
|
|
289
|
-
ylim1_converted = ax1.get_ylim()
|
|
290
|
-
ylim2_converted = ax5.get_ylim()
|
|
291
|
-
if ylim1_converted > ylim2_converted:
|
|
292
|
-
ax5.set_ylim(ylim1_converted)
|
|
293
|
-
else:
|
|
294
|
-
ax1.set_ylim(ylim2_converted)
|
|
295
292
|
#####################################################################################################################
|
|
296
293
|
|
|
297
294
|
ax5.set_xlabel("")
|
gwaslab/viz_plot_mqqplot.py
CHANGED
|
@@ -141,6 +141,7 @@ def mqqplot(insumstats,
|
|
|
141
141
|
anno_max_iter=100,
|
|
142
142
|
arm_offset=50,
|
|
143
143
|
arm_scale=1,
|
|
144
|
+
anno_height=1,
|
|
144
145
|
arm_scale_d=None,
|
|
145
146
|
cut=0,
|
|
146
147
|
skip=0,
|
|
@@ -180,6 +181,7 @@ def mqqplot(insumstats,
|
|
|
180
181
|
xpad=None,
|
|
181
182
|
xpadl=None,
|
|
182
183
|
xpadr=None,
|
|
184
|
+
xtight=False,
|
|
183
185
|
chrpad=0.03,
|
|
184
186
|
drop_chr_start=False,
|
|
185
187
|
title =None,
|
|
@@ -552,7 +554,8 @@ def mqqplot(insumstats,
|
|
|
552
554
|
cut_log = cut_log,
|
|
553
555
|
verbose =verbose,
|
|
554
556
|
lines_to_plot=lines_to_plot,
|
|
555
|
-
log = log
|
|
557
|
+
log = log
|
|
558
|
+
)
|
|
556
559
|
except:
|
|
557
560
|
log.warning("No valid data! Please check the input.")
|
|
558
561
|
return None
|
|
@@ -596,19 +599,23 @@ def mqqplot(insumstats,
|
|
|
596
599
|
sumstats.loc[sumstats["scaled_P"]>-np.log10(sig_level_plot),"s"]=4
|
|
597
600
|
sumstats["chr_hue"]=sumstats[chrom].astype("string")
|
|
598
601
|
|
|
599
|
-
if
|
|
602
|
+
if "r" in mode:
|
|
603
|
+
if vcf_path is None:
|
|
604
|
+
sumstats["LD"]=100
|
|
605
|
+
sumstats["SHAPE"]=1
|
|
600
606
|
sumstats["chr_hue"]=sumstats["LD"]
|
|
607
|
+
|
|
601
608
|
## default seetings
|
|
602
609
|
|
|
603
610
|
palette = sns.color_palette(colors,n_colors=sumstats[chrom].nunique())
|
|
604
|
-
|
|
605
611
|
|
|
606
612
|
legend = None
|
|
607
613
|
style=None
|
|
608
614
|
linewidth=0
|
|
609
615
|
edgecolor="black"
|
|
610
616
|
# if regional plot assign colors
|
|
611
|
-
if
|
|
617
|
+
if "r" in mode:
|
|
618
|
+
#if vcf_path is not None:
|
|
612
619
|
legend=None
|
|
613
620
|
linewidth=1
|
|
614
621
|
if len(region_ref) == 1:
|
|
@@ -631,10 +638,9 @@ def mqqplot(insumstats,
|
|
|
631
638
|
palette[(i+1)*100 + j ] = hex_color
|
|
632
639
|
|
|
633
640
|
edgecolor="none"
|
|
634
|
-
scatter_args["markers"]= region_marker_shapes[:len(region_ref)]
|
|
641
|
+
scatter_args["markers"]= {(i+1):m for i,m in enumerate(region_marker_shapes[:len(region_ref)])}
|
|
635
642
|
style="SHAPE"
|
|
636
|
-
|
|
637
|
-
|
|
643
|
+
|
|
638
644
|
|
|
639
645
|
## if highlight
|
|
640
646
|
highlight_i = pd.DataFrame()
|
|
@@ -977,6 +983,7 @@ def mqqplot(insumstats,
|
|
|
977
983
|
region=region,
|
|
978
984
|
region_anno_bbox_args=region_anno_bbox_args,
|
|
979
985
|
skip=skip,
|
|
986
|
+
anno_height=anno_height,
|
|
980
987
|
snpid=snpid,
|
|
981
988
|
chrom=chrom,
|
|
982
989
|
pos=pos,
|
|
@@ -1040,7 +1047,7 @@ def mqqplot(insumstats,
|
|
|
1040
1047
|
if "qq" in mode:
|
|
1041
1048
|
ax2.set_ylim(ylim)
|
|
1042
1049
|
|
|
1043
|
-
ax1 = _add_pad_to_x_axis(ax1, xpad, xpadl, xpadr, sumstats)
|
|
1050
|
+
ax1 = _add_pad_to_x_axis(ax1, xpad, xpadl, xpadr, sumstats, pos, chrpad, xtight, log = log, verbose=verbose)
|
|
1044
1051
|
|
|
1045
1052
|
# Titles
|
|
1046
1053
|
if title and anno and len(to_annotate)>0:
|
|
@@ -1065,20 +1072,34 @@ def mqqplot(insumstats,
|
|
|
1065
1072
|
|
|
1066
1073
|
|
|
1067
1074
|
|
|
1068
|
-
def _add_pad_to_x_axis(ax1, xpad, xpadl, xpadr, sumstats):
|
|
1075
|
+
def _add_pad_to_x_axis(ax1, xpad, xpadl, xpadr, sumstats, pos, chrpad, xtight, log, verbose):
|
|
1069
1076
|
|
|
1070
|
-
if
|
|
1071
|
-
|
|
1072
|
-
|
|
1073
|
-
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1077
|
+
if xtight==True:
|
|
1078
|
+
log.write(" -Adjusting X padding on both side : tight mode", verbose=verbose)
|
|
1079
|
+
xmax = sumstats["i"].max()
|
|
1080
|
+
xmin= sumstats["i"].min()
|
|
1081
|
+
ax1.set_xlim([xmin, xmax])
|
|
1082
|
+
|
|
1083
|
+
else:
|
|
1084
|
+
chrpad_to_remove = sumstats[pos].max()*chrpad
|
|
1085
|
+
if ax1 is not None:
|
|
1086
|
+
xmin, xmax = ax1.get_xlim()
|
|
1087
|
+
length = xmax - xmin
|
|
1088
|
+
|
|
1089
|
+
if xpad is not None:
|
|
1090
|
+
log.write(" -Adjusting X padding on both side: {}".format(xpad), verbose=verbose)
|
|
1091
|
+
pad = xpad* length #sumstats["i"].max()
|
|
1092
|
+
ax1.set_xlim([xmin - pad + chrpad_to_remove, xmax + pad - chrpad_to_remove])
|
|
1093
|
+
if xpad is None and xpadl is not None:
|
|
1094
|
+
log.write(" -Adjusting X padding on left side: {}".format(xpadl), verbose=verbose)
|
|
1095
|
+
xmin, xmax = ax1.get_xlim()
|
|
1096
|
+
pad = xpadl*length # sumstats["i"].max()
|
|
1097
|
+
ax1.set_xlim([xmin - pad + chrpad_to_remove ,xmax])
|
|
1098
|
+
if xpad is None and xpadr is not None:
|
|
1099
|
+
log.write(" -Adjusting X padding on right side: {}".format(xpadr), verbose=verbose)
|
|
1100
|
+
xmin, xmax = ax1.get_xlim()
|
|
1101
|
+
pad = xpadr*length # sumstats["i"].max()
|
|
1102
|
+
ax1.set_xlim([xmin, xmax + pad - chrpad_to_remove])
|
|
1082
1103
|
|
|
1083
1104
|
return ax1
|
|
1084
1105
|
|
gwaslab/viz_plot_regional2.py
CHANGED
|
@@ -5,6 +5,7 @@ import matplotlib.patches as patches
|
|
|
5
5
|
import seaborn as sns
|
|
6
6
|
import numpy as np
|
|
7
7
|
import copy
|
|
8
|
+
import re
|
|
8
9
|
import scipy as sp
|
|
9
10
|
from pyensembl import EnsemblRelease
|
|
10
11
|
from allel import GenotypeArray
|
|
@@ -96,19 +97,20 @@ def _plot_regional(
|
|
|
96
97
|
marker_size= marker_size,
|
|
97
98
|
region_marker_shapes=region_marker_shapes,
|
|
98
99
|
log=log,verbose=verbose)
|
|
99
|
-
if lead_id_single is not None:
|
|
100
|
-
|
|
100
|
+
#if lead_id_single is not None:
|
|
101
|
+
lead_ids.append(lead_id_single)
|
|
101
102
|
|
|
102
103
|
# update region_ref to variant rsID or variantID / skip NAs
|
|
103
104
|
new_region_ref = []
|
|
104
105
|
for i in range(len(lead_ids)):
|
|
105
106
|
if lead_ids[i] is None:
|
|
107
|
+
new_region_ref.append(region_ref[i])
|
|
106
108
|
continue
|
|
107
109
|
if region_ref[i] is None:
|
|
108
|
-
if "
|
|
109
|
-
new_name = sumstats.loc[lead_ids[i],"rsID"]
|
|
110
|
-
elif "SNPID" in sumstats.columns:
|
|
110
|
+
if "SNPID" in sumstats.columns:
|
|
111
111
|
new_name = sumstats.loc[lead_ids[i],"SNPID"]
|
|
112
|
+
elif "rsID" in sumstats.columns:
|
|
113
|
+
new_name = sumstats.loc[lead_ids[i],"rsID"]
|
|
112
114
|
else:
|
|
113
115
|
new_name = "chr{}:{}".format(sumstats.loc[lead_ids[i],"CHR"] , sumstats.loc[lead_ids[i],"POS"])
|
|
114
116
|
new_region_ref.append(new_name)
|
|
@@ -162,7 +164,6 @@ def _plot_regional(
|
|
|
162
164
|
lead_snp_ys = []
|
|
163
165
|
lead_snp_is = []
|
|
164
166
|
lead_snp_is_colors = []
|
|
165
|
-
|
|
166
167
|
for i,lead_id_single in enumerate(lead_ids):
|
|
167
168
|
if lead_id_single is not None:
|
|
168
169
|
lead_snp_ys.append(sumstats.loc[lead_id_single,"scaled_P"] )
|
|
@@ -258,11 +259,35 @@ def _get_lead_id(sumstats=None, region_ref=None, log=None, verbose=True):
|
|
|
258
259
|
if type(lead_id) is list:
|
|
259
260
|
if len(lead_id)>0:
|
|
260
261
|
lead_id = int(lead_id[0])
|
|
261
|
-
|
|
262
|
+
|
|
262
263
|
if region_ref_to_check is not None:
|
|
263
264
|
if type(lead_id) is list:
|
|
264
265
|
if len(lead_id)==0 :
|
|
265
|
-
|
|
266
|
+
#try:
|
|
267
|
+
matched_snpid = re.match("(chr)?[0-9]+:[0-9]+:[ATCG]+:[ATCG]+", region_ref_to_check, re.IGNORECASE)
|
|
268
|
+
if matched_snpid is None:
|
|
269
|
+
pass
|
|
270
|
+
else:
|
|
271
|
+
lead_snpid = matched_snpid.group(0).split(":")
|
|
272
|
+
if len(lead_snpid)==4:
|
|
273
|
+
lead_chr= int(lead_snpid[0])
|
|
274
|
+
lead_pos= int(lead_snpid[1])
|
|
275
|
+
lead_ea= lead_snpid[2]
|
|
276
|
+
lead_nea= lead_snpid[3]
|
|
277
|
+
chrpos_match = (sumstats["CHR"] == lead_chr) & (sumstats["POS"] == lead_pos)
|
|
278
|
+
eanea_match = ((sumstats["EA"] == lead_ea) & (sumstats["NEA"] == lead_nea)) | ((sumstats["EA"] == lead_nea) & (sumstats["NEA"] == lead_ea))
|
|
279
|
+
if "rsID" in sumstats.columns:
|
|
280
|
+
lead_id = sumstats.index[chrpos_match&eanea_match].to_list()
|
|
281
|
+
if "SNPID" in sumstats.columns:
|
|
282
|
+
lead_id = sumstats.index[chrpos_match&eanea_match].to_list()
|
|
283
|
+
if type(lead_id) is list:
|
|
284
|
+
if len(lead_id)>0:
|
|
285
|
+
lead_id = int(lead_id[0])
|
|
286
|
+
log.warning("Trying matching variant {} using CHR:POS:EA:NEA to {}... ".format(region_ref_to_check,lead_id))
|
|
287
|
+
|
|
288
|
+
if type(lead_id) is list:
|
|
289
|
+
if len(lead_id)==0 :
|
|
290
|
+
log.warning("Extracting variant: {} not found in sumstats.. Skipping..".format(region_ref_to_check))
|
|
266
291
|
#lead_id = sumstats["scaled_P"].idxmax()
|
|
267
292
|
lead_id = None
|
|
268
293
|
return lead_id
|
|
@@ -275,7 +300,7 @@ def _get_lead_id(sumstats=None, region_ref=None, log=None, verbose=True):
|
|
|
275
300
|
|
|
276
301
|
return lead_id
|
|
277
302
|
|
|
278
|
-
def _pinpoint_lead(sumstats,ax1,region_ref, lead_color, marker_size, log, verbose,region_marker_shapes):
|
|
303
|
+
def _pinpoint_lead(sumstats,ax1,region_ref, lead_color, marker_size, log, verbose, region_marker_shapes):
|
|
279
304
|
|
|
280
305
|
if region_ref is None:
|
|
281
306
|
log.write(" -Extracting lead variant..." , verbose=verbose)
|
|
@@ -416,6 +441,11 @@ def _plot_gene_track(
|
|
|
416
441
|
texts_to_adjust_left = []
|
|
417
442
|
texts_to_adjust_middle = []
|
|
418
443
|
texts_to_adjust_right = []
|
|
444
|
+
|
|
445
|
+
|
|
446
|
+
sig_gene_names=[]
|
|
447
|
+
sig_gene_lefts=[]
|
|
448
|
+
sig_gene_rights=[]
|
|
419
449
|
for index,row in uniq_gene_region.iterrows():
|
|
420
450
|
|
|
421
451
|
gene_color="#020080"
|
|
@@ -426,21 +456,18 @@ def _plot_gene_track(
|
|
|
426
456
|
gene_anno = "<-" + row["name"]
|
|
427
457
|
|
|
428
458
|
|
|
429
|
-
|
|
430
|
-
sig_gene_lefts=[]
|
|
431
|
-
sig_gene_rights=[]
|
|
459
|
+
|
|
432
460
|
for lead_snp_i in lead_snp_is:
|
|
433
461
|
if region_lead_grid is True and lead_snp_i > gene_track_start_i+row["start"] and lead_snp_i < gene_track_start_i+row["end"] :
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
462
|
+
gene_color=region_lead_grid_line["color"]
|
|
463
|
+
sig_gene_names.append(row["name"])
|
|
464
|
+
sig_gene_lefts.append(gene_track_start_i+row["start"])
|
|
465
|
+
sig_gene_rights.append(gene_track_start_i+row["end"])
|
|
438
466
|
|
|
439
467
|
# plot gene line
|
|
440
468
|
ax3.plot((gene_track_start_i+row["start"],gene_track_start_i+row["end"]),
|
|
441
469
|
(row["stack"]*2,row["stack"]*2),color=gene_color,linewidth=linewidth_in_points/10)
|
|
442
470
|
|
|
443
|
-
|
|
444
471
|
# plot gene name
|
|
445
472
|
if row["end"] >= region[2]:
|
|
446
473
|
#right side
|
|
@@ -459,6 +486,7 @@ def _plot_gene_track(
|
|
|
459
486
|
for index,row in exons.iterrows():
|
|
460
487
|
exon_color="#020080"
|
|
461
488
|
for sig_gene_name, sig_gene_left, sig_gene_right in zip(sig_gene_names,sig_gene_lefts,sig_gene_rights):
|
|
489
|
+
|
|
462
490
|
if not pd.isnull(row["name"]):
|
|
463
491
|
if (region_lead_grid is True) and row["name"]==sig_gene_name:
|
|
464
492
|
exon_color = region_lead_grid_line["color"]
|
|
@@ -468,7 +496,7 @@ def _plot_gene_track(
|
|
|
468
496
|
exon_color = region_lead_grid_line["color"]
|
|
469
497
|
else:
|
|
470
498
|
exon_color="#020080"
|
|
471
|
-
|
|
499
|
+
|
|
472
500
|
ax3.plot((gene_track_start_i+row["start"],gene_track_start_i+row["end"]),
|
|
473
501
|
(row["stack"]*2,row["stack"]*2),linewidth=linewidth_in_points*taf[3],color=exon_color,solid_capstyle="butt")
|
|
474
502
|
|
|
@@ -550,24 +578,42 @@ def process_vcf(sumstats,
|
|
|
550
578
|
# figure out lead variant
|
|
551
579
|
lead_id = _get_lead_id(sumstats, region_ref_single, log, verbose)
|
|
552
580
|
|
|
553
|
-
if lead_id is None:
|
|
554
|
-
sumstats[rsq] = None
|
|
555
|
-
sumstats[rsq] = sumstats[rsq].astype("float")
|
|
556
|
-
sumstats[ld_single] = 0
|
|
557
|
-
continue
|
|
558
581
|
|
|
559
|
-
|
|
582
|
+
lead_series = None
|
|
583
|
+
if lead_id is None:
|
|
584
|
+
|
|
585
|
+
matched_snpid = re.match("(chr)?[0-9]+:[0-9]+:[ATCG]+:[ATCG]+",region_ref_single, re.IGNORECASE)
|
|
586
|
+
|
|
587
|
+
if matched_snpid is None:
|
|
588
|
+
sumstats[rsq] = None
|
|
589
|
+
sumstats[rsq] = sumstats[rsq].astype("float")
|
|
590
|
+
sumstats[ld_single] = 0
|
|
591
|
+
continue
|
|
592
|
+
else:
|
|
593
|
+
|
|
594
|
+
lead_snpid = matched_snpid.group(0).split(":")[1:]
|
|
595
|
+
lead_pos = int(lead_snpid[0])
|
|
596
|
+
lead_snpid[0]= int(lead_snpid[0])
|
|
597
|
+
lead_series = pd.Series(lead_snpid)
|
|
598
|
+
else:
|
|
599
|
+
lead_pos = sumstats.loc[lead_id,pos]
|
|
560
600
|
|
|
601
|
+
|
|
561
602
|
# if lead pos is available:
|
|
562
603
|
if lead_pos in ref_genotype["variants/POS"]:
|
|
563
604
|
|
|
564
605
|
# get ref index for lead snp
|
|
565
|
-
|
|
566
|
-
|
|
606
|
+
if lead_series is None:
|
|
607
|
+
lead_snp_ref_index = match_varaint(sumstats.loc[lead_id,[pos,nea,ea]])
|
|
608
|
+
#lead_snp_ref_index = np.where(ref_genotype["variants/POS"] == lead_pos)[0][0]
|
|
609
|
+
else:
|
|
610
|
+
log.warning("Computing LD: {} not found in sumstats but found in reference...Still Computing...".format(region_ref_single))
|
|
611
|
+
lead_snp_ref_index = match_varaint(lead_series)
|
|
567
612
|
|
|
568
613
|
# non-na other snp index
|
|
569
614
|
other_snps_ref_index = sumstats["REFINDEX"].dropna().astype("int").values
|
|
570
615
|
# get genotype
|
|
616
|
+
|
|
571
617
|
lead_snp_genotype = GenotypeArray([ref_genotype["calldata/GT"][lead_snp_ref_index]]).to_n_alt()
|
|
572
618
|
try:
|
|
573
619
|
if len(set(lead_snp_genotype[0]))==1:
|
|
@@ -604,10 +650,10 @@ def process_vcf(sumstats,
|
|
|
604
650
|
sumstats.loc[to_change_color,ld_single] = 1
|
|
605
651
|
to_change_color = sumstats[rsq]>ld_threshold
|
|
606
652
|
sumstats.loc[to_change_color,ld_single] = index+2
|
|
607
|
-
|
|
608
|
-
sumstats.loc[lead_id,ld_single] = len(region_ld_threshold)+2
|
|
609
653
|
|
|
610
|
-
|
|
654
|
+
if lead_series is None:
|
|
655
|
+
sumstats.loc[lead_id,ld_single] = len(region_ld_threshold)+2
|
|
656
|
+
sumstats.loc[lead_id,lead] = 1
|
|
611
657
|
|
|
612
658
|
####################################################################################################
|
|
613
659
|
final_shape_col = "SHAPE"
|
|
@@ -57,7 +57,9 @@ def plot_stacked_mqq(objects,
|
|
|
57
57
|
fig_args=None,
|
|
58
58
|
region_hspace=0.05,
|
|
59
59
|
subplot_height=4,
|
|
60
|
+
region_lead_grids = None,
|
|
60
61
|
region_lead_grid_line=None,
|
|
62
|
+
region_ld_legends = None,
|
|
61
63
|
fontsize=9,
|
|
62
64
|
font_family="Arial",
|
|
63
65
|
build="99",
|
|
@@ -85,6 +87,8 @@ def plot_stacked_mqq(objects,
|
|
|
85
87
|
region_chromatin_height = len(region_chromatin_files) * region_chromatin_height
|
|
86
88
|
if region_chromatin_labels is None:
|
|
87
89
|
region_chromatin_labels = []
|
|
90
|
+
if region_ld_legends is None:
|
|
91
|
+
region_ld_legends = [0]
|
|
88
92
|
if title_args is None:
|
|
89
93
|
title_args = {"family":"Arial"}
|
|
90
94
|
else:
|
|
@@ -125,7 +129,9 @@ def plot_stacked_mqq(objects,
|
|
|
125
129
|
'width_ratios':[mqqratio,1]},
|
|
126
130
|
**fig_args)
|
|
127
131
|
plt.subplots_adjust(hspace=region_hspace)
|
|
128
|
-
|
|
132
|
+
|
|
133
|
+
if region_lead_grids is None:
|
|
134
|
+
region_lead_grids = [i for i in range(len(axes))]
|
|
129
135
|
##########################################################################################################################################
|
|
130
136
|
mqq_args_for_each_plot = _sort_args(mqq_args, n_plot)
|
|
131
137
|
##########################################################################################################################################
|
|
@@ -149,6 +155,10 @@ def plot_stacked_mqq(objects,
|
|
|
149
155
|
figax = (fig,axes[index],axes[-1])
|
|
150
156
|
elif mode=="mqq":
|
|
151
157
|
figax = (fig,axes[index,0],axes[index,1])
|
|
158
|
+
if index in region_ld_legends:
|
|
159
|
+
region_ld_legend = True
|
|
160
|
+
else:
|
|
161
|
+
region_ld_legend = False
|
|
152
162
|
#################################################################
|
|
153
163
|
if index==0:
|
|
154
164
|
# plot last m and gene track
|
|
@@ -164,6 +174,7 @@ def plot_stacked_mqq(objects,
|
|
|
164
174
|
fontsize=fontsize,
|
|
165
175
|
font_family=font_family,
|
|
166
176
|
region_lead_grid=False,
|
|
177
|
+
region_ld_legend=region_ld_legend,
|
|
167
178
|
gtf_path="default",
|
|
168
179
|
rr_ylabel=False,
|
|
169
180
|
figax=figax,
|
|
@@ -192,7 +203,7 @@ def plot_stacked_mqq(objects,
|
|
|
192
203
|
font_family=font_family,
|
|
193
204
|
mode=mode,
|
|
194
205
|
rr_ylabel=False,
|
|
195
|
-
region_ld_legend=
|
|
206
|
+
region_ld_legend=region_ld_legend,
|
|
196
207
|
gtf_path=None,
|
|
197
208
|
figax=figax,
|
|
198
209
|
_get_region_lead=True,
|
|
@@ -257,7 +268,7 @@ def plot_stacked_mqq(objects,
|
|
|
257
268
|
|
|
258
269
|
##########################################################################################################################################
|
|
259
270
|
# draw the line for lead variants
|
|
260
|
-
_draw_grid_line_for_lead_variants(mode, lead_variants_is,lead_variants_is_color, n_plot, axes, region_lead_grid_line,region_chromatin_files)
|
|
271
|
+
_draw_grid_line_for_lead_variants(mode, lead_variants_is,lead_variants_is_color, n_plot, axes, region_lead_grid_line,region_chromatin_files,region_lead_grids)
|
|
261
272
|
|
|
262
273
|
##########################################################################################################################################
|
|
263
274
|
_drop_old_y_labels(axes, n_plot)
|
|
@@ -275,21 +286,24 @@ def _drop_old_y_labels(axes, n_plot):
|
|
|
275
286
|
for index in range(n_plot):
|
|
276
287
|
axes[index].set_ylabel("")
|
|
277
288
|
|
|
278
|
-
def _draw_grid_line_for_lead_variants(mode, lead_variants_is,lead_variants_is_color, n_plot, axes, region_lead_grid_line,region_chromatin_files):
|
|
289
|
+
def _draw_grid_line_for_lead_variants(mode, lead_variants_is,lead_variants_is_color, n_plot, axes, region_lead_grid_line,region_chromatin_files,region_lead_grids):
|
|
279
290
|
if len(region_chromatin_files)>0:
|
|
280
291
|
n_plot_and_track = n_plot+2
|
|
281
292
|
else:
|
|
282
293
|
n_plot_and_track = n_plot+1
|
|
294
|
+
|
|
295
|
+
plotted=[None]
|
|
283
296
|
if mode=="r":
|
|
284
297
|
for index, sig_is in lead_variants_is.items():
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
298
|
+
if index in region_lead_grids:
|
|
299
|
+
for j, sig_i in enumerate(sig_is):
|
|
300
|
+
try:
|
|
301
|
+
region_lead_grid_line["color"] = lead_variants_is_color[index][j]
|
|
302
|
+
except:
|
|
303
|
+
pass
|
|
304
|
+
if sig_i not in plotted:
|
|
305
|
+
for each_axis_index in range(n_plot_and_track):
|
|
306
|
+
axes[each_axis_index].axvline(x=sig_i, zorder=2,**region_lead_grid_line)
|
|
293
307
|
|
|
294
308
|
def _add_new_y_label(mode, fig, gene_track_height,n_plot,subplot_height ,fontsize,font_family):
|
|
295
309
|
gene_track_height_ratio = gene_track_height/(gene_track_height + n_plot*subplot_height)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: gwaslab
|
|
3
|
-
Version: 3.4.47
|
|
3
|
+
Version: 3.4.48
|
|
4
4
|
Summary: A collection of handy tools for GWAS SumStats
|
|
5
5
|
Author-email: Yunye <yunye@gwaslab.com>
|
|
6
6
|
Project-URL: Homepage, https://cloufield.github.io/gwaslab/
|
|
@@ -12,19 +12,19 @@ Requires-Python: <3.11,>=3.9
|
|
|
12
12
|
Description-Content-Type: text/markdown
|
|
13
13
|
License-File: LICENSE
|
|
14
14
|
License-File: LICENSE_before_v3.4.39
|
|
15
|
-
Requires-Dist: pandas
|
|
16
|
-
Requires-Dist: numpy
|
|
17
|
-
Requires-Dist: matplotlib
|
|
18
|
-
Requires-Dist: seaborn
|
|
19
|
-
Requires-Dist: scipy
|
|
20
|
-
Requires-Dist: pySAM
|
|
21
|
-
Requires-Dist: Biopython
|
|
22
|
-
Requires-Dist: adjustText
|
|
23
|
-
Requires-Dist: liftover
|
|
24
|
-
Requires-Dist: scikit-allel
|
|
25
|
-
Requires-Dist: pyensembl
|
|
26
|
-
Requires-Dist: gtfparse
|
|
27
|
-
Requires-Dist: h5py
|
|
15
|
+
Requires-Dist: pandas!=1.5,>=1.3
|
|
16
|
+
Requires-Dist: numpy<2,>=1.21.2
|
|
17
|
+
Requires-Dist: matplotlib!=3.7.2,<3.9,>=3.5
|
|
18
|
+
Requires-Dist: seaborn>=0.12
|
|
19
|
+
Requires-Dist: scipy>=1.12
|
|
20
|
+
Requires-Dist: pySAM==0.22.1
|
|
21
|
+
Requires-Dist: Biopython>=1.79
|
|
22
|
+
Requires-Dist: adjustText<=0.8,>=0.7.3
|
|
23
|
+
Requires-Dist: liftover>=1.1.13
|
|
24
|
+
Requires-Dist: scikit-allel>=1.3.5
|
|
25
|
+
Requires-Dist: pyensembl==2.2.3
|
|
26
|
+
Requires-Dist: gtfparse==1.3.0
|
|
27
|
+
Requires-Dist: h5py>=3.10.0
|
|
28
28
|
|
|
29
29
|
# GWASLab
|
|
30
30
|
|
|
@@ -51,7 +51,7 @@ Warning: Known issues of GWASLab are summarized in [https://cloufield.github.io/
|
|
|
51
51
|
### install via pip
|
|
52
52
|
|
|
53
53
|
```
|
|
54
|
-
pip install gwaslab==3.4.
|
|
54
|
+
pip install gwaslab==3.4.46
|
|
55
55
|
```
|
|
56
56
|
|
|
57
57
|
```python
|
|
@@ -6,18 +6,18 @@ gwaslab/bd_get_hapmap3.py,sha256=asNjQYeGfQi8u3jnfenRvDdKMs5ptql5wpcUzqMlwUI,393
|
|
|
6
6
|
gwaslab/cache_manager.py,sha256=HOTnSkCOyGEPLRl90WT8D_6pAdI8d8AzenMIDGuCeWc,28113
|
|
7
7
|
gwaslab/g_Log.py,sha256=C3Zv-_6c3C9ms8bgQ-ytplz22sjk7euqXYkWr9zNeAs,1573
|
|
8
8
|
gwaslab/g_Phenotypes.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
-
gwaslab/g_Sumstats.py,sha256=
|
|
9
|
+
gwaslab/g_Sumstats.py,sha256=U5la-fJCSTN8FNxXjcULcPH8MV9D6YgMaVFNesMTa_o,35854
|
|
10
10
|
gwaslab/g_SumstatsPair.py,sha256=20snPb4SlI6ftMGVjgxAuyxsxYRQF-GzzlBSnoB-3Lo,8851
|
|
11
11
|
gwaslab/g_SumstatsT.py,sha256=u_DighLMnMxwTLnqm-B58pA0G6WXRj6pudPyKMVKjSU,2133
|
|
12
12
|
gwaslab/g_Sumstats_summary.py,sha256=FECvvFXJVKaCX5dggBvvk9YvJ6AbdbcLfjltysX7wEE,6380
|
|
13
13
|
gwaslab/g_meta.py,sha256=htWlgURWclm9R6UqFcX1a93WN27xny7lGUeyJZOtszQ,2583
|
|
14
14
|
gwaslab/g_vchange_status.py,sha256=jLoVzMJFhB5k_cJKzHuBNc2HZGBWydAunCNa0n_d54g,1923
|
|
15
|
-
gwaslab/g_version.py,sha256=
|
|
15
|
+
gwaslab/g_version.py,sha256=kB6LiBYcRZElabjDUUEBhF5c5SVUq8vHO0pctDfBqkw,1886
|
|
16
16
|
gwaslab/hm_casting.py,sha256=FqP4EQl83Q2OKLw004OgLIvUH795TVCGwziLk5jsHqY,11368
|
|
17
17
|
gwaslab/hm_harmonize_sumstats.py,sha256=1hjUdle2DSKHGBp2BktfFqf-QHU_q2xWl_mPhiYc_ZA,78616
|
|
18
18
|
gwaslab/hm_rsid_to_chrpos.py,sha256=ODWREO0jPN0RAfNzL5fRzSRANfhiksOvUVPuEsFZQqA,6552
|
|
19
19
|
gwaslab/io_preformat_input.py,sha256=AZ43WGqVTzbo3XtClWhjRjsj6pBR9stw6JBL_TZ461U,20673
|
|
20
|
-
gwaslab/io_read_ldsc.py,sha256=
|
|
20
|
+
gwaslab/io_read_ldsc.py,sha256=wsYXpH50IchBKd2dhYloSqc4YgnDkiwMsAweaCoN5Eo,12471
|
|
21
21
|
gwaslab/io_read_tabular.py,sha256=EG-C6KhCutt4J4LlOMgXnqzJvU-EZXzVhMvaDFnHrMM,2380
|
|
22
22
|
gwaslab/io_to_formats.py,sha256=m57dGoqmHzAE1E27j9YxYKVCA12_lKd1qCnZtp0WZLw,29401
|
|
23
23
|
gwaslab/io_to_pickle.py,sha256=HhePU0VcaGni0HTNU0BqoRaOnrr0NOxotgY6ISdx3Ck,1833
|
|
@@ -28,18 +28,19 @@ gwaslab/ldsc_parse.py,sha256=MBnfgcWlV4oHp9MoDRh1mpilaHhAR15Af77hMFn4-5k,10564
|
|
|
28
28
|
gwaslab/ldsc_regressions.py,sha256=yzbGjgNV7u-SWXNPsh9S8y9mK97Bim_Nmad9G9V18ZU,30078
|
|
29
29
|
gwaslab/ldsc_sumstats.py,sha256=O0olsDxKlh1MJ1gAuEN1t40rxhajOEwOQ20ak7xoDrI,26245
|
|
30
30
|
gwaslab/qc_check_datatype.py,sha256=kW68uk4dTLOU2b1dHoVat6n0loundDysAjIqxsXW28Q,3379
|
|
31
|
-
gwaslab/qc_fix_sumstats.py,sha256
|
|
31
|
+
gwaslab/qc_fix_sumstats.py,sha256=aTX5jf7MVpp6XTiP-rbEyaLAQFsn1gyH9vEH4SLH-GY,93199
|
|
32
32
|
gwaslab/run_script.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
33
|
+
gwaslab/util_abf_finemapping.py,sha256=LRcopjtkT-iXtKPAJIzR4qjPdhD7nrS_BGit4EW89FM,3054
|
|
33
34
|
gwaslab/util_ex_calculate_ldmatrix.py,sha256=Z_spxbq6SHDS0v84I59YTTF40iyLQIOZbt0dmEcNJjw,15417
|
|
34
35
|
gwaslab/util_ex_calculate_prs.py,sha256=9uJ588Sdj4V0vw3OZ9NeLECwOvW67f0IdLandVPS5RY,9442
|
|
35
36
|
gwaslab/util_ex_gwascatalog.py,sha256=--Gde9HrsikfYTeFqSaYz0gUODr9wmv_gV6GZGNlElg,7688
|
|
36
37
|
gwaslab/util_ex_ldproxyfinder.py,sha256=wWNW9wITWozj23gT41LR00WxU-rrHpGKbxs2H_3jEyM,9431
|
|
37
|
-
gwaslab/util_ex_ldsc.py,sha256=
|
|
38
|
+
gwaslab/util_ex_ldsc.py,sha256=dd3QUNX7eOJY_z4mkKIE9Ey_SQGE4ox0efhq28ah_LE,18592
|
|
38
39
|
gwaslab/util_ex_plink_filter.py,sha256=pK1Yxtv9-J4rMOdVAG7VU9PktvI6-y4FxBiVEH0QuRs,1673
|
|
39
40
|
gwaslab/util_ex_process_h5.py,sha256=ynFvo3zxgvOxWYL565v2IQf8P6iEuq7UlKQ_ULxrd6Y,2831
|
|
40
41
|
gwaslab/util_ex_process_ref.py,sha256=GQ0ZEWLxGpHLdBs3tqnAqKn3Pqx1A1YvNbYrBLBvXeg,17126
|
|
41
42
|
gwaslab/util_ex_run_2samplemr.py,sha256=5c0DGF694T9j0Y58L2I7pr1_Z1hfpaatIgix7P5oPA8,9127
|
|
42
|
-
gwaslab/util_ex_run_clumping.py,sha256=
|
|
43
|
+
gwaslab/util_ex_run_clumping.py,sha256=bs9CJENMhwVXolCNygaYtso5ikPrdPMiV13tGA0ttxg,7915
|
|
43
44
|
gwaslab/util_ex_run_coloc.py,sha256=u57h8wPbTCOf6aY5u5DpzK1gv7inuDT8a15UGo-1ras,6288
|
|
44
45
|
gwaslab/util_ex_run_susie.py,sha256=TXqiwBVq1io7XSlLF2_gNsYgxDLiKNnYE9pIjRWJ1Hc,5315
|
|
45
46
|
gwaslab/util_in_calculate_gc.py,sha256=MWOXVzJv7SZx4i2_ncRiqsiEOADc7EfghaUzgGy4jaE,2219
|
|
@@ -52,22 +53,22 @@ gwaslab/util_in_get_density.py,sha256=kpKXH69acMkeYVG5vs-VbJC3COhmuLBfYco-wuOxgj
|
|
|
52
53
|
gwaslab/util_in_get_sig.py,sha256=9kq1GXacknO2YnVmsTli1GlPA728ASweTZ3UKm3Wszo,38783
|
|
53
54
|
gwaslab/util_in_meta.py,sha256=5K9lIZcIgUy0AERqHy1GvMN2X6dp45JUUgopuDLgt4o,11284
|
|
54
55
|
gwaslab/util_in_snphwe.py,sha256=-KpIDx6vn_nah6H55IkV2OyjXQVXV13XyBL069WE1wM,1751
|
|
55
|
-
gwaslab/viz_aux_annotate_plot.py,sha256=
|
|
56
|
+
gwaslab/viz_aux_annotate_plot.py,sha256=UTOx-OpFsM_UVphBHxqQZwCFI8bjwwB6CMsgFbAfheQ,32215
|
|
56
57
|
gwaslab/viz_aux_chromatin.py,sha256=7cGmej5EkKO7fxR1b5w8r1oRRl9ofVzFRG52SCYWtz0,4109
|
|
57
|
-
gwaslab/viz_aux_quickfix.py,sha256=
|
|
58
|
+
gwaslab/viz_aux_quickfix.py,sha256=5XM-J9D2yp5FVbK6YQKEhSCqAYBLNWSQms9WjpXxVn8,18313
|
|
58
59
|
gwaslab/viz_aux_reposition_text.py,sha256=iRIP-Rkltlei068HekJcVubiqPrunBqvAoSQ1eHk04M,4304
|
|
59
60
|
gwaslab/viz_aux_save_figure.py,sha256=nL-aoE8Kg06h7FgleGRBIZjhI-6w5gpn3E1HWMwBig8,2664
|
|
60
61
|
gwaslab/viz_plot_compare_af.py,sha256=qtXW45-Sq_ugK8ZfqBYMpmf58SKi3lB3YyHnzn_akcE,5344
|
|
61
|
-
gwaslab/viz_plot_compare_effect.py,sha256=
|
|
62
|
+
gwaslab/viz_plot_compare_effect.py,sha256=lV-4KSQatTSsYqzEGGTkBu17I2VlVyVh1uJr-kCq630,49585
|
|
62
63
|
gwaslab/viz_plot_forestplot.py,sha256=xgOnefh737CgdQxu5naVyRNBX1NQXPFKzf51fbh6afs,6771
|
|
63
64
|
gwaslab/viz_plot_miamiplot.py,sha256=rCFEp7VNuVqeBBG3WRkmFAtFklbF79BvIQQYiSY70VY,31238
|
|
64
|
-
gwaslab/viz_plot_miamiplot2.py,sha256=
|
|
65
|
-
gwaslab/viz_plot_mqqplot.py,sha256=
|
|
65
|
+
gwaslab/viz_plot_miamiplot2.py,sha256=Jg5j9qsr8gGydU8AOKU8xRZTNH_3QM8Nxv61Vh12aG0,15732
|
|
66
|
+
gwaslab/viz_plot_mqqplot.py,sha256=q_c_ZDRdirEcdHHnb030h7dTUWAFy9OVuzwbdCzZ3gU,64580
|
|
66
67
|
gwaslab/viz_plot_qqplot.py,sha256=psQgVpP29686CEZkzQz0iRbApzqy7aE3GGiBcazVvNw,7247
|
|
67
|
-
gwaslab/viz_plot_regional2.py,sha256=
|
|
68
|
+
gwaslab/viz_plot_regional2.py,sha256=tBoGox-4ngL5o_twdIjk_VW6Iam3JDyrPKuttm6_4Sg,36862
|
|
68
69
|
gwaslab/viz_plot_regionalplot.py,sha256=8u-5-yfy-UaXhaxVVz3Y5k2kBAoqzczUw1hyyD450iI,37983
|
|
69
70
|
gwaslab/viz_plot_rg_heatmap.py,sha256=PidUsgOiEVt6MfBPCF3_yDhOEytZ-I1q-ZD6_0pFrV4,13713
|
|
70
|
-
gwaslab/viz_plot_stackedregional.py,sha256=
|
|
71
|
+
gwaslab/viz_plot_stackedregional.py,sha256=AoGMer73P3y-21A2qityrDX-_sIJFRgKzkm8i9zVxKA,16420
|
|
71
72
|
gwaslab/viz_plot_trumpetplot.py,sha256=ZHdc6WcVx0-oKoj88yglRkmB4bS9pOiEMcuwKW35Yvo,42672
|
|
72
73
|
gwaslab/data/formatbook.json,sha256=N2nJs80HH98Rsu9FxaSvIQO9J5yIV97WEtAKjRqYwiY,38207
|
|
73
74
|
gwaslab/data/reference.json,sha256=k8AvvgDsuLxzv-NCJHWvTUZ5q_DLAFxs1Th3jtL313k,11441
|
|
@@ -77,9 +78,9 @@ gwaslab/data/hapmap3_SNPs/hapmap3_db150_hg19.snplist.gz,sha256=qD9RsC5S2h6l-OdpW
|
|
|
77
78
|
gwaslab/data/hapmap3_SNPs/hapmap3_db151_hg38.snplist.gz,sha256=Y8ZT2FIAhbhlgCJdE9qQVAiwnV_fcsPt72usBa7RSBM,10225828
|
|
78
79
|
gwaslab/data/high_ld/high_ld_hla_hg19.bed.gz,sha256=R7IkssKu0L4WwkU9SrS84xCMdrkkKL0gnTNO_OKbG0Y,219
|
|
79
80
|
gwaslab/data/high_ld/high_ld_hla_hg38.bed.gz,sha256=76CIU0pibDJ72Y6UY-TbIKE9gEPwTELAaIbCXyjm80Q,470
|
|
80
|
-
gwaslab-3.4.
|
|
81
|
-
gwaslab-3.4.
|
|
82
|
-
gwaslab-3.4.
|
|
83
|
-
gwaslab-3.4.
|
|
84
|
-
gwaslab-3.4.
|
|
85
|
-
gwaslab-3.4.
|
|
81
|
+
gwaslab-3.4.48.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
82
|
+
gwaslab-3.4.48.dist-info/LICENSE_before_v3.4.39,sha256=GhLOU_1UDEKeOacYhsRN_m9u-eIuVTazSndZPeNcTZA,1066
|
|
83
|
+
gwaslab-3.4.48.dist-info/METADATA,sha256=4Cqofc_AlsSBNtwKwiLBFiDtQsh8UUvWiM_XhBYksxw,7752
|
|
84
|
+
gwaslab-3.4.48.dist-info/WHEEL,sha256=Mdi9PDNwEZptOjTlUcAth7XJDFtKrHYaQMPulZeBCiQ,91
|
|
85
|
+
gwaslab-3.4.48.dist-info/top_level.txt,sha256=PyY6hWtrALpv2MAN3kjkIAzJNmmBTH5a2risz9KwH08,8
|
|
86
|
+
gwaslab-3.4.48.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|