gwaslab 3.5.4__py3-none-any.whl → 3.5.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gwaslab might be problematic. Click here for more details.
- gwaslab/__init__.py +2 -1
- gwaslab/g_Sumstats.py +29 -8
- gwaslab/g_SumstatsPair.py +16 -12
- gwaslab/g_headers.py +131 -0
- gwaslab/g_meta.py +2 -1
- gwaslab/g_version.py +3 -3
- gwaslab/io_preformat_input.py +29 -7
- gwaslab/io_read_pipcs.py +23 -0
- gwaslab/io_to_formats.py +45 -44
- gwaslab/qc_check_datatype.py +65 -42
- gwaslab/util_ex_ldsc.py +9 -0
- gwaslab/util_ex_run_2samplemr.py +34 -0
- gwaslab/util_ex_run_clumping.py +4 -2
- gwaslab/util_in_fill_data.py +9 -1
- gwaslab/util_in_filter_value.py +15 -1
- gwaslab/viz_plot_credible_sets.py +99 -0
- gwaslab/viz_plot_mqqplot.py +14 -11
- gwaslab/viz_plot_stackedregional.py +64 -33
- {gwaslab-3.5.4.dist-info → gwaslab-3.5.5.dist-info}/METADATA +3 -3
- {gwaslab-3.5.4.dist-info → gwaslab-3.5.5.dist-info}/RECORD +24 -22
- gwaslab/vis_plot_credible sets.py +0 -0
- {gwaslab-3.5.4.dist-info → gwaslab-3.5.5.dist-info}/LICENSE +0 -0
- {gwaslab-3.5.4.dist-info → gwaslab-3.5.5.dist-info}/LICENSE_before_v3.4.39 +0 -0
- {gwaslab-3.5.4.dist-info → gwaslab-3.5.5.dist-info}/WHEEL +0 -0
- {gwaslab-3.5.4.dist-info → gwaslab-3.5.5.dist-info}/top_level.txt +0 -0
gwaslab/qc_check_datatype.py
CHANGED
|
@@ -5,7 +5,54 @@ from gwaslab.g_Log import Log
|
|
|
5
5
|
# pandas.api.types.is_int64_dtype
|
|
6
6
|
# pandas.api.types.is_categorical_dtype
|
|
7
7
|
|
|
8
|
+
# Accepted pandas dtype names for each known sumstats header.
# Order inside each group matters: quick_convert_datatype casts a column whose
# dtype is not accepted to the FIRST name listed for its header.
_TEXT_DTYPES = ("string", "object")
_TEXT_OR_CATEGORY_DTYPES = ("string", "object", "category")
_CATEGORY_DTYPES = ("category",)
_INTEGER_DTYPES = ("Int64", "int64", "int32", "Int32", "int")
_POSITION_DTYPES = ("int64", "Int64")
_FLOAT64_DTYPES = ("float64",)
_ANY_FLOAT_DTYPES = ("float64", "float", "float32")

# (header, accepted dtype names) pairs, kept in the table's original order.
_HEADER_DTYPE_GROUPS = (
    ("SNPID", _TEXT_DTYPES),
    ("rsID", _TEXT_DTYPES),
    ("CHR", _INTEGER_DTYPES),
    ("POS", _POSITION_DTYPES),
    ("EA", _CATEGORY_DTYPES),
    ("NEA", _CATEGORY_DTYPES),
    ("REF", _CATEGORY_DTYPES),
    ("ALT", _CATEGORY_DTYPES),
    ("BETA", _FLOAT64_DTYPES),
    ("BETA_95L", _FLOAT64_DTYPES),
    ("BETA_95U", _FLOAT64_DTYPES),
    ("SE", _FLOAT64_DTYPES),
    ("N", _INTEGER_DTYPES),
    ("N_CASE", _INTEGER_DTYPES),
    ("N_CONTROL", _INTEGER_DTYPES),
    ("OR", _FLOAT64_DTYPES),
    ("OR_95L", _FLOAT64_DTYPES),
    ("OR_95U", _FLOAT64_DTYPES),
    ("HR", _FLOAT64_DTYPES),
    ("HR_95L", _FLOAT64_DTYPES),
    ("HR_95U", _FLOAT64_DTYPES),
    ("P", _FLOAT64_DTYPES),
    ("MLOG10P", _FLOAT64_DTYPES),
    ("Z", _FLOAT64_DTYPES),
    ("F", _FLOAT64_DTYPES),
    ("T", _FLOAT64_DTYPES),
    ("TEST", _TEXT_OR_CATEGORY_DTYPES),
    ("CHISQ", _FLOAT64_DTYPES),
    ("I2", _FLOAT64_DTYPES),
    ("PHET", _FLOAT64_DTYPES),
    ("SNPR2", _FLOAT64_DTYPES),
    ("EAF", _ANY_FLOAT_DTYPES),
    ("NEAF", _ANY_FLOAT_DTYPES),
    ("MAF", _ANY_FLOAT_DTYPES),
    ("INFO", _ANY_FLOAT_DTYPES),
    ("DOF", _INTEGER_DTYPES),
    ("STATUS", _CATEGORY_DTYPES),
    ("DIRECTION", _TEXT_DTYPES),
    ("PIP", _ANY_FLOAT_DTYPES),
    ("CREDIBLE_SET_INDEX", _INTEGER_DTYPES),
    ("N_SNP", _INTEGER_DTYPES),
    ("LOCUS", _TEXT_OR_CATEGORY_DTYPES),
    ("STUDY", _TEXT_OR_CATEGORY_DTYPES),
)

# Every header gets its own list object, exactly as a literal table would.
dtype_dict = {header: list(accepted) for header, accepted in _HEADER_DTYPE_GROUPS}
|
|
53
|
+
|
|
8
54
|
def check_datatype(sumstats, verbose=True, log=Log()):
|
|
55
|
+
|
|
9
56
|
try:
|
|
10
57
|
headers = []
|
|
11
58
|
dtypes = []
|
|
@@ -39,47 +86,6 @@ def check_datatype(sumstats, verbose=True, log=Log()):
|
|
|
39
86
|
|
|
40
87
|
def verify_datatype(header, dtype):
|
|
41
88
|
|
|
42
|
-
dtype_dict ={
|
|
43
|
-
"SNPID":["object","string"],
|
|
44
|
-
"rsID":["object","string"],
|
|
45
|
-
"CHR":["int32","Int32","int64","Int64"],
|
|
46
|
-
"POS":["int64","Int64"],
|
|
47
|
-
"EA":"category",
|
|
48
|
-
"NEA":"category",
|
|
49
|
-
"REF":"category",
|
|
50
|
-
"ALT":"category",
|
|
51
|
-
"BETA":"float64",
|
|
52
|
-
"BETA_95L":"float64",
|
|
53
|
-
"BETA_95U":"float64",
|
|
54
|
-
"SE":"float64",
|
|
55
|
-
"N":["int","Int32","Int64","int32","int64"],
|
|
56
|
-
"N_CASE":["int","Int32","Int64","int32","int64"],
|
|
57
|
-
"N_CONTROL":["int","Int32","Int64","int32","int64"],
|
|
58
|
-
"OR":"float64",
|
|
59
|
-
"OR_95L":"float64",
|
|
60
|
-
"OR_95U":"float64",
|
|
61
|
-
"HR":"float64",
|
|
62
|
-
"HR_95L":"float64",
|
|
63
|
-
"HR_95U":"float64",
|
|
64
|
-
"P":"float64",
|
|
65
|
-
"MLOG10P":"float64",
|
|
66
|
-
"Z":"float64",
|
|
67
|
-
"F":"float64",
|
|
68
|
-
"T":"float64",
|
|
69
|
-
"TEST":["object","string","category"],
|
|
70
|
-
"CHISQ":"float64",
|
|
71
|
-
"I2":"float64",
|
|
72
|
-
"PHET":"float64",
|
|
73
|
-
"SNPR2":"float64",
|
|
74
|
-
"EAF":["float","float32","float64"],
|
|
75
|
-
"NEAF":["float","float32","float64"],
|
|
76
|
-
"MAF":["float","float32","float64"],
|
|
77
|
-
"INFO":["float32","float64"],
|
|
78
|
-
"DOF":["int","Int32","Int64","int32","int64"],
|
|
79
|
-
"STATUS":"category",
|
|
80
|
-
"DIRECTION":["object","string"],
|
|
81
|
-
}
|
|
82
|
-
|
|
83
89
|
if header in dtype_dict.keys():
|
|
84
90
|
if str(dtype) in dtype_dict[header]:
|
|
85
91
|
return "T"
|
|
@@ -88,6 +94,22 @@ def verify_datatype(header, dtype):
|
|
|
88
94
|
else:
|
|
89
95
|
return "NA"
|
|
90
96
|
|
|
97
|
+
def quick_convert_datatype(sumstats, log, verbose):
    """Best-effort cast of columns whose dtype is not an accepted one.

    For every column of ``sumstats`` that has an entry in the module-level
    ``dtype_dict`` and whose current dtype name is not listed there, try to
    cast the column to the first dtype listed for that header. A failed cast
    is logged and the column is left as it was; the loop then continues with
    the next column.

    Parameters
    ----------
    sumstats : pandas.DataFrame
        Table to fix; columns are reassigned on this object.
    log : object
        Logger exposing ``write(msg, end=..., show_time=..., verbose=...)``.
    verbose : bool
        Forwarded to every ``log.write`` call.

    Returns
    -------
    The same ``sumstats`` object that was passed in.
    """
    for col in sumstats.columns:
        accepted = dtype_dict.get(col)
        if accepted is None:
            # Header is not covered by the dtype table: nothing to check.
            continue

        current = str(sumstats[col].dtypes)
        if current in accepted:
            continue

        datatype = accepted[0]
        # end="" keeps the outcome ("<dtype>" or "Failed...") on the same log line.
        log.write(" -Trying to convert datatype for {}: {} -> {}...".format(col, current, datatype), end="", verbose=verbose)
        try:
            sumstats[col] = sumstats[col].astype(datatype)
        except Exception:
            # Deliberately best-effort, but no longer a bare ``except`` so that
            # KeyboardInterrupt / SystemExit still propagate.
            log.write("Failed...", show_time=False, verbose=verbose)
        else:
            log.write("{}".format(datatype), show_time=False, verbose=verbose)
    return sumstats
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
|
|
91
113
|
def check_dataframe_shape(sumstats, log, verbose):
|
|
92
114
|
memory_in_mb = sumstats.memory_usage().sum()/1024/1024
|
|
93
115
|
try:
|
|
@@ -100,4 +122,5 @@ def check_dataframe_memory_usage(sumstats, log, verbose):
|
|
|
100
122
|
try:
|
|
101
123
|
log.write(" -Current Dataframe memory usage: {:.2f} MB".format(memory_in_mb), verbose=verbose)
|
|
102
124
|
except:
|
|
103
|
-
log.warning("Error: cannot get Memory usage...")
|
|
125
|
+
log.warning("Error: cannot get Memory usage...")
|
|
126
|
+
|
gwaslab/util_ex_ldsc.py
CHANGED
|
@@ -260,6 +260,9 @@ class ARGS():
|
|
|
260
260
|
def _estimate_h2_by_ldsc(insumstats, log, verbose=True, munge=False, munge_args=None, **kwargs):
|
|
261
261
|
sumstats = insumstats.copy()
|
|
262
262
|
|
|
263
|
+
if "N" in sumstats.columns:
|
|
264
|
+
sumstats["N"] = sumstats["N"].astype("int64")
|
|
265
|
+
|
|
263
266
|
if munge:
|
|
264
267
|
if munge_args is None:
|
|
265
268
|
munge_args={}
|
|
@@ -320,6 +323,8 @@ def _estimate_h2_by_ldsc(insumstats, log, verbose=True, munge=False, munge_args=
|
|
|
320
323
|
|
|
321
324
|
def _estimate_partitioned_h2_by_ldsc(insumstats, log, verbose=True, **kwargs):
|
|
322
325
|
sumstats = insumstats.copy()
|
|
326
|
+
if "N" in sumstats.columns:
|
|
327
|
+
sumstats["N"] = sumstats["N"].astype("int64")
|
|
323
328
|
##start function with col checking##########################################################
|
|
324
329
|
_start_line = "run LD score regression"
|
|
325
330
|
_end_line = "running LD score regression"
|
|
@@ -366,6 +371,8 @@ def _estimate_partitioned_h2_by_ldsc(insumstats, log, verbose=True, **kwargs):
|
|
|
366
371
|
|
|
367
372
|
def _estimate_rg_by_ldsc(insumstats, other_traits ,log, verbose=True, **kwargs):
|
|
368
373
|
sumstats = insumstats.copy()
|
|
374
|
+
if "N" in sumstats.columns:
|
|
375
|
+
sumstats["N"] = sumstats["N"].astype("int64")
|
|
369
376
|
##start function with col checking##########################################################
|
|
370
377
|
_start_line = "run LD score regression for genetic correlation"
|
|
371
378
|
_end_line = "running LD score regression for genetic correlation"
|
|
@@ -426,6 +433,8 @@ def _estimate_rg_by_ldsc(insumstats, other_traits ,log, verbose=True, **kwargs):
|
|
|
426
433
|
|
|
427
434
|
def _estimate_h2_cts_by_ldsc(insumstats, log, verbose=True, **kwargs):
|
|
428
435
|
sumstats = insumstats.copy()
|
|
436
|
+
if "N" in sumstats.columns:
|
|
437
|
+
sumstats["N"] = sumstats["N"].astype("int64")
|
|
429
438
|
##start function with col checking##########################################################
|
|
430
439
|
_start_line = "run LD score regression"
|
|
431
440
|
_end_line = "running LD score regression"
|
gwaslab/util_ex_run_2samplemr.py
CHANGED
|
@@ -21,6 +21,8 @@ def _run_two_sample_mr(sumstatspair_object,
|
|
|
21
21
|
n1=None,
|
|
22
22
|
n2=None,
|
|
23
23
|
binary1=False,
|
|
24
|
+
cck1=None,
|
|
25
|
+
cck2=None,
|
|
24
26
|
ncase1=None,
|
|
25
27
|
ncontrol1=None,
|
|
26
28
|
prevalence1=None,
|
|
@@ -35,6 +37,22 @@ def _run_two_sample_mr(sumstatspair_object,
|
|
|
35
37
|
if methods is None:
|
|
36
38
|
methods = ["mr_ivw","mr_simple_mode","mr_weighted_median","mr_egger_regression","mr_ivw_mre", "mr_weighted_mode"]
|
|
37
39
|
methods_string = '"{}"'.format('","'.join(methods))
|
|
40
|
+
|
|
41
|
+
if cck1 is not None:
|
|
42
|
+
log.write(" - ncase1, ncontrol1, prevalence1:{}".format(cck1))
|
|
43
|
+
binary1 = True
|
|
44
|
+
ncase1 = cck1[0]
|
|
45
|
+
ncontrol1 = cck1[1]
|
|
46
|
+
prevalence1 = cck1[2]
|
|
47
|
+
n1 = ncase1 + ncontrol1
|
|
48
|
+
if cck2 is not None:
|
|
49
|
+
log.write(" - ncase2, ncontrol2, prevalence2:{}".format(cck2))
|
|
50
|
+
binary2 = True
|
|
51
|
+
ncase2 = cck2[0]
|
|
52
|
+
ncontrol2 = cck2[1]
|
|
53
|
+
prevalence2 = cck2[2]
|
|
54
|
+
n2 = ncase2 + ncontrol2
|
|
55
|
+
|
|
38
56
|
if clump==True:
|
|
39
57
|
sumstatspair = sumstatspair_object.clumps["clumps"]
|
|
40
58
|
else:
|
|
@@ -64,10 +82,16 @@ def _run_two_sample_mr(sumstatspair_object,
|
|
|
64
82
|
|
|
65
83
|
###
|
|
66
84
|
calculate_r_script = ""
|
|
85
|
+
|
|
67
86
|
if binary1==True:
|
|
68
87
|
calculate_r_script+= _make_script_for_calculating_r("exposure", ncase1, ncontrol1, prevalence1)
|
|
88
|
+
else:
|
|
89
|
+
calculate_r_script+= _make_script_for_calculating_r_quant("exposure")
|
|
90
|
+
|
|
69
91
|
if binary2==True:
|
|
70
92
|
calculate_r_script+= _make_script_for_calculating_r("outcome", ncase2, ncontrol2, prevalence2)
|
|
93
|
+
else:
|
|
94
|
+
calculate_r_script+= _make_script_for_calculating_r_quant("outcome")
|
|
71
95
|
|
|
72
96
|
# create scripts
|
|
73
97
|
directionality_test_script='''
|
|
@@ -218,6 +242,16 @@ def _make_script_for_calculating_r(exposure_or_outcome, ncase, ncontrol, prevale
|
|
|
218
242
|
return script
|
|
219
243
|
|
|
220
244
|
|
|
245
|
+
def _make_script_for_calculating_r_quant(exposure_or_outcome):
|
|
246
|
+
script = """
|
|
247
|
+
harmonized_data$"r.{exposure_or_outcome}" <- get_r_from_bsen( harmonized_data$"beta.{exposure_or_outcome}",
|
|
248
|
+
harmonized_data$"se.{exposure_or_outcome}",
|
|
249
|
+
harmonized_data$"samplesize.{exposure_or_outcome}"
|
|
250
|
+
)
|
|
251
|
+
""".format(
|
|
252
|
+
exposure_or_outcome = exposure_or_outcome
|
|
253
|
+
)
|
|
254
|
+
return script
|
|
221
255
|
|
|
222
256
|
|
|
223
257
|
def _filter_by_f(sumstatspair, f_check, n1, binary1=None, ncase1=None, ncontrol1=None, prevalence1=None, log=Log() ):
|
gwaslab/util_ex_run_clumping.py
CHANGED
|
@@ -162,7 +162,7 @@ def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
|
|
|
162
162
|
log.write(e.output)
|
|
163
163
|
#os.system(script)
|
|
164
164
|
|
|
165
|
-
clumped = pd.read_csv("{}.clumps".format(out_single_chr),
|
|
165
|
+
clumped = pd.read_csv("{}.clumps".format(out_single_chr),sep="\s+")
|
|
166
166
|
results = pd.concat([results,clumped],ignore_index=True)
|
|
167
167
|
|
|
168
168
|
# remove temp SNPIDP file
|
|
@@ -172,7 +172,9 @@ def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
|
|
|
172
172
|
log.write("Finished clumping.",verbose=verbose)
|
|
173
173
|
results_sumstats = insumstats.loc[insumstats["SNPID"].isin(results["SNPID"]),:].copy()
|
|
174
174
|
finished(log=log, verbose=verbose, end_line=_end_line)
|
|
175
|
-
|
|
175
|
+
|
|
176
|
+
return results_sumstats, results, plink_log
|
|
177
|
+
|
|
176
178
|
|
|
177
179
|
|
|
178
180
|
|
gwaslab/util_in_fill_data.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import pandas as pd
|
|
2
2
|
import numpy as np
|
|
3
3
|
import scipy.stats as ss
|
|
4
|
+
from scipy.stats import norm
|
|
4
5
|
from scipy import stats
|
|
5
6
|
from gwaslab.g_Log import Log
|
|
6
7
|
import gc
|
|
@@ -8,6 +9,7 @@ import gc
|
|
|
8
9
|
from gwaslab.g_version import _get_version
|
|
9
10
|
from gwaslab.qc_check_datatype import check_datatype
|
|
10
11
|
|
|
12
|
+
|
|
11
13
|
def filldata(
|
|
12
14
|
insumstats,
|
|
13
15
|
to_fill=None,
|
|
@@ -330,4 +332,10 @@ def _convert_or_to_beta(OR):
|
|
|
330
332
|
return np.log(OR)
|
|
331
333
|
|
|
332
334
|
def _convert_beta_to_or(beta):
|
|
333
|
-
return np.exp(beta)
|
|
335
|
+
return np.exp(beta)
|
|
336
|
+
|
|
337
|
+
def rank_based_int(series, c=3/8):
    """Apply a rank-based inverse normal transformation to ``series``.

    Each value with rank ``r`` is mapped to
    ``norm.ppf((r - c) / (n + 1 - 2*c))``, where ``n`` is the number of
    non-missing values. Missing values receive no rank and therefore come out
    as NaN.

    Reference: https://onlinelibrary.wiley.com/doi/10.1111/biom.13214

    Parameters
    ----------
    series : pandas.Series
        Values to transform.
    c : float
        Offset used in the rank-to-quantile formula (default 3/8).

    Returns
    -------
    The result of ``norm.ppf`` on the rescaled ranks, in the order of ``series``.
    """
    # Vectorized count of non-missing values; the builtin sum() would walk the
    # Series element by element in Python.
    n = series.notna().sum()
    normalized_value = norm.ppf((series.rank() - c) / (n + 1 - 2 * c))
    return normalized_value
|
gwaslab/util_in_filter_value.py
CHANGED
|
@@ -513,4 +513,18 @@ def _exclude(sumstats, exclude=None, id_use="SNPID", log=Log(), verbose=True ):
|
|
|
513
513
|
log.write(" -Excluding {} variants from sumstats...".format(len(exclude)),verbose=verbose)
|
|
514
514
|
sumstats = sumstats.loc[~sumstats[id_use].isin(exclude),:]
|
|
515
515
|
log.write(" -Excluded {} variants from sumstats...".format(len(sumstats)),verbose=verbose)
|
|
516
|
-
return sumstats
|
|
516
|
+
return sumstats
|
|
517
|
+
|
|
518
|
+
def _filter_region(sumstats, region, chrom="CHR",pos="POS",log=Log(),verbose=True):
    """Return a copy of ``sumstats`` restricted to ``region``.

    Parameters
    ----------
    sumstats : pandas.DataFrame
        Table holding the ``chrom`` and ``pos`` columns.
    region : sequence or None
        ``(chromosome, start, end)``. Rows are kept when ``chrom`` equals the
        chromosome and ``start < pos < end`` (both bounds are exclusive).
        When None, no filtering is applied.
    chrom, pos : str
        Names of the chromosome and position columns.
    log : Log
        Logger used for progress messages.
    verbose : bool
        Forwarded to every ``log.write`` call.

    Returns
    -------
    pandas.DataFrame
        A copy of the (possibly filtered) table.
    """
    if region is not None:
        region_chr = region[0]
        region_start = region[1]
        region_end = region[2]

        log.write(" -Extract SNPs in region : chr{}:{}-{}...".format(region_chr, region_start, region_end),verbose=verbose)

        in_region_snp = (sumstats[chrom]==region_chr) & (sumstats[pos]<region_end) & (sumstats[pos]>region_start)

        # Vectorized count of the mask; the builtin sum() iterates the Series
        # element by element in Python, which is slow on large tables.
        log.write(" -Extract SNPs in specified regions: "+str(in_region_snp.sum()),verbose=verbose)
        sumstats = sumstats.loc[in_region_snp,:]
    return sumstats.copy()
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import matplotlib.pyplot as plt
|
|
3
|
+
import pandas as pd
|
|
4
|
+
import seaborn as sns
|
|
5
|
+
from gwaslab.g_Log import Log
|
|
6
|
+
from gwaslab.viz_aux_quickfix import _quick_assign_i_with_rank
|
|
7
|
+
from gwaslab.viz_plot_mqqplot import _process_xtick
|
|
8
|
+
from gwaslab.viz_plot_mqqplot import _process_xlabel
|
|
9
|
+
from gwaslab.bd_common_data import get_number_to_chr
|
|
10
|
+
from gwaslab.util_in_filter_value import _filter_region
|
|
11
|
+
from gwaslab.io_process_args import _extract_kwargs
|
|
12
|
+
|
|
13
|
+
def _plot_cs(pipcs,
             region,
             figax=None,
             _posdiccul=None,
             xtick_chr_dict=None,
             pip="PIP",
             onlycs=False,
             cs="CREDIBLE_SET_INDEX",
             marker_size=(45,85),
             fontsize = 12,
             font_family = "Arial",
             legend_title="Credible sets",
             log=Log(),
             verbose=True,
             **kwargs):
    '''
    Scatter-plot fine-mapping results for one region, styled by credible set.

    pipcs : a DataFrame of finemapping results; the body reads the columns
            "CHR", "POS", `pip` and `cs`.
    region : (chromosome, start, end), passed to _filter_region.
    figax : optional (fig, ax) pair to draw on; a new figure is created when None.
    _posdiccul : forwarded unchanged to _quick_assign_i_with_rank.
    pip : name of the column plotted on the y axis.
    onlycs : when True, keep only rows whose `cs` value is greater than 0.
    cs : name of the credible-set column; drives both colour and marker shape.
    marker_size : only the second element is used (scatter point size).
    legend_title : title of the legend.
    log, verbose : logger and verbosity, also forwarded to _filter_region.
    **kwargs : searched by _extract_kwargs for extra scatter arguments.

    xtick_chr_dict, fontsize and font_family are accepted but not used by the
    plotting code in this function.

    Returns (fig, log).
    '''
    ## parameters #############################
    if xtick_chr_dict is None:
        xtick_chr_dict = get_number_to_chr()

    # NOTE: keep this call before any other local is defined; it inspects locals().
    scatter_kwargs =   _extract_kwargs("scatter", dict(), locals())

    region_marker_shapes = ['o', '^','s','D','*','P','X','h','8']
    region_ld_colors_m = ["grey","#E51819","green","#F07818","#AD5691","yellow","purple"]

    ## filter data #############################
    # Forward log/verbose so region messages go to the caller's log and honour
    # its verbosity (same as the mqqplot call to _filter_region).
    pipcs = _filter_region(pipcs, region, log=log, verbose=verbose)
    if onlycs ==True:
        # .copy() so that the column assignment below acts on an independent
        # frame rather than on a slice of the filtered table.
        pipcs = pipcs.loc[pipcs[cs]>0,:].copy()

    pipcs[cs] = pipcs[cs].astype("string")

    ## figure and ax #############################
    if figax is not None:
        ax=figax[1]
        fig=figax[0]
    else:
        fig, ax = plt.subplots()

    # assign i  (presumably adds the "i" x-coordinate column used below -- confirm in viz_aux_quickfix)
    pipcs, _ = _quick_assign_i_with_rank(pipcs, chrpad=0.00,
                                         use_rank=False,
                                         chrom="CHR",pos="POS",
                                         drop_chr_start=False,
                                         _posdiccul=_posdiccul)
    pipcs = pipcs.sort_values(by=cs,ascending=True)

    ## plot ##########################################
    # Cycle through the marker shapes so that more credible sets than shapes
    # do not raise an IndexError.
    n_shapes = len(region_marker_shapes)
    scatter_kwargs["markers"]= {m:region_marker_shapes[i % n_shapes] for i,m in enumerate(pipcs[cs].unique())}
    palette = sns.color_palette(region_ld_colors_m,n_colors=pipcs[cs].nunique())
    edgecolor="none"

    sns.scatterplot(data=pipcs,
                    x="i",
                    y=pip,
                    hue=cs,
                    edgecolor=edgecolor,
                    palette=palette,
                    style=cs,
                    s=marker_size[1],
                    ax=ax,
                    **scatter_kwargs)

    # process legend: keep only the entries labelled "1" .. "9"
    handles, labels = ax.get_legend_handles_labels()
    shown_labels = {str(j) for j in range(1,10)}
    new_labels = []
    new_handles = []
    for handle, label in zip(handles, labels):
        if label in shown_labels:
            new_labels.append(label)
            new_handles.append(handle)

    ax.legend(labels =new_labels,
              handles=new_handles,
              loc="upper right",
              bbox_to_anchor=(0.995, 0.995),
              ncol=1,
              scatterpoints=2,
              title=legend_title,
              frameon=True)

    return fig, log
|
gwaslab/viz_plot_mqqplot.py
CHANGED
|
@@ -51,6 +51,7 @@ from gwaslab.bd_common_data import get_chr_to_number
|
|
|
51
51
|
from gwaslab.bd_common_data import get_number_to_chr
|
|
52
52
|
from gwaslab.bd_common_data import get_recombination_rate
|
|
53
53
|
from gwaslab.bd_common_data import get_gtf
|
|
54
|
+
from gwaslab.util_in_filter_value import _filter_region
|
|
54
55
|
from gwaslab.g_version import _get_version
|
|
55
56
|
from matplotlib.colors import ListedColormap
|
|
56
57
|
from matplotlib.colors import LinearSegmentedColormap
|
|
@@ -475,18 +476,20 @@ def mqqplot(insumstats,
|
|
|
475
476
|
sumstats[chrom] = _quick_fix_chr(sumstats[chrom], chr_dict=chr_dict)
|
|
476
477
|
|
|
477
478
|
## r
|
|
479
|
+
|
|
478
480
|
if region is not None:
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
481
|
+
sumstats = _filter_region(sumstats, region, log=log, verbose=verbose)
|
|
482
|
+
# region_chr = region[0]
|
|
483
|
+
# region_start = region[1]
|
|
484
|
+
# region_end = region[2]
|
|
485
|
+
#
|
|
486
|
+
# log.write(" -Extract SNPs in region : chr{}:{}-{}...".format(region_chr, region[1], region[2]),verbose=verbose)
|
|
487
|
+
#
|
|
488
|
+
# in_region_snp = (sumstats[chrom]==region_chr) & (sumstats[pos]<region_end) & (sumstats[pos]>region_start)
|
|
489
|
+
#
|
|
490
|
+
# log.write(" -Extract SNPs in specified regions: "+str(sum(in_region_snp)),verbose=verbose)
|
|
491
|
+
# sumstats = sumstats.loc[in_region_snp,:]
|
|
492
|
+
#
|
|
490
493
|
if len(sumstats)==0:
|
|
491
494
|
log.warning("No valid data! Please check the input.")
|
|
492
495
|
return None
|
|
@@ -38,9 +38,10 @@ from gwaslab.io_to_pickle import load_data_from_pickle
|
|
|
38
38
|
from gwaslab.g_Sumstats import Sumstats
|
|
39
39
|
from gwaslab.viz_aux_save_figure import save_figure
|
|
40
40
|
from gwaslab.viz_plot_mqqplot import mqqplot
|
|
41
|
+
from gwaslab.viz_plot_credible_sets import _plot_cs
|
|
41
42
|
import matplotlib.patches as patches
|
|
42
43
|
|
|
43
|
-
def plot_stacked_mqq(objects,
|
|
44
|
+
def plot_stacked_mqq( objects,
|
|
44
45
|
vcfs=None,
|
|
45
46
|
mode="r",
|
|
46
47
|
mqqratio=3,
|
|
@@ -62,10 +63,12 @@ def plot_stacked_mqq(objects,
|
|
|
62
63
|
region_ld_legends = None,
|
|
63
64
|
fontsize=9,
|
|
64
65
|
font_family="Arial",
|
|
66
|
+
common_ylabel=True,
|
|
65
67
|
build="99",
|
|
66
68
|
save=None,
|
|
67
69
|
save_args=None,
|
|
68
70
|
verbose=True,
|
|
71
|
+
pm=None,
|
|
69
72
|
log=Log(),
|
|
70
73
|
**mqq_args
|
|
71
74
|
):
|
|
@@ -74,10 +77,28 @@ def plot_stacked_mqq(objects,
|
|
|
74
77
|
# load sumstats
|
|
75
78
|
|
|
76
79
|
##########################################################################################################################################
|
|
80
|
+
if pm is None:
|
|
81
|
+
pm=[]
|
|
82
|
+
|
|
77
83
|
sumstats_list = []
|
|
78
84
|
for each_object in objects:
|
|
79
|
-
|
|
85
|
+
if type(each_object) is Sumstats:
|
|
86
|
+
if "P" in each_object.data.columns or "MLOG10P" in each_object.data.columns:
|
|
87
|
+
sumstats_list.append(each_object.data)
|
|
88
|
+
pm.append("m")
|
|
89
|
+
else:
|
|
90
|
+
if "PIP" in each_object.columns:
|
|
91
|
+
sumstats_list.append(each_object)
|
|
92
|
+
pm.append("pip")
|
|
93
|
+
common_ylabel=False
|
|
94
|
+
|
|
95
|
+
if common_ylabel==True:
|
|
96
|
+
rr_ylabel=False
|
|
97
|
+
else:
|
|
98
|
+
rr_ylabel=True
|
|
80
99
|
|
|
100
|
+
log.write(" -Panel mode:{}...".format(pm),verbose=verbose)
|
|
101
|
+
|
|
81
102
|
if fig_args is None:
|
|
82
103
|
fig_args = {"dpi":200}
|
|
83
104
|
if save_args is None:
|
|
@@ -200,7 +221,7 @@ def plot_stacked_mqq(objects,
|
|
|
200
221
|
region_lead_grid=False,
|
|
201
222
|
region_ld_legend=region_ld_legend,
|
|
202
223
|
gtf_path="default",
|
|
203
|
-
rr_ylabel=
|
|
224
|
+
rr_ylabel=rr_ylabel,
|
|
204
225
|
figax=figax,
|
|
205
226
|
_get_region_lead=True,
|
|
206
227
|
_if_quick_qc=False,
|
|
@@ -213,33 +234,42 @@ def plot_stacked_mqq(objects,
|
|
|
213
234
|
lead_variants_is[index] = lead_snp_is
|
|
214
235
|
lead_variants_is_color[index] = lead_snp_is_color
|
|
215
236
|
else:
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
237
|
+
if pm[index]=="m":
|
|
238
|
+
# plot only the scatter plot
|
|
239
|
+
fig,log,lead_snp_is,lead_snp_is_color = mqqplot(sumstats,
|
|
240
|
+
chrom="CHR",
|
|
241
|
+
pos="POS",
|
|
242
|
+
p="P",
|
|
243
|
+
region=region,
|
|
244
|
+
mlog10p="MLOG10P",
|
|
245
|
+
snpid="SNPID",
|
|
246
|
+
vcf_path=vcfs[index],
|
|
247
|
+
region_lead_grid=False,
|
|
248
|
+
fontsize=fontsize,
|
|
249
|
+
font_family=font_family,
|
|
250
|
+
mode=mode,
|
|
251
|
+
rr_ylabel=rr_ylabel,
|
|
252
|
+
region_ld_legend=region_ld_legend,
|
|
253
|
+
gtf_path=None,
|
|
254
|
+
figax=figax,
|
|
255
|
+
_get_region_lead=True,
|
|
256
|
+
_if_quick_qc=False,
|
|
257
|
+
_posdiccul=_posdiccul,
|
|
258
|
+
build=build,
|
|
259
|
+
verbose=verbose,
|
|
260
|
+
log=log,
|
|
261
|
+
**mqq_args_for_each_plot[index]
|
|
262
|
+
)
|
|
263
|
+
lead_variants_is[index] = lead_snp_is
|
|
264
|
+
lead_variants_is_color[index] = lead_snp_is_color
|
|
265
|
+
elif pm[index]=="pip":
|
|
266
|
+
fig,log =_plot_cs(sumstats,
|
|
267
|
+
region=region,
|
|
268
|
+
_posdiccul=_posdiccul,
|
|
269
|
+
figax=figax,
|
|
270
|
+
log=log,
|
|
271
|
+
verbose=verbose,
|
|
272
|
+
**mqq_args_for_each_plot[index])
|
|
243
273
|
if len(region_chromatin_files)>0 and mode=="r":
|
|
244
274
|
xlim_i = axes[-1].get_xlim()
|
|
245
275
|
fig = _plot_chromatin_state( region_chromatin_files = region_chromatin_files,
|
|
@@ -295,9 +325,10 @@ def plot_stacked_mqq(objects,
|
|
|
295
325
|
_draw_grid_line_for_lead_variants(mode, lead_variants_is,lead_variants_is_color, n_plot, axes, region_lead_grid_line,region_chromatin_files,region_lead_grids)
|
|
296
326
|
|
|
297
327
|
##########################################################################################################################################
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
328
|
+
if common_ylabel==True:
|
|
329
|
+
_drop_old_y_labels(axes, n_plot)
|
|
330
|
+
|
|
331
|
+
_add_new_y_label(mode, fig, gene_track_height,n_plot,subplot_height ,fontsize,font_family)
|
|
301
332
|
|
|
302
333
|
##########################################################################################################################################
|
|
303
334
|
save_figure(fig = fig, save = save, keyword= "stacked_" + mode, save_args=save_args, log = log, verbose=verbose)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: gwaslab
|
|
3
|
-
Version: 3.5.4
|
|
3
|
+
Version: 3.5.5
|
|
4
4
|
Summary: A collection of handy tools for GWAS SumStats
|
|
5
5
|
Author-email: Yunye <yunye@gwaslab.com>
|
|
6
6
|
Project-URL: Homepage, https://cloufield.github.io/gwaslab/
|
|
@@ -14,7 +14,7 @@ License-File: LICENSE
|
|
|
14
14
|
License-File: LICENSE_before_v3.4.39
|
|
15
15
|
Requires-Dist: pandas!=1.5,>=1.3
|
|
16
16
|
Requires-Dist: numpy<2,>=1.21.2
|
|
17
|
-
Requires-Dist: matplotlib
|
|
17
|
+
Requires-Dist: matplotlib<3.9,>=3.8
|
|
18
18
|
Requires-Dist: seaborn>=0.12
|
|
19
19
|
Requires-Dist: scipy>=1.12
|
|
20
20
|
Requires-Dist: pySAM==0.22.1
|
|
@@ -51,7 +51,7 @@ Warning: Known issues of GWASLab are summarized in [https://cloufield.github.io/
|
|
|
51
51
|
### install via pip
|
|
52
52
|
|
|
53
53
|
```
|
|
54
|
-
pip install gwaslab==3.5.
|
|
54
|
+
pip install gwaslab==3.5.4
|
|
55
55
|
```
|
|
56
56
|
|
|
57
57
|
```python
|