gwaslab 3.4.46__py3-none-any.whl → 3.4.47__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gwaslab might be problematic. Click here for more details.
- gwaslab/g_version.py +7 -7
- gwaslab/io_to_formats.py +8 -3
- gwaslab/util_ex_calculate_ldmatrix.py +20 -7
- gwaslab/util_ex_calculate_prs.py +13 -7
- gwaslab/util_ex_process_ref.py +22 -11
- gwaslab/viz_aux_chromatin.py +4 -3
- gwaslab/viz_plot_mqqplot.py +80 -42
- gwaslab/viz_plot_regional2.py +792 -0
- gwaslab/viz_plot_stackedregional.py +62 -43
- {gwaslab-3.4.46.dist-info → gwaslab-3.4.47.dist-info}/METADATA +1 -1
- {gwaslab-3.4.46.dist-info → gwaslab-3.4.47.dist-info}/RECORD +15 -14
- {gwaslab-3.4.46.dist-info → gwaslab-3.4.47.dist-info}/WHEEL +1 -1
- {gwaslab-3.4.46.dist-info → gwaslab-3.4.47.dist-info}/LICENSE +0 -0
- {gwaslab-3.4.46.dist-info → gwaslab-3.4.47.dist-info}/LICENSE_before_v3.4.39 +0 -0
- {gwaslab-3.4.46.dist-info → gwaslab-3.4.47.dist-info}/top_level.txt +0 -0
gwaslab/g_version.py
CHANGED
|
@@ -15,16 +15,16 @@ def _get_version():
|
|
|
15
15
|
def gwaslab_info():
|
|
16
16
|
# version meta information
|
|
17
17
|
dic={
|
|
18
|
-
"version":"3.4.46",
|
|
19
|
-
"release_date":"
|
|
18
|
+
"version":"3.4.47",
|
|
19
|
+
"release_date":"20240703"
|
|
20
20
|
}
|
|
21
21
|
return dic
|
|
22
22
|
|
|
23
|
-
def _checking_plink_version(
|
|
24
|
-
if
|
|
25
|
-
which_plink_script = "
|
|
26
|
-
elif
|
|
27
|
-
which_plink_script = "
|
|
23
|
+
def _checking_plink_version(plink=None,plink2=None,log=Log(), verbose=True):
|
|
24
|
+
if plink is not None:
|
|
25
|
+
which_plink_script = "{} --version".format(plink)
|
|
26
|
+
elif plink2 is not None:
|
|
27
|
+
which_plink_script = "{} --version".format(plink2)
|
|
28
28
|
output = subprocess.check_output(which_plink_script, stderr=subprocess.STDOUT, shell=True,text=True)
|
|
29
29
|
log.write(" -PLINK version: {}".format(output.strip()))
|
|
30
30
|
return log
|
gwaslab/io_to_formats.py
CHANGED
|
@@ -212,8 +212,10 @@ def tofmt(sumstats,
|
|
|
212
212
|
log.write(" -Start outputting sumstats in "+fmt+" format...")
|
|
213
213
|
|
|
214
214
|
if "CHR" in sumstats.columns:
|
|
215
|
+
# output X,Y,MT instead of 23,24,25
|
|
215
216
|
if xymt_number is False and pd.api.types.is_integer_dtype(sumstats["CHR"]):
|
|
216
217
|
sumstats["CHR"]= sumstats["CHR"].map(get_number_to_chr(xymt=xymt,prefix=chr_prefix))
|
|
218
|
+
# add prefix to CHR
|
|
217
219
|
elif chr_prefix is not None:
|
|
218
220
|
sumstats["CHR"]= chr_prefix + sumstats["CHR"].astype("string")
|
|
219
221
|
|
|
@@ -437,17 +439,20 @@ def _configure_output_cols_and_args(sumstats, rename_dictionary, cols, no_status
|
|
|
437
439
|
ouput_cols.append(i)
|
|
438
440
|
|
|
439
441
|
# + additional cols and remove duplicated
|
|
440
|
-
|
|
442
|
+
ouput_cols_final = []
|
|
443
|
+
for i in ouput_cols + cols:
|
|
444
|
+
if i not in ouput_cols_final:
|
|
445
|
+
ouput_cols_final.append(i)
|
|
441
446
|
|
|
442
447
|
# remove STATUS
|
|
443
448
|
try:
|
|
444
449
|
if no_status == True:
|
|
445
|
-
|
|
450
|
+
ouput_cols_final.remove("STATUS")
|
|
446
451
|
except:
|
|
447
452
|
pass
|
|
448
453
|
|
|
449
454
|
#filter and rename to target fromat headers
|
|
450
|
-
sumstats = sumstats[
|
|
455
|
+
sumstats = sumstats[ouput_cols_final]
|
|
451
456
|
sumstats = sumstats.rename(columns=rename_dictionary)
|
|
452
457
|
|
|
453
458
|
# configure target format args and reorder columns
|
gwaslab/util_ex_calculate_ldmatrix.py
CHANGED
|
@@ -17,6 +17,8 @@ def tofinemapping(sumstats,
|
|
|
17
17
|
vcf=None,
|
|
18
18
|
loci=None,
|
|
19
19
|
out="./",
|
|
20
|
+
plink="plink",
|
|
21
|
+
plink2="plink2",
|
|
20
22
|
windowsizekb=1000,
|
|
21
23
|
n_cores=1,
|
|
22
24
|
mode="r",
|
|
@@ -56,6 +58,9 @@ def tofinemapping(sumstats,
|
|
|
56
58
|
else:
|
|
57
59
|
sig_df = sumstats.loc[sumstats["SNPID"].isin(loci),:]
|
|
58
60
|
|
|
61
|
+
log.write(" -plink1.9 path: {}".format(plink),verbose=verbose)
|
|
62
|
+
log.write(" -plink2 path: {}".format(plink2),verbose=verbose)
|
|
63
|
+
|
|
59
64
|
# Drop duplicate!!!!
|
|
60
65
|
log.write(" -Dropping duplicated SNPIDs...",verbose=verbose)
|
|
61
66
|
sumstats = sumstats.drop_duplicates(subset=["SNPID"]).copy()
|
|
@@ -68,11 +73,13 @@ def tofinemapping(sumstats,
|
|
|
68
73
|
if exclude_hla==True:
|
|
69
74
|
sig_df = _exclude_hla(sig_df, log=log, verbose=verbose)
|
|
70
75
|
|
|
76
|
+
sig_df = sig_df.reset_index()
|
|
77
|
+
|
|
71
78
|
## for each lead variant
|
|
72
79
|
for index, row in sig_df.iterrows():
|
|
73
80
|
# extract snplist in each locus
|
|
74
81
|
gc.collect()
|
|
75
|
-
|
|
82
|
+
log.write(" -Locus #{}---------------------------------------------------------------".format(index+1))
|
|
76
83
|
log.write(" -Processing locus with lead variant {} at CHR {} POS {} ...".format(row["SNPID"],row["CHR"],row["POS"]))
|
|
77
84
|
locus_sumstats = _extract_variants_in_locus(sumstats, windowsizekb, locus = (row["CHR"],row["POS"]))
|
|
78
85
|
|
|
@@ -84,7 +91,10 @@ def tofinemapping(sumstats,
|
|
|
84
91
|
n_cores=n_cores,
|
|
85
92
|
log=log,
|
|
86
93
|
load_bim=True,
|
|
87
|
-
overwrite=overwrite
|
|
94
|
+
overwrite=overwrite,
|
|
95
|
+
plink=plink,
|
|
96
|
+
plink2=plink2,
|
|
97
|
+
**kwargs)
|
|
88
98
|
|
|
89
99
|
## check available snps with reference file
|
|
90
100
|
matched_sumstats = _align_sumstats_with_bim(row=row,
|
|
@@ -114,7 +124,10 @@ def tofinemapping(sumstats,
|
|
|
114
124
|
windowsizekb=windowsizekb,
|
|
115
125
|
out=out,
|
|
116
126
|
plink_log=plink_log,
|
|
117
|
-
log=log,
|
|
127
|
+
log=log,
|
|
128
|
+
filetype=filetype,
|
|
129
|
+
plink=plink,
|
|
130
|
+
plink2=plink2,
|
|
118
131
|
verbose=verbose)
|
|
119
132
|
|
|
120
133
|
|
|
@@ -143,12 +156,12 @@ def tofinemapping(sumstats,
|
|
|
143
156
|
|
|
144
157
|
|
|
145
158
|
|
|
146
|
-
def _calculate_ld_r(study, matched_sumstats_snpid, row, bfile_prefix, n_cores, windowsizekb,out,plink_log,log,memory,mode,filetype,verbose=True):
|
|
159
|
+
def _calculate_ld_r(study, matched_sumstats_snpid, row, bfile_prefix, n_cores, windowsizekb,out,plink_log,log,memory,mode,filetype,plink,plink2,verbose=True):
|
|
147
160
|
'''
|
|
148
161
|
Calculate LD r matrix by calling PLINK; return file name and log
|
|
149
162
|
'''
|
|
150
163
|
log.write(" -Start to calculate LD r matrix...",verbose=verbose)
|
|
151
|
-
log = _checking_plink_version(
|
|
164
|
+
log = _checking_plink_version(plink=plink, log=log)
|
|
152
165
|
if "@" in bfile_prefix:
|
|
153
166
|
bfile_to_use = bfile_prefix.replace("@",str(row["CHR"]))
|
|
154
167
|
else:
|
|
@@ -165,7 +178,7 @@ def _calculate_ld_r(study, matched_sumstats_snpid, row, bfile_prefix, n_cores, w
|
|
|
165
178
|
raise ValueError("Please use bfile instead of pfile for PLINK1.")
|
|
166
179
|
|
|
167
180
|
script_vcf_to_bfile = """
|
|
168
|
-
|
|
181
|
+
{} \
|
|
169
182
|
--bfile {} \
|
|
170
183
|
--keep-allele-order \
|
|
171
184
|
--extract {} \
|
|
@@ -175,7 +188,7 @@ def _calculate_ld_r(study, matched_sumstats_snpid, row, bfile_prefix, n_cores, w
|
|
|
175
188
|
--threads {} {}\
|
|
176
189
|
--write-snplist \
|
|
177
190
|
--out {}
|
|
178
|
-
""".format(bfile_to_use, snplist_path , row["CHR"], mode, n_cores, memory_flag if memory is not None else "", output_prefix)
|
|
191
|
+
""".format(plink, bfile_to_use, snplist_path , row["CHR"], mode, n_cores, memory_flag if memory is not None else "", output_prefix)
|
|
179
192
|
|
|
180
193
|
try:
|
|
181
194
|
output = subprocess.check_output(script_vcf_to_bfile, stderr=subprocess.STDOUT, shell=True,text=True)
|
gwaslab/util_ex_calculate_prs.py
CHANGED
|
@@ -18,6 +18,8 @@ def _calculate_prs(sumstats,
|
|
|
18
18
|
memory=None,
|
|
19
19
|
overwrite=False,
|
|
20
20
|
mode=None,delete=True,
|
|
21
|
+
plink="plink",
|
|
22
|
+
plink2="plink2",
|
|
21
23
|
log=Log(),**kwargs):
|
|
22
24
|
|
|
23
25
|
#matching_alleles
|
|
@@ -30,14 +32,18 @@ def _calculate_prs(sumstats,
|
|
|
30
32
|
chrlist.sort()
|
|
31
33
|
plink_log = ""
|
|
32
34
|
#process reference fileWWW
|
|
33
|
-
bfile_prefix, plink_log, ref_bim, filetype = _process_plink_input_files(
|
|
35
|
+
bfile_prefix, plink_log, ref_bim, filetype = _process_plink_input_files(
|
|
36
|
+
chrlist=chrlist,
|
|
34
37
|
bfile=bfile,
|
|
35
38
|
vcf=vcf,
|
|
36
39
|
plink_log=plink_log,
|
|
37
40
|
n_cores=n_cores,
|
|
38
41
|
log=log,
|
|
39
42
|
load_bim=False,
|
|
40
|
-
overwrite=overwrite
|
|
43
|
+
overwrite=overwrite,
|
|
44
|
+
plink=plink,
|
|
45
|
+
plink2=plink2,
|
|
46
|
+
**kwargs)
|
|
41
47
|
score_file_path_list =[]
|
|
42
48
|
for index, chrom in enumerate(chrlist):
|
|
43
49
|
chr_sumstats = sumstats.loc[sumstats["CHR"]==chrom,:].copy()
|
|
@@ -61,7 +67,7 @@ def _calculate_prs(sumstats,
|
|
|
61
67
|
plink_log=plink_log,
|
|
62
68
|
log=log,
|
|
63
69
|
memory=memory,
|
|
64
|
-
mode=mode,filetype=filetype)
|
|
70
|
+
mode=mode,filetype=filetype,plink2=plink2)
|
|
65
71
|
score_file_path_list.append(score_file_path)
|
|
66
72
|
if delete == True:
|
|
67
73
|
os.remove(model_path)
|
|
@@ -71,10 +77,10 @@ def _calculate_prs(sumstats,
|
|
|
71
77
|
|
|
72
78
|
|
|
73
79
|
|
|
74
|
-
def _run_calculate_prs(study, chrom , model_path, bfile_prefix, n_cores, out, plink_log, log, memory,filetype, mode=None):
|
|
80
|
+
def _run_calculate_prs(study, chrom , model_path, bfile_prefix, n_cores, out, plink_log, log, memory,filetype, plink2,mode=None):
|
|
75
81
|
|
|
76
82
|
log.write(" -Start to calculate PRS for Chr {}...".format(chrom))
|
|
77
|
-
_checking_plink_version(
|
|
83
|
+
_checking_plink_version(plink2=plink2, log=log)
|
|
78
84
|
|
|
79
85
|
if "@" in bfile_prefix:
|
|
80
86
|
bpfile_to_use = bfile_prefix.replace("@",str(chrom))
|
|
@@ -92,13 +98,13 @@ def _run_calculate_prs(study, chrom , model_path, bfile_prefix, n_cores, out, pl
|
|
|
92
98
|
memory_flag = "--memory {}".format(memory)
|
|
93
99
|
|
|
94
100
|
script_vcf_to_bfile = """
|
|
95
|
-
|
|
101
|
+
{} \
|
|
96
102
|
{} \
|
|
97
103
|
--score {} 1 2 3 header {} cols=+scoresums,+denom ignore-dup-ids \
|
|
98
104
|
--chr {} \
|
|
99
105
|
--threads {} {}\
|
|
100
106
|
--out {}
|
|
101
|
-
""".format(file_flag, model_path , mode if mode is not None else "", chrom, n_cores, memory_flag if memory is not None else "", output_prefix)
|
|
107
|
+
""".format(plink2, file_flag, model_path , mode if mode is not None else "", chrom, n_cores, memory_flag if memory is not None else "", output_prefix)
|
|
102
108
|
|
|
103
109
|
try:
|
|
104
110
|
output = subprocess.check_output(script_vcf_to_bfile, stderr=subprocess.STDOUT, shell=True,text=True)
|
gwaslab/util_ex_process_ref.py
CHANGED
|
@@ -20,7 +20,9 @@ def _process_plink_input_files(chrlist,
|
|
|
20
20
|
bgen_mode="ref-first",
|
|
21
21
|
convert="bfile",
|
|
22
22
|
memory=None,
|
|
23
|
-
load_bim=False
|
|
23
|
+
load_bim=False,
|
|
24
|
+
plink="plink",
|
|
25
|
+
plink2="plink2"):
|
|
24
26
|
"""
|
|
25
27
|
Process input files (bfile,pfile,vcf,bgen) to either PLINK1 bed/bim/fam or PLINK2 pgen/psam/pvar.
|
|
26
28
|
|
|
@@ -66,7 +68,9 @@ def _process_plink_input_files(chrlist,
|
|
|
66
68
|
convert=convert,
|
|
67
69
|
memory=memory,
|
|
68
70
|
overwrite=overwrite,
|
|
69
|
-
load_bim=load_bim
|
|
71
|
+
load_bim=load_bim,
|
|
72
|
+
plink=plink,
|
|
73
|
+
plink2=plink2)
|
|
70
74
|
filetype = convert
|
|
71
75
|
elif filetype == "bgen":
|
|
72
76
|
ref_file_prefix, plink_log, ref_bims = _process_bgen(ref_file_prefix=ref_file_prefix,
|
|
@@ -81,7 +85,9 @@ def _process_plink_input_files(chrlist,
|
|
|
81
85
|
convert=convert,
|
|
82
86
|
memory=memory,
|
|
83
87
|
overwrite=overwrite,
|
|
84
|
-
load_bim=load_bim
|
|
88
|
+
load_bim=load_bim,
|
|
89
|
+
plink=plink,
|
|
90
|
+
plink2=plink2)
|
|
85
91
|
filetype = convert
|
|
86
92
|
return ref_file_prefix, plink_log, ref_bims, filetype
|
|
87
93
|
|
|
@@ -199,11 +205,13 @@ def _process_vcf(ref_file_prefix,
|
|
|
199
205
|
convert="bfile",
|
|
200
206
|
memory=None,
|
|
201
207
|
overwrite=False,
|
|
202
|
-
load_bim=False
|
|
208
|
+
load_bim=False,
|
|
209
|
+
plink="plink",
|
|
210
|
+
plink2="plink2"):
|
|
203
211
|
log.write(" -Processing VCF : {}...".format(ref_file_prefix))
|
|
204
212
|
|
|
205
213
|
#check plink version
|
|
206
|
-
log = _checking_plink_version(
|
|
214
|
+
log = _checking_plink_version(plink2=plink2,log=log)
|
|
207
215
|
|
|
208
216
|
# file path prefix to return
|
|
209
217
|
if is_wild_card==True:
|
|
@@ -243,14 +251,15 @@ def _process_vcf(ref_file_prefix,
|
|
|
243
251
|
#if not existing or overwrite is True
|
|
244
252
|
if (not is_file_exist) or overwrite:
|
|
245
253
|
script_vcf_to_bfile = """
|
|
246
|
-
|
|
254
|
+
{} \
|
|
247
255
|
--vcf {} \
|
|
248
256
|
--chr {} \
|
|
249
257
|
{} \
|
|
250
258
|
--rm-dup force-first \
|
|
251
259
|
--threads {}{}\
|
|
252
260
|
--out {}
|
|
253
|
-
""".format(
|
|
261
|
+
""".format(plink2,
|
|
262
|
+
vcf_to_load,
|
|
254
263
|
i,
|
|
255
264
|
make_flag,
|
|
256
265
|
n_cores, memory_flag,
|
|
@@ -288,11 +297,13 @@ def _process_bgen(ref_file_prefix,
|
|
|
288
297
|
convert="bfile",
|
|
289
298
|
memory=None,
|
|
290
299
|
overwrite=False,
|
|
291
|
-
load_bim=False
|
|
300
|
+
load_bim=False,
|
|
301
|
+
plink="plink",
|
|
302
|
+
plink2="plink2"):
|
|
292
303
|
log.write(" -Processing BGEN files : {}...".format(ref_file_prefix))
|
|
293
304
|
|
|
294
305
|
#check plink version
|
|
295
|
-
log = _checking_plink_version(
|
|
306
|
+
log = _checking_plink_version(log=log,plink2=plink2)
|
|
296
307
|
|
|
297
308
|
# file path prefix to return
|
|
298
309
|
if is_wild_card==True:
|
|
@@ -338,14 +349,14 @@ def _process_bgen(ref_file_prefix,
|
|
|
338
349
|
#if not existing or overwrite is True
|
|
339
350
|
if (not is_file_exist) or overwrite:
|
|
340
351
|
script_vcf_to_bfile = """
|
|
341
|
-
|
|
352
|
+
{} \
|
|
342
353
|
--bgen {} {} {}\
|
|
343
354
|
--chr {} \
|
|
344
355
|
{} \
|
|
345
356
|
--rm-dup force-first \
|
|
346
357
|
--threads {}{}\
|
|
347
358
|
--out {}
|
|
348
|
-
""".format(bgen_to_load, bgen_mode, sample_flag,
|
|
359
|
+
""".format(plink2,bgen_to_load, bgen_mode, sample_flag,
|
|
349
360
|
i,
|
|
350
361
|
make_flag,
|
|
351
362
|
n_cores, memory_flag,
|
gwaslab/viz_aux_chromatin.py
CHANGED
|
@@ -63,6 +63,8 @@ def _plot_chromatin_state(region_chromatin_files,
|
|
|
63
63
|
fig,
|
|
64
64
|
ax,
|
|
65
65
|
xlim_i,
|
|
66
|
+
fontsize = 12,
|
|
67
|
+
font_family = "Arial",
|
|
66
68
|
log=Log(),
|
|
67
69
|
verbose=True):
|
|
68
70
|
'''
|
|
@@ -101,11 +103,10 @@ def _plot_chromatin_state(region_chromatin_files,
|
|
|
101
103
|
|
|
102
104
|
## add stripe label
|
|
103
105
|
if len(region_chromatin_labels) == len(region_chromatin_files):
|
|
104
|
-
ax.set_yticks([i*0.1 for i in range(len(region_chromatin_labels))],
|
|
105
|
-
region_chromatin_labels)
|
|
106
|
+
ax.set_yticks([i*0.1 for i in range(len(region_chromatin_labels))], region_chromatin_labels, fontsize=fontsize, family=font_family)
|
|
106
107
|
else:
|
|
107
108
|
ax.set_yticks(ticks=[])
|
|
108
109
|
|
|
109
|
-
ax.set_xticks(ticks=[])
|
|
110
|
+
#ax.set_xticks(ticks=[])
|
|
110
111
|
ax.invert_yaxis()
|
|
111
112
|
return fig
|
gwaslab/viz_plot_mqqplot.py
CHANGED
|
@@ -23,8 +23,8 @@ from gwaslab.viz_aux_reposition_text import adjust_text_position
|
|
|
23
23
|
from gwaslab.viz_aux_annotate_plot import annotate_single
|
|
24
24
|
from gwaslab.viz_plot_qqplot import _plot_qq
|
|
25
25
|
from gwaslab.hm_harmonize_sumstats import auto_check_vcf_chr_dict
|
|
26
|
-
from gwaslab.
|
|
27
|
-
from gwaslab.
|
|
26
|
+
from gwaslab.viz_plot_regional2 import _plot_regional
|
|
27
|
+
from gwaslab.viz_plot_regional2 import process_vcf
|
|
28
28
|
from gwaslab.viz_aux_quickfix import _get_largenumber
|
|
29
29
|
from gwaslab.viz_aux_quickfix import _quick_fix_p_value
|
|
30
30
|
from gwaslab.viz_aux_quickfix import _quick_fix_pos
|
|
@@ -51,6 +51,9 @@ from gwaslab.bd_common_data import get_number_to_chr
|
|
|
51
51
|
from gwaslab.bd_common_data import get_recombination_rate
|
|
52
52
|
from gwaslab.bd_common_data import get_gtf
|
|
53
53
|
from gwaslab.g_version import _get_version
|
|
54
|
+
from matplotlib.colors import ListedColormap
|
|
55
|
+
from matplotlib.colors import LinearSegmentedColormap
|
|
56
|
+
from matplotlib.colors import to_hex
|
|
54
57
|
# 20230202 ######################################################################################################
|
|
55
58
|
|
|
56
59
|
def mqqplot(insumstats,
|
|
@@ -98,13 +101,13 @@ def mqqplot(insumstats,
|
|
|
98
101
|
region_ld_threshold = None,
|
|
99
102
|
region_ld_legend = True,
|
|
100
103
|
region_ld_colors = None,
|
|
101
|
-
|
|
102
|
-
region_ld_colors2 = None,
|
|
104
|
+
region_ld_colors_m = None,
|
|
103
105
|
region_recombination = True,
|
|
104
106
|
region_protein_coding = True,
|
|
105
107
|
region_flank_factor = 0.05,
|
|
106
108
|
region_anno_bbox_args = None,
|
|
107
|
-
|
|
109
|
+
region_marker_shapes=None,
|
|
110
|
+
cbar_title='LD $r^{2}$ with variant',
|
|
108
111
|
cbar_fontsize = None,
|
|
109
112
|
cbar_font_family = None,
|
|
110
113
|
track_n=4,
|
|
@@ -238,20 +241,39 @@ def mqqplot(insumstats,
|
|
|
238
241
|
anno_args=dict()
|
|
239
242
|
if colors is None:
|
|
240
243
|
colors=["#597FBD","#74BAD3"]
|
|
241
|
-
|
|
242
|
-
|
|
244
|
+
|
|
245
|
+
if region is not None:
|
|
246
|
+
if marker_size == (5,20):
|
|
247
|
+
marker_size=(45,65)
|
|
248
|
+
|
|
249
|
+
# make region_ref a list of ref variants
|
|
250
|
+
if pd.api.types.is_list_like(region_ref):
|
|
251
|
+
if len(region_ref) == 0 :
|
|
252
|
+
region_ref.append(None)
|
|
253
|
+
if region_ref_second is not None:
|
|
254
|
+
region_ref.append(region_ref_second)
|
|
255
|
+
else:
|
|
256
|
+
region_ref = [region_ref]
|
|
257
|
+
if region_ref_second is not None:
|
|
258
|
+
region_ref.append(region_ref_second)
|
|
259
|
+
region_ref_index_dic = {value: index for index,value in enumerate(region_ref)}
|
|
260
|
+
|
|
261
|
+
if region_marker_shapes is None:
|
|
262
|
+
# 9 shapes
|
|
263
|
+
region_marker_shapes = ['o', 's','^','D','*','P','X','h','8']
|
|
243
264
|
if region_grid_line is None:
|
|
244
265
|
region_grid_line = {"linewidth": 2,"linestyle":"--"}
|
|
245
266
|
if region_lead_grid_line is None:
|
|
246
267
|
region_lead_grid_line = {"alpha":0.5,"linewidth" : 2,"linestyle":"--","color":"#FF0000"}
|
|
247
268
|
if region_ld_threshold is None:
|
|
248
269
|
region_ld_threshold = [0.2,0.4,0.6,0.8]
|
|
270
|
+
|
|
249
271
|
if region_ld_colors is None:
|
|
250
272
|
region_ld_colors = ["#E4E4E4","#020080","#86CEF9","#24FF02","#FDA400","#FF0000","#FF0000"]
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
273
|
+
|
|
274
|
+
# 7 colors
|
|
275
|
+
region_ld_colors_m = ["#E51819","#367EB7","green","#F07818","#AD5691","yellow","purple"]
|
|
276
|
+
|
|
255
277
|
if region_title_args is None:
|
|
256
278
|
region_title_args = {"size":10}
|
|
257
279
|
if cbar_fontsize is None:
|
|
@@ -353,6 +375,7 @@ def mqqplot(insumstats,
|
|
|
353
375
|
lines_to_plot = -np.log10(lines_to_plot)
|
|
354
376
|
|
|
355
377
|
vcf_chr_dict = auto_check_vcf_chr_dict(vcf_path, vcf_chr_dict, verbose, log)
|
|
378
|
+
|
|
356
379
|
|
|
357
380
|
# Plotting mode selection : layout ####################################################################
|
|
358
381
|
# ax1 : manhattanplot / brisbane plot
|
|
@@ -435,7 +458,7 @@ def mqqplot(insumstats,
|
|
|
435
458
|
region_chr = region[0]
|
|
436
459
|
region_start = region[1]
|
|
437
460
|
region_end = region[2]
|
|
438
|
-
|
|
461
|
+
|
|
439
462
|
log.write(" -Extract SNPs in region : chr{}:{}-{}...".format(region_chr, region[1], region[2]),verbose=verbose)
|
|
440
463
|
|
|
441
464
|
in_region_snp = (sumstats[chrom]==region_chr) & (sumstats[pos]<region_end) & (sumstats[pos]>region_start)
|
|
@@ -548,7 +571,6 @@ def mqqplot(insumstats,
|
|
|
548
571
|
vcf_path=vcf_path,
|
|
549
572
|
region=region,
|
|
550
573
|
region_ref=region_ref,
|
|
551
|
-
region_ref_second=region_ref_second,
|
|
552
574
|
log=log ,
|
|
553
575
|
pos=pos,
|
|
554
576
|
ea=ea,
|
|
@@ -589,17 +611,27 @@ def mqqplot(insumstats,
|
|
|
589
611
|
if vcf_path is not None:
|
|
590
612
|
legend=None
|
|
591
613
|
linewidth=1
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
614
|
+
if len(region_ref) == 1:
|
|
615
|
+
palette = {100+i:region_ld_colors[i] for i in range(len(region_ld_colors))}
|
|
616
|
+
else:
|
|
617
|
+
palette = {}
|
|
618
|
+
region_color_maps = []
|
|
619
|
+
for group_index, colorgroup in enumerate(region_ld_colors_m):
|
|
620
|
+
color_map_len = len(region_ld_threshold)+2 # default 6
|
|
621
|
+
rgba = LinearSegmentedColormap.from_list("custom", ["white",colorgroup], color_map_len)(range(1,color_map_len)) # skip white
|
|
622
|
+
output_hex_colors=[]
|
|
623
|
+
for i in range(len(rgba)):
|
|
624
|
+
output_hex_colors.append(to_hex(rgba[i]))
|
|
625
|
+
# 1 + 5 + 1
|
|
626
|
+
region_ld_colors_single = [region_ld_colors[0]] + output_hex_colors + [output_hex_colors[-1]]
|
|
627
|
+
region_color_maps.append(region_ld_colors_single)
|
|
628
|
+
# gradient colors
|
|
629
|
+
for i, hex_colors in enumerate(region_color_maps):
|
|
630
|
+
for j, hex_color in enumerate(hex_colors):
|
|
631
|
+
palette[(i+1)*100 + j ] = hex_color
|
|
632
|
+
|
|
598
633
|
edgecolor="none"
|
|
599
|
-
|
|
600
|
-
scatter_args["markers"]=['o', 's']
|
|
601
|
-
else:
|
|
602
|
-
scatter_args["markers"]=['o']
|
|
634
|
+
scatter_args["markers"]= region_marker_shapes[:len(region_ref)]
|
|
603
635
|
style="SHAPE"
|
|
604
636
|
|
|
605
637
|
|
|
@@ -649,6 +681,7 @@ def mqqplot(insumstats,
|
|
|
649
681
|
|
|
650
682
|
## if not highlight
|
|
651
683
|
else:
|
|
684
|
+
## density plot
|
|
652
685
|
if density_color == True:
|
|
653
686
|
hue = "DENSITY_hue"
|
|
654
687
|
s = "DENSITY"
|
|
@@ -676,6 +709,7 @@ def mqqplot(insumstats,
|
|
|
676
709
|
linewidth=linewidth,
|
|
677
710
|
zorder=2,ax=ax1,edgecolor=edgecolor,**scatter_args)
|
|
678
711
|
else:
|
|
712
|
+
# major / regional
|
|
679
713
|
s = "s"
|
|
680
714
|
hue = 'chr_hue'
|
|
681
715
|
hue_norm=None
|
|
@@ -720,7 +754,7 @@ def mqqplot(insumstats,
|
|
|
720
754
|
# if regional plot : pinpoint lead , add color bar ##################################################
|
|
721
755
|
if (region is not None) and ("r" in mode):
|
|
722
756
|
|
|
723
|
-
ax1, ax3, ax4, cbar,
|
|
757
|
+
ax1, ax3, ax4, cbar, lead_snp_is, lead_snp_is_color =_plot_regional(
|
|
724
758
|
sumstats=sumstats,
|
|
725
759
|
fig=fig,
|
|
726
760
|
ax1=ax1,
|
|
@@ -744,8 +778,8 @@ def mqqplot(insumstats,
|
|
|
744
778
|
rr_ylabel=rr_ylabel,
|
|
745
779
|
mode=mode,
|
|
746
780
|
region_step = region_step,
|
|
747
|
-
region_ref=region_ref,
|
|
748
|
-
|
|
781
|
+
region_ref = region_ref,
|
|
782
|
+
region_ref_index_dic = region_ref_index_dic,
|
|
749
783
|
region_grid = region_grid,
|
|
750
784
|
region_grid_line = region_grid_line,
|
|
751
785
|
region_lead_grid = region_lead_grid,
|
|
@@ -756,8 +790,8 @@ def mqqplot(insumstats,
|
|
|
756
790
|
region_ld_legend = region_ld_legend,
|
|
757
791
|
region_ld_threshold = region_ld_threshold,
|
|
758
792
|
region_ld_colors = region_ld_colors,
|
|
759
|
-
|
|
760
|
-
|
|
793
|
+
palette = palette,
|
|
794
|
+
region_marker_shapes = region_marker_shapes,
|
|
761
795
|
region_recombination = region_recombination,
|
|
762
796
|
region_protein_coding=region_protein_coding,
|
|
763
797
|
region_flank_factor =region_flank_factor,
|
|
@@ -771,8 +805,8 @@ def mqqplot(insumstats,
|
|
|
771
805
|
)
|
|
772
806
|
|
|
773
807
|
else:
|
|
774
|
-
|
|
775
|
-
|
|
808
|
+
lead_snp_is =[]
|
|
809
|
+
lead_snp_is_color = []
|
|
776
810
|
|
|
777
811
|
log.write("Finished creating MQQ plot successfully!",verbose=verbose)
|
|
778
812
|
|
|
@@ -884,8 +918,8 @@ def mqqplot(insumstats,
|
|
|
884
918
|
# regional plot cbar
|
|
885
919
|
if cbar is not None:
|
|
886
920
|
cbar = _process_cbar(cbar,
|
|
887
|
-
cbar_fontsize=
|
|
888
|
-
cbar_font_family=
|
|
921
|
+
cbar_fontsize=cbar_fontsize,
|
|
922
|
+
cbar_font_family=cbar_font_family,
|
|
889
923
|
cbar_title=cbar_title,
|
|
890
924
|
log=log,
|
|
891
925
|
verbose=verbose)
|
|
@@ -1021,7 +1055,7 @@ def mqqplot(insumstats,
|
|
|
1021
1055
|
garbage_collect.collect()
|
|
1022
1056
|
# Return matplotlib figure object #######################################################################################
|
|
1023
1057
|
if _get_region_lead==True:
|
|
1024
|
-
return fig, log,
|
|
1058
|
+
return fig, log, lead_snp_is, lead_snp_is_color
|
|
1025
1059
|
|
|
1026
1060
|
log.write("Finished creating plot successfully!",verbose=verbose)
|
|
1027
1061
|
return fig, log
|
|
@@ -1271,15 +1305,19 @@ def _process_line(ax1, sig_line, suggestive_sig_line, additional_line, lines_to_
|
|
|
1271
1305
|
|
|
1272
1306
|
def _process_cbar(cbar, cbar_fontsize, cbar_font_family, cbar_title, log=Log(),verbose=True):
|
|
1273
1307
|
log.write(" -Processing color bar...",verbose=verbose)
|
|
1274
|
-
if type(cbar) == list:
|
|
1275
|
-
|
|
1276
|
-
|
|
1277
|
-
|
|
1278
|
-
|
|
1279
|
-
else:
|
|
1280
|
-
|
|
1281
|
-
|
|
1282
|
-
|
|
1308
|
+
#if type(cbar) == list:
|
|
1309
|
+
# for cbar_single in cbar:
|
|
1310
|
+
# cbar_yticklabels = cbar_single.ax.get_yticklabels()
|
|
1311
|
+
# cbar_single.ax.set_yticklabels(cbar_yticklabels, fontsize=cbar_fontsize, family=cbar_font_family )
|
|
1312
|
+
# cbar_single.ax.set_title(cbar_title, fontsize=cbar_fontsize, family=cbar_font_family, loc="center",y=-0.2 )
|
|
1313
|
+
#else:
|
|
1314
|
+
|
|
1315
|
+
cbar_yticklabels = cbar.get_yticklabels()
|
|
1316
|
+
cbar.set_yticklabels(cbar_yticklabels, fontsize=cbar_fontsize, family=cbar_font_family )
|
|
1317
|
+
cbar_xticklabels = cbar.get_xticklabels()
|
|
1318
|
+
cbar.set_xticklabels(cbar_xticklabels, fontsize=cbar_fontsize, family=cbar_font_family )
|
|
1319
|
+
|
|
1320
|
+
cbar.set_title(cbar_title, fontsize=cbar_fontsize, family=cbar_font_family, loc="center", y=1.00 )
|
|
1283
1321
|
return cbar
|
|
1284
1322
|
|
|
1285
1323
|
def _process_xtick(ax1, chrom_df, xtick_chr_dict, fontsize, font_family, log=Log(),verbose=True):
|