gwaslab 3.5.4__py3-none-any.whl → 3.5.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gwaslab might be problematic. Click here for more details.
- gwaslab/__init__.py +3 -1
- gwaslab/g_Sumstats.py +56 -9
- gwaslab/g_SumstatsPair.py +16 -12
- gwaslab/g_SumstatsSet.py +663 -0
- gwaslab/g_headers.py +131 -0
- gwaslab/g_meta.py +2 -1
- gwaslab/g_version.py +3 -3
- gwaslab/hm_harmonize_sumstats.py +91 -1
- gwaslab/io_preformat_input.py +29 -7
- gwaslab/io_read_pipcs.py +23 -0
- gwaslab/io_to_formats.py +45 -44
- gwaslab/qc_check_datatype.py +65 -42
- gwaslab/qc_fix_sumstats.py +1 -1
- gwaslab/util_ex_ldproxyfinder.py +162 -3
- gwaslab/util_ex_ldsc.py +9 -0
- gwaslab/util_ex_run_2samplemr.py +34 -0
- gwaslab/util_ex_run_clumping.py +4 -2
- gwaslab/util_in_fill_data.py +28 -3
- gwaslab/util_in_filter_value.py +66 -1
- gwaslab/util_in_merge.py +51 -0
- gwaslab/viz_aux_save_figure.py +2 -1
- gwaslab/viz_plot_credible_sets.py +99 -0
- gwaslab/viz_plot_effect.py +283 -0
- gwaslab/viz_plot_miamiplot2.py +1 -1
- gwaslab/viz_plot_mqqplot.py +31 -11
- gwaslab/viz_plot_regional2.py +133 -32
- gwaslab/viz_plot_stackedregional.py +64 -34
- {gwaslab-3.5.4.dist-info → gwaslab-3.5.6.dist-info}/METADATA +4 -4
- {gwaslab-3.5.4.dist-info → gwaslab-3.5.6.dist-info}/RECORD +33 -28
- {gwaslab-3.5.4.dist-info → gwaslab-3.5.6.dist-info}/WHEEL +1 -1
- gwaslab/vis_plot_credible sets.py +0 -0
- {gwaslab-3.5.4.dist-info → gwaslab-3.5.6.dist-info}/LICENSE +0 -0
- {gwaslab-3.5.4.dist-info → gwaslab-3.5.6.dist-info}/LICENSE_before_v3.4.39 +0 -0
- {gwaslab-3.5.4.dist-info → gwaslab-3.5.6.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
import matplotlib.pyplot as plt
|
|
3
|
+
import matplotlib.ticker as ticker
|
|
4
|
+
import matplotlib.patches as patches
|
|
5
|
+
import seaborn as sns
|
|
6
|
+
import numpy as np
|
|
7
|
+
import scipy as sp
|
|
8
|
+
import copy
|
|
9
|
+
from math import ceil
|
|
10
|
+
from shutil import which
|
|
11
|
+
from pyensembl import EnsemblRelease
|
|
12
|
+
from allel import GenotypeArray
|
|
13
|
+
from allel import read_vcf
|
|
14
|
+
from allel import rogers_huff_r_between
|
|
15
|
+
import matplotlib as mpl
|
|
16
|
+
from scipy import stats
|
|
17
|
+
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
|
|
18
|
+
from mpl_toolkits.axes_grid1.inset_locator import mark_inset
|
|
19
|
+
from matplotlib.ticker import MaxNLocator
|
|
20
|
+
import gc as garbage_collect
|
|
21
|
+
from adjustText import adjust_text
|
|
22
|
+
from gwaslab.viz_aux_reposition_text import adjust_text_position
|
|
23
|
+
from gwaslab.viz_aux_annotate_plot import annotate_single
|
|
24
|
+
from gwaslab.viz_plot_qqplot import _plot_qq
|
|
25
|
+
from gwaslab.hm_harmonize_sumstats import auto_check_vcf_chr_dict
|
|
26
|
+
from gwaslab.viz_plot_regional2 import _plot_regional
|
|
27
|
+
from gwaslab.viz_plot_regional2 import process_vcf
|
|
28
|
+
from gwaslab.viz_plot_regional2 import _get_lead_id
|
|
29
|
+
from gwaslab.viz_aux_quickfix import _get_largenumber
|
|
30
|
+
from gwaslab.viz_aux_quickfix import _quick_fix_p_value
|
|
31
|
+
from gwaslab.viz_aux_quickfix import _quick_fix_pos
|
|
32
|
+
from gwaslab.viz_aux_quickfix import _quick_fix_chr
|
|
33
|
+
from gwaslab.viz_aux_quickfix import _quick_fix_eaf
|
|
34
|
+
from gwaslab.viz_aux_quickfix import _quick_fix_mlog10p
|
|
35
|
+
from gwaslab.viz_aux_quickfix import _quick_add_tchrpos
|
|
36
|
+
from gwaslab.viz_aux_quickfix import _quick_merge_sumstats
|
|
37
|
+
from gwaslab.viz_aux_quickfix import _quick_assign_i
|
|
38
|
+
from gwaslab.viz_aux_quickfix import _quick_assign_i_with_rank
|
|
39
|
+
from gwaslab.viz_aux_quickfix import _quick_extract_snp_in_region
|
|
40
|
+
from gwaslab.viz_aux_quickfix import _quick_assign_highlight_hue_pair
|
|
41
|
+
from gwaslab.viz_aux_quickfix import _quick_assign_marker_relative_size
|
|
42
|
+
from gwaslab.viz_aux_quickfix import _cut
|
|
43
|
+
from gwaslab.viz_aux_quickfix import _set_yticklabels
|
|
44
|
+
from gwaslab.viz_aux_quickfix import _jagged_y
|
|
45
|
+
from gwaslab.viz_aux_save_figure import save_figure
|
|
46
|
+
from gwaslab.g_Log import Log
|
|
47
|
+
from gwaslab.util_in_calculate_gc import lambdaGC
|
|
48
|
+
from gwaslab.util_in_get_sig import getsig
|
|
49
|
+
from gwaslab.util_in_get_sig import annogene
|
|
50
|
+
from gwaslab.bd_common_data import get_chr_to_number
|
|
51
|
+
from gwaslab.bd_common_data import get_number_to_chr
|
|
52
|
+
from gwaslab.bd_common_data import get_recombination_rate
|
|
53
|
+
from gwaslab.bd_common_data import get_gtf
|
|
54
|
+
from gwaslab.util_in_filter_value import _filter_region
|
|
55
|
+
from gwaslab.g_version import _get_version
|
|
56
|
+
from matplotlib.colors import ListedColormap
|
|
57
|
+
from matplotlib.colors import LinearSegmentedColormap
|
|
58
|
+
from matplotlib.colors import to_hex
|
|
59
|
+
from gwaslab.io_process_args import _extract_kwargs
|
|
60
|
+
|
|
61
|
+
def _plot_effect(to_plot,
                 y=None,
                 y_sort=None,
                 group=None,
                 x="BETA",
                 se="SE",
                 eaf="EAF",
                 snpr2="SNPR2",
                 ylabel="Variant",
                 eaf_panel=True,
                 snpvar_panel=True,
                 rename_dic=None,
                 err_args=None,
                 font_args=None,
                 save=None,
                 title=None,
                 save_args=None,
                 eaf_args=None,
                 snpr2_args=None,
                 fig_args=None,
                 scatter_args=None,
                 effect_label=None,
                 eaf_label=None,
                 snpr2_label=None,
                 log=Log(),
                 verbose=True,
                 legend_mode=1,
                 ncol=2,
                 size=None,
                 hue=None,
                 style=None,
                 **args):
    """Draw an effect-size scatter plot with error bars and optional bar panels.

    One row is drawn per record of ``to_plot``: a scatter point at column
    ``x`` with a horizontal error bar of half-width ``se``. Optional side
    panels show horizontal bars for the ``eaf`` and ``snpr2`` columns. Rows
    are ordered by ``y_sort`` and then by ``group``; a one-row gap is left
    between consecutive groups.

    Parameters
    ----------
    to_plot : pandas.DataFrame
        Data to plot. It is copied, so the caller's frame is not modified.
    y : str or list of str
        Column name(s) whose values are joined with "-" to build the y tick
        label of each row. A single string is treated as a one-item list.
    y_sort : list of str, optional
        Columns for the first sort. Defaults to ``["CHR", "POS", "STUDY"]``.
    group : list of str, optional
        Columns defining the row groups. Defaults to ``["CHR", "POS"]``.
    x, se, eaf, snpr2 : str
        Column names for the effect size, its standard error, and the values
        of the two optional bar panels.
    ylabel, title : str, optional
        Y-axis label and title of the effect panel.
    eaf_panel, snpvar_panel : bool
        Whether to add the ``eaf`` / ``snpr2`` bar panel.
    err_args, eaf_args, snpr2_args : dict, optional
        Keyword arguments for ``Axes.errorbar`` and the two ``Axes.barh`` calls.
    fig_args : dict, optional
        Keyword arguments for ``plt.subplots``. When omitted, the single-panel
        figure uses ``figsize=(8, 8), dpi=300`` and multi-panel figures use
        ``dpi=400``.
    save, save_args, log, verbose
        Forwarded to ``save_figure``.
    effect_label, eaf_label, snpr2_label : str, optional
        X-axis labels overriding the defaults of the corresponding panels.
    legend_mode : int
        When 1, an existing seaborn legend is moved to the right of the
        last panel.
    size, hue, style : str, optional
        Forwarded to ``seaborn.scatterplot`` together with ``**args``.
    rename_dic, font_args, scatter_args, ncol
        Accepted for interface compatibility; not used for drawing here.

    Returns
    -------
    matplotlib.figure.Figure

    Raises
    ------
    TypeError
        If ``y`` is None.
    """
    if err_args is None:
        err_args = {"ecolor": "#cccccc",
                    "linewidth": 0,
                    "zorder": 90,
                    "elinewidth": 1}
    if eaf_args is None:
        eaf_args = {"color": "#74BAD3"}
    if snpr2_args is None:
        snpr2_args = {"color": "#74BAD3"}
    if font_args is None:
        font_args = {'fontsize':12,'family':'sans','fontname':'Arial'}
    if scatter_args is None:
        scatter_args = {"s": 20}

    # Semantic mappings are handed to seaborn through the pass-through kwargs.
    if hue is not None:
        args["hue"] = hue
    if size is not None:
        args["size"] = size
    if style is not None:
        args["style"] = style

    save_kwargs = _extract_kwargs("save", save_args, locals())
    err_kwargs = _extract_kwargs("err", err_args, locals())
    # NOTE(review): scatter_kwargs and font_kwargs are resolved but never
    # consumed below; kept so the helper is invoked exactly as before.
    scatter_kwargs = _extract_kwargs("scatter", scatter_args, locals())
    font_kwargs = _extract_kwargs("font", font_args, locals())

    if y is None:
        raise TypeError("y must be a column name or a list of column names.")
    if isinstance(y, str):
        # "-".join on a bare string would join its characters.
        y = [y]
    y = list(y)
    y_name = "-".join(y)

    # Work on a copy so helper columns never leak into the caller's DataFrame.
    to_plot = to_plot.copy()
    to_plot[y_name] = to_plot[y].apply(lambda row: "-".join(map(str, row)), axis=1)

    if y_sort is None:
        y_sort = ["CHR", "POS", "STUDY"]
    if group is None:
        group = ["CHR", "POS"]

    to_plot = to_plot.sort_values(by=y_sort)
    # A stable sort keeps the y_sort order inside each group.
    to_plot = to_plot.sort_values(by=group, kind="mergesort")

    # Adding the 1-based group number to the running row number leaves one
    # empty row between consecutive groups.
    to_plot["_VAR_GROUP"] = to_plot.groupby(group).ngroup() + 1
    to_plot["_VAR_INDEX"] = range(len(to_plot))
    to_plot["_VAR_INDEX"] = to_plot["_VAR_INDEX"] + to_plot["_VAR_GROUP"]
    y = "_VAR_INDEX"

    show_eaf = bool(eaf_panel)
    show_snpr2 = bool(snpvar_panel)
    n_panels = 1 + show_eaf + show_snpr2

    if n_panels == 1:
        single_opts = {"figsize": (8, 8), "dpi": 300} if fig_args is None else fig_args
        fig, ax1 = plt.subplots(ncols=1, **single_opts)
        # Keep a sequence so the legend anchoring below works for any layout.
        axes = [ax1]
    else:
        # Caller options override the historical dpi=400 default; the panels
        # always share the y axis because they use the same row positions.
        multi_opts = {"dpi": 400, **(fig_args or {}), "sharey": True}
        fig, axes = plt.subplots(ncols=n_panels, **multi_opts)
        ax1 = axes[0]

    side_axes = iter(axes[1:])
    ax2 = next(side_axes) if show_eaf else None
    ax3 = next(side_axes) if show_snpr2 else None

    sns.scatterplot(data=to_plot, x=x, y=y, ax=ax1, zorder=100, **args)
    ax1.errorbar(y=to_plot[y], x=to_plot[x], xerr=to_plot[se], **err_kwargs)

    ax1.axvline(x=0, linestyle="dashed", c="grey")
    ax1.set_yticks(to_plot[y], labels=to_plot[y_name])
    ax1.set_ylabel(ylabel)

    if title is not None:
        ax1.set_title(title)

    if ax2 is not None:
        ax2.barh(y=to_plot[y], width=to_plot[eaf], zorder=100, **eaf_args)
        ax2.set_xlabel(eaf)

    if ax3 is not None:
        ax3.barh(y=to_plot[y], width=to_plot[snpr2], zorder=100, **snpr2_args)
        ax3.set_xlabel(snpr2)

    # seaborn.move_legend raises when the axes has no legend, which is the
    # case when no hue/size/style mapping produced one.
    if legend_mode == 1 and ax1.get_legend() is not None:
        sns.move_legend(
            ax1, "upper left",
            bbox_to_anchor=(1, 1), title=None, frameon=False,
            bbox_transform=axes[-1].transAxes
        )

    if effect_label is not None:
        ax1.set_xlabel(effect_label)
    if eaf_label is not None and ax2 is not None:
        ax2.set_xlabel(eaf_label)
    if snpr2_label is not None and ax3 is not None:
        ax3.set_xlabel(snpr2_label)

    save_figure(fig, save, keyword="forest", save_args=save_kwargs, log=log, verbose=verbose)

    return fig
|
gwaslab/viz_plot_miamiplot2.py
CHANGED
|
@@ -249,7 +249,7 @@ def plot_miami2(
|
|
|
249
249
|
#####################################################################################################################
|
|
250
250
|
##plotting
|
|
251
251
|
if figax is None:
|
|
252
|
-
fig_args["figsize"] = (15,10)
|
|
252
|
+
#fig_args["figsize"] = (15,10)
|
|
253
253
|
fig, (ax1, ax5) = plt.subplots(2, 1, gridspec_kw={'height_ratios': [1, 1]},**fig_args)
|
|
254
254
|
plt.subplots_adjust(hspace=region_hspace)
|
|
255
255
|
else:
|
gwaslab/viz_plot_mqqplot.py
CHANGED
|
@@ -51,6 +51,7 @@ from gwaslab.bd_common_data import get_chr_to_number
|
|
|
51
51
|
from gwaslab.bd_common_data import get_number_to_chr
|
|
52
52
|
from gwaslab.bd_common_data import get_recombination_rate
|
|
53
53
|
from gwaslab.bd_common_data import get_gtf
|
|
54
|
+
from gwaslab.util_in_filter_value import _filter_region
|
|
54
55
|
from gwaslab.g_version import _get_version
|
|
55
56
|
from matplotlib.colors import ListedColormap
|
|
56
57
|
from matplotlib.colors import LinearSegmentedColormap
|
|
@@ -109,9 +110,17 @@ def mqqplot(insumstats,
|
|
|
109
110
|
region_anno_bbox_args = None,
|
|
110
111
|
region_marker_shapes=None,
|
|
111
112
|
region_legend_marker=True,
|
|
113
|
+
region_ref_alias = None,
|
|
112
114
|
cbar_title='LD $r^{2}$ with variant',
|
|
113
115
|
cbar_fontsize = None,
|
|
116
|
+
cbar_scale=True,
|
|
114
117
|
cbar_font_family = None,
|
|
118
|
+
cbar_bbox_to_anchor = (0,0,1,1),
|
|
119
|
+
cbar_equal_aspect = True,
|
|
120
|
+
cbar_w_scale=1,
|
|
121
|
+
cbar_h_scale=1,
|
|
122
|
+
cbar_downward_offset =1.3,
|
|
123
|
+
cbar_borderpad=None,
|
|
115
124
|
track_n=4,
|
|
116
125
|
track_n_offset=0,
|
|
117
126
|
track_fontsize_ratio=0.95,
|
|
@@ -475,18 +484,20 @@ def mqqplot(insumstats,
|
|
|
475
484
|
sumstats[chrom] = _quick_fix_chr(sumstats[chrom], chr_dict=chr_dict)
|
|
476
485
|
|
|
477
486
|
## r
|
|
487
|
+
|
|
478
488
|
if region is not None:
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
489
|
+
sumstats = _filter_region(sumstats, region, log=log, verbose=verbose)
|
|
490
|
+
# region_chr = region[0]
|
|
491
|
+
# region_start = region[1]
|
|
492
|
+
# region_end = region[2]
|
|
493
|
+
#
|
|
494
|
+
# log.write(" -Extract SNPs in region : chr{}:{}-{}...".format(region_chr, region[1], region[2]),verbose=verbose)
|
|
495
|
+
#
|
|
496
|
+
# in_region_snp = (sumstats[chrom]==region_chr) & (sumstats[pos]<region_end) & (sumstats[pos]>region_start)
|
|
497
|
+
#
|
|
498
|
+
# log.write(" -Extract SNPs in specified regions: "+str(sum(in_region_snp)),verbose=verbose)
|
|
499
|
+
# sumstats = sumstats.loc[in_region_snp,:]
|
|
500
|
+
#
|
|
490
501
|
if len(sumstats)==0:
|
|
491
502
|
log.warning("No valid data! Please check the input.")
|
|
492
503
|
return None
|
|
@@ -810,6 +821,14 @@ def mqqplot(insumstats,
|
|
|
810
821
|
fontsize=fontsize,
|
|
811
822
|
build=build,
|
|
812
823
|
chrom_df=chrom_df,
|
|
824
|
+
cbar_scale=cbar_scale,
|
|
825
|
+
cbar_fontsize=cbar_fontsize,
|
|
826
|
+
cbar_bbox_to_anchor=cbar_bbox_to_anchor,
|
|
827
|
+
cbar_w_scale=cbar_w_scale,
|
|
828
|
+
cbar_h_scale=cbar_h_scale,
|
|
829
|
+
cbar_equal_aspect=cbar_equal_aspect,
|
|
830
|
+
cbar_downward_offset =cbar_downward_offset,
|
|
831
|
+
cbar_borderpad=cbar_borderpad,
|
|
813
832
|
xtick_chr_dict=xtick_chr_dict,
|
|
814
833
|
cut_line_color=cut_line_color,
|
|
815
834
|
vcf_chr_dict =vcf_chr_dict,
|
|
@@ -825,6 +844,7 @@ def mqqplot(insumstats,
|
|
|
825
844
|
region_step = region_step,
|
|
826
845
|
region_ref = region_ref,
|
|
827
846
|
region_ref_index_dic = region_ref_index_dic,
|
|
847
|
+
region_ref_alias = region_ref_alias,
|
|
828
848
|
region_grid = region_grid,
|
|
829
849
|
region_grid_line = region_grid_line,
|
|
830
850
|
region_lead_grid = region_lead_grid,
|
gwaslab/viz_plot_regional2.py
CHANGED
|
@@ -57,6 +57,7 @@ def _plot_regional(
|
|
|
57
57
|
region_step = 21,
|
|
58
58
|
region_ref=None,
|
|
59
59
|
region_ref_index_dic = None,
|
|
60
|
+
region_ref_alias = None,
|
|
60
61
|
#region_ref_second=None,
|
|
61
62
|
region_grid = False,
|
|
62
63
|
region_grid_line = {"linewidth": 2,"linestyle":"--"},
|
|
@@ -67,6 +68,14 @@ def _plot_regional(
|
|
|
67
68
|
region_ld_threshold = [0.2,0.4,0.6,0.8],
|
|
68
69
|
region_ld_colors = ["#E4E4E4","#020080","#86CEF9","#24FF02","#FDA400","#FF0000","#FF0000"],
|
|
69
70
|
region_marker_shapes=None,
|
|
71
|
+
cbar_fontsize=None,
|
|
72
|
+
cbar_scale=False,
|
|
73
|
+
cbar_bbox_to_anchor=None,
|
|
74
|
+
cbar_w_scale=1,
|
|
75
|
+
cbar_h_scale=1,
|
|
76
|
+
cbar_downward_offset =1.3,
|
|
77
|
+
cbar_borderpad=None,
|
|
78
|
+
cbar_equal_aspect=False,
|
|
70
79
|
palette=None,
|
|
71
80
|
region_recombination = True,
|
|
72
81
|
region_protein_coding=True,
|
|
@@ -132,7 +141,16 @@ def _plot_regional(
|
|
|
132
141
|
region_ref=region_ref,
|
|
133
142
|
region_ld_threshold=region_ld_threshold,
|
|
134
143
|
region_ref_index_dic=region_ref_index_dic,
|
|
144
|
+
region_ref_alias=region_ref_alias,
|
|
135
145
|
region_marker_shapes=region_marker_shapes,
|
|
146
|
+
cbar_fontsize= cbar_fontsize,
|
|
147
|
+
cbar_scale=cbar_scale,
|
|
148
|
+
cbar_equal_aspect=cbar_equal_aspect,
|
|
149
|
+
cbar_bbox_to_anchor=cbar_bbox_to_anchor,
|
|
150
|
+
cbar_w_scale=cbar_w_scale,
|
|
151
|
+
cbar_h_scale=cbar_h_scale,
|
|
152
|
+
cbar_downward_offset =cbar_downward_offset,
|
|
153
|
+
cbar_borderpad=cbar_borderpad,
|
|
136
154
|
palette=palette,
|
|
137
155
|
region_legend_marker=region_legend_marker,
|
|
138
156
|
fig=fig)
|
|
@@ -154,6 +172,8 @@ def _plot_regional(
|
|
|
154
172
|
build= build,
|
|
155
173
|
rr_lim=rr_lim,
|
|
156
174
|
rr_ylabel=rr_ylabel)
|
|
175
|
+
else:
|
|
176
|
+
ax4 = None
|
|
157
177
|
|
|
158
178
|
## regional plot : gene track ######################################################################
|
|
159
179
|
# calculate offset
|
|
@@ -345,25 +365,52 @@ def _add_region_title(region_title, ax1,region_title_args):
|
|
|
345
365
|
ax1.text(0.015,0.97, region_title, transform=ax1.transAxes, va="top", ha="left", region_ref=None, **region_title_args )
|
|
346
366
|
return ax1
|
|
347
367
|
|
|
348
|
-
def _add_ld_legend(sumstats, ax1, region_ld_threshold, region_ref,region_ref_index_dic,region_marker_shapes,fig, region_legend_marker=True,
|
|
368
|
+
def _add_ld_legend(sumstats, ax1, region_ld_threshold, region_ref,region_ref_index_dic,region_marker_shapes,fig, region_legend_marker=True,
|
|
369
|
+
cbar_fontsize= None,cbar_scale=False,cbar_equal_aspect=True,cbar_w_scale=1,cbar_h_scale=1,palette =None,
|
|
370
|
+
cbar_downward_offset =1.2, cbar_borderpad=None,
|
|
371
|
+
cbar_bbox_to_anchor=(0, 0, 1, 1),region_ref_alias=None):
|
|
372
|
+
|
|
373
|
+
scale = 1
|
|
374
|
+
if cbar_scale:
|
|
375
|
+
base_fontsize = 9
|
|
376
|
+
scale = cbar_fontsize / base_fontsize
|
|
377
|
+
scale = max(1,scale)
|
|
378
|
+
else:
|
|
379
|
+
scale = 1
|
|
380
|
+
|
|
381
|
+
width_raw= 11 * (scale)*cbar_w_scale
|
|
382
|
+
height_raw=(7 + 7 * len(region_ref))*(scale)*cbar_h_scale
|
|
383
|
+
|
|
384
|
+
width_pct = "{}%".format(width_raw)
|
|
385
|
+
height_pct = "{}%".format( height_raw)
|
|
386
|
+
|
|
387
|
+
total_y_pixels =(ax1.bbox.get_points()[1][1]-ax1.bbox.get_points()[0][1])
|
|
388
|
+
downwards_offset = cbar_fontsize / (total_y_pixels/ fig.dpi * 72) * cbar_downward_offset
|
|
389
|
+
bbox_to_anchor = (cbar_bbox_to_anchor[0],cbar_bbox_to_anchor[1]-downwards_offset,cbar_bbox_to_anchor[2],cbar_bbox_to_anchor[3])
|
|
349
390
|
|
|
350
|
-
|
|
351
|
-
|
|
391
|
+
if cbar_borderpad is None:
|
|
392
|
+
borderpad=0.5*(scale)
|
|
393
|
+
else:
|
|
394
|
+
borderpad=cbar_borderpad
|
|
395
|
+
|
|
352
396
|
axins1 = inset_axes(ax1,
|
|
353
397
|
width=width_pct, # width = 50% of parent_bbox width
|
|
354
398
|
height=height_pct, # height : 5%
|
|
399
|
+
bbox_to_anchor=bbox_to_anchor,
|
|
400
|
+
bbox_transform=ax1.transAxes,
|
|
401
|
+
borderpad=borderpad,
|
|
355
402
|
loc='upper right',
|
|
356
|
-
axes_kwargs={"frameon":True,"facecolor":"white","zorder":999999})
|
|
357
|
-
|
|
403
|
+
axes_kwargs={"frameon":True,"facecolor":"white","zorder":999999,"anchor":"NE"})
|
|
404
|
+
|
|
358
405
|
ld_ticks = [0]+region_ld_threshold+[1]
|
|
359
406
|
|
|
360
407
|
for index, ld_threshold in enumerate(ld_ticks):
|
|
361
408
|
for group_index in range(len(region_ref)):
|
|
362
409
|
if index < len(ld_ticks)-1:
|
|
363
410
|
x=ld_threshold
|
|
364
|
-
y=0.2*group_index
|
|
411
|
+
y=0.2*group_index
|
|
365
412
|
width=0.2
|
|
366
|
-
height=ld_ticks[index+1]-ld_ticks[index]
|
|
413
|
+
height=(ld_ticks[index+1]-ld_ticks[index])
|
|
367
414
|
hex_color = palette[(region_ref_index_dic[region_ref[group_index]]+1)*100 + index+1] # consistent color
|
|
368
415
|
|
|
369
416
|
a = Rectangle((x,y),width, height, fill = True, color = hex_color , linewidth = 2)
|
|
@@ -371,22 +418,46 @@ def _add_ld_legend(sumstats, ax1, region_ld_threshold, region_ref,region_ref_ind
|
|
|
371
418
|
axins1.add_patch(a)
|
|
372
419
|
|
|
373
420
|
# y snpid
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
421
|
+
if region_ref_alias is None:
|
|
422
|
+
region_ref_name = region_ref
|
|
423
|
+
else:
|
|
424
|
+
region_ref_name = [region_ref_alias[i] for i in region_ref]
|
|
425
|
+
|
|
426
|
+
yticks_position = (0.1 + 0.2 *np.arange(0,len(region_ref_name)))
|
|
427
|
+
axins1.set_yticks(yticks_position, ["{}".format(x) for x in region_ref_name])
|
|
428
|
+
axins1.set_ylim(0,0.2*len(region_ref_name))
|
|
429
|
+
ymin, ymax=0,0.2*len(region_ref_name)
|
|
378
430
|
# x ld thresholds
|
|
431
|
+
|
|
379
432
|
axins1.set_xticks(ticks=ld_ticks)
|
|
380
433
|
axins1.set_xticklabels([str(i) for i in ld_ticks])
|
|
381
|
-
|
|
382
434
|
xmin, xmax = 0, 1
|
|
383
|
-
axins1.set_xlim(xmin,xmax)
|
|
435
|
+
axins1.set_xlim(xmin,xmax)
|
|
436
|
+
|
|
437
|
+
if cbar_equal_aspect==True:
|
|
438
|
+
axins1.set_aspect('equal', adjustable='box',anchor="NE")
|
|
384
439
|
|
|
385
440
|
############### ##############plot marker ############## ##############
|
|
386
441
|
if region_legend_marker==True:
|
|
387
442
|
for group_index, ref in enumerate(region_ref):
|
|
388
|
-
|
|
389
|
-
|
|
443
|
+
|
|
444
|
+
data_to_point_y =((axins1.bbox.get_points()[1][1]-axins1.bbox.get_points()[0][1])*height_raw/(ymax -ymin))
|
|
445
|
+
data_to_point_x =((axins1.bbox.get_points()[1][0]-axins1.bbox.get_points()[0][0])*width_raw/(xmax -xmin))
|
|
446
|
+
y_to_x = data_to_point_y/data_to_point_x
|
|
447
|
+
x_to_y = 1/y_to_x
|
|
448
|
+
xyratio = min(y_to_x, x_to_y)
|
|
449
|
+
|
|
450
|
+
marker_side_in_data = 0.075
|
|
451
|
+
if cbar_equal_aspect==True:
|
|
452
|
+
xyratio=1
|
|
453
|
+
|
|
454
|
+
## change markersize
|
|
455
|
+
|
|
456
|
+
if xyratio <1 :
|
|
457
|
+
x = 0 - (marker_side_in_data +0.03) * xyratio
|
|
458
|
+
else:
|
|
459
|
+
x = 0 - (marker_side_in_data +0.03)
|
|
460
|
+
y= (0.1 + 0.2 * group_index)
|
|
390
461
|
|
|
391
462
|
if len(region_ref) <2:
|
|
392
463
|
# single-ref mode
|
|
@@ -398,16 +469,31 @@ def _add_ld_legend(sumstats, ax1, region_ld_threshold, region_ref,region_ref_ind
|
|
|
398
469
|
c = palette[(region_ref_index_dic[region_ref[group_index]]+1)*100 + len(ld_ticks)-1]
|
|
399
470
|
|
|
400
471
|
# ([x0,y0][x1,y1])
|
|
401
|
-
|
|
402
|
-
|
|
472
|
+
# y pixels / per data 1
|
|
473
|
+
|
|
474
|
+
data_to_point_y =((axins1.bbox.get_points()[1][1]-axins1.bbox.get_points()[0][1])*height_raw/(ymax -ymin))
|
|
475
|
+
data_to_point_x =((axins1.bbox.get_points()[1][0]-axins1.bbox.get_points()[0][0])*width_raw/(xmax -xmin))
|
|
476
|
+
|
|
477
|
+
if data_to_point_y < data_to_point_x:
|
|
478
|
+
length_raw = 1 #height_raw
|
|
479
|
+
data_to_point = data_to_point_y
|
|
480
|
+
else:
|
|
481
|
+
length_raw = 1 #width_raw
|
|
482
|
+
data_to_point = data_to_point_x
|
|
483
|
+
|
|
484
|
+
# pixels/data 1 -> font points/data 1
|
|
485
|
+
# (dpi / 72) = point_per_pixel
|
|
486
|
+
# y pixels / per data 1 / (dpi / 72) -> y font points/data 1
|
|
487
|
+
|
|
488
|
+
font_points_per_data_1 = data_to_point/(fig.dpi/72)
|
|
489
|
+
s = ((marker_side_in_data*2)* font_points_per_data_1 * length_raw/100 )**2
|
|
403
490
|
|
|
404
491
|
axins1.scatter(x, y, s=s, marker=marker,c=c, edgecolors="black", linewidths = 1, clip_on=False, zorder=100)
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
axins1.set_title('LD $r^{2}$ with variant',loc="center",y=-0.2)
|
|
492
|
+
|
|
493
|
+
pad = ((marker_side_in_data*2+0.02)* font_points_per_data_1 * length_raw/100)
|
|
494
|
+
tick_length=(abs(x)* font_points_per_data_1 * length_raw/100)
|
|
495
|
+
axins1.tick_params(axis="y", pad=pad-0.5*tick_length, length=tick_length)
|
|
496
|
+
|
|
411
497
|
cbar = axins1
|
|
412
498
|
return ax1, cbar
|
|
413
499
|
|
|
@@ -479,12 +565,15 @@ def _plot_gene_track(
|
|
|
479
565
|
stack_num_to_plot = max(taf[0],n_uniq_stack)
|
|
480
566
|
ax3.set_ylim((-stack_num_to_plot*2-taf[1]*2,2+taf[1]*2))
|
|
481
567
|
ax3.set_yticks([])
|
|
482
|
-
|
|
568
|
+
point_per_pixels = 72/fig.dpi
|
|
569
|
+
pixels_per_point = fig.dpi/72
|
|
570
|
+
|
|
483
571
|
pixels_per_track = np.abs(ax3.transData.transform([0,0])[1] - ax3.transData.transform([0,1])[1])
|
|
484
572
|
font_size_in_pixels= taf[2] * pixels_per_track
|
|
485
|
-
font_size_in_points = font_size_in_pixels *
|
|
486
|
-
linewidth_in_points= pixels_per_track * pixels_per_point
|
|
573
|
+
font_size_in_points = font_size_in_pixels * point_per_pixels
|
|
487
574
|
|
|
575
|
+
linewidth_in_points_per_track= pixels_per_track * point_per_pixels
|
|
576
|
+
|
|
488
577
|
log.write(" -plotting gene track..", verbose=verbose)
|
|
489
578
|
|
|
490
579
|
sig_gene_name = "Undefined"
|
|
@@ -517,8 +606,11 @@ def _plot_gene_track(
|
|
|
517
606
|
sig_gene_rights.append(gene_track_start_i+row["end"])
|
|
518
607
|
|
|
519
608
|
# plot gene line
|
|
609
|
+
## minimum width = 2 pixel
|
|
610
|
+
gene_line_width = max(linewidth_in_points_per_track/10, 2/pixels_per_point)
|
|
611
|
+
|
|
520
612
|
ax3.plot((gene_track_start_i+row["start"],gene_track_start_i+row["end"]),
|
|
521
|
-
(row["stack"]*2,row["stack"]*2),color=gene_color,linewidth=
|
|
613
|
+
(row["stack"]*2,row["stack"]*2),color=gene_color,linewidth=gene_line_width,solid_capstyle="butt")
|
|
522
614
|
|
|
523
615
|
# plot gene name
|
|
524
616
|
if row["end"] >= region[2]:
|
|
@@ -549,9 +641,12 @@ def _plot_gene_track(
|
|
|
549
641
|
exon_color = region_lead_grid_line["color"]
|
|
550
642
|
else:
|
|
551
643
|
exon_color="#020080"
|
|
552
|
-
|
|
644
|
+
|
|
645
|
+
## minimum width = 8 pixel
|
|
646
|
+
exon_line_width = max(linewidth_in_points_per_track * taf[3], 8/pixels_per_point)
|
|
647
|
+
|
|
553
648
|
ax3.plot((gene_track_start_i+row["start"],gene_track_start_i+row["end"]),
|
|
554
|
-
(row["stack"]*2,row["stack"]*2),linewidth=
|
|
649
|
+
(row["stack"]*2,row["stack"]*2),linewidth=exon_line_width,color=exon_color,solid_capstyle="butt")
|
|
555
650
|
|
|
556
651
|
log.write(" -Finished plotting gene track..", verbose=verbose)
|
|
557
652
|
|
|
@@ -573,7 +668,6 @@ def process_vcf(sumstats,
|
|
|
573
668
|
region_ld_threshold,
|
|
574
669
|
vcf_chr_dict,
|
|
575
670
|
tabix):
|
|
576
|
-
|
|
577
671
|
log.write("Start to load reference genotype...", verbose=verbose)
|
|
578
672
|
log.write(" -reference vcf path : "+ vcf_path, verbose=verbose)
|
|
579
673
|
|
|
@@ -631,7 +725,6 @@ def process_vcf(sumstats,
|
|
|
631
725
|
# figure out lead variant
|
|
632
726
|
lead_id = _get_lead_id(sumstats, region_ref_single, log, verbose)
|
|
633
727
|
|
|
634
|
-
|
|
635
728
|
lead_series = None
|
|
636
729
|
if lead_id is None:
|
|
637
730
|
|
|
@@ -685,6 +778,12 @@ def process_vcf(sumstats,
|
|
|
685
778
|
else:
|
|
686
779
|
log.write(" -Lead SNP not found in reference...", verbose=verbose)
|
|
687
780
|
sumstats[rsq]=None
|
|
781
|
+
|
|
782
|
+
#
|
|
783
|
+
try:
|
|
784
|
+
sumstats.loc[lead_id,rsq]=1
|
|
785
|
+
except KeyError:
|
|
786
|
+
pass
|
|
688
787
|
|
|
689
788
|
sumstats[rsq] = sumstats[rsq].astype("float")
|
|
690
789
|
sumstats[ld_single] = 0
|
|
@@ -716,7 +815,7 @@ def process_vcf(sumstats,
|
|
|
716
815
|
sumstats[final_ld_col] = 0
|
|
717
816
|
sumstats[final_shape_col] = 1
|
|
718
817
|
sumstats[final_rsq_col] = 0.0
|
|
719
|
-
|
|
818
|
+
|
|
720
819
|
if len(region_ref)==1:
|
|
721
820
|
if lead_id is not None:
|
|
722
821
|
sumstats.loc[lead_id, final_shape_col] +=1
|
|
@@ -729,6 +828,8 @@ def process_vcf(sumstats,
|
|
|
729
828
|
sumstats.loc[a_ngt_b, final_ld_col] = 100 * (i+1) + sumstats.loc[a_ngt_b, ld_single]
|
|
730
829
|
sumstats.loc[a_ngt_b, final_rsq_col] = sumstats.loc[a_ngt_b, current_rsq]
|
|
731
830
|
sumstats.loc[a_ngt_b, final_shape_col] = i + 1
|
|
831
|
+
|
|
832
|
+
sumstats = sumstats.dropna(subset=[pos,nea,ea])
|
|
732
833
|
####################################################################################################
|
|
733
834
|
log.write("Finished loading reference genotype successfully!", verbose=verbose)
|
|
734
835
|
return sumstats
|