gwaslab 3.5.0__py3-none-any.whl → 3.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gwaslab might be problematic. Click here for more details.
- gwaslab/__init__.py +1 -0
- gwaslab/bd_get_hapmap3.py +3 -1
- gwaslab/g_Sumstats.py +15 -9
- gwaslab/g_version.py +2 -2
- gwaslab/io_process_args.py +28 -0
- gwaslab/qc_fix_sumstats.py +9 -5
- gwaslab/vis_plot_credible sets.py +0 -0
- gwaslab/viz_aux_annotate_plot.py +8 -0
- gwaslab/viz_aux_property.py +2 -0
- gwaslab/viz_aux_quickfix.py +8 -6
- gwaslab/viz_aux_save_figure.py +2 -1
- gwaslab/viz_plot_compare_effect.py +787 -468
- gwaslab/viz_plot_mqqplot.py +27 -9
- gwaslab/viz_plot_phe_heatmap.py +1 -1
- gwaslab/viz_plot_regional2.py +44 -9
- gwaslab/viz_plot_scatter_with_reg.py +229 -0
- gwaslab/viz_plot_stackedregional.py +1 -1
- gwaslab/viz_plot_trumpetplot.py +1 -1
- {gwaslab-3.5.0.dist-info → gwaslab-3.5.2.dist-info}/METADATA +3 -3
- {gwaslab-3.5.0.dist-info → gwaslab-3.5.2.dist-info}/RECORD +24 -20
- {gwaslab-3.5.0.dist-info → gwaslab-3.5.2.dist-info}/WHEEL +1 -1
- {gwaslab-3.5.0.dist-info → gwaslab-3.5.2.dist-info}/LICENSE +0 -0
- {gwaslab-3.5.0.dist-info → gwaslab-3.5.2.dist-info}/LICENSE_before_v3.4.39 +0 -0
- {gwaslab-3.5.0.dist-info → gwaslab-3.5.2.dist-info}/top_level.txt +0 -0
gwaslab/viz_plot_mqqplot.py
CHANGED
|
@@ -139,6 +139,7 @@ def mqqplot(insumstats,
|
|
|
139
139
|
anno_gtf_path=None,
|
|
140
140
|
anno_adjust=False,
|
|
141
141
|
anno_max_iter=100,
|
|
142
|
+
arrow_kwargs=None,
|
|
142
143
|
arm_offset=None,
|
|
143
144
|
arm_scale=1,
|
|
144
145
|
anno_height=1,
|
|
@@ -243,7 +244,8 @@ def mqqplot(insumstats,
|
|
|
243
244
|
anno_args=dict()
|
|
244
245
|
if colors is None:
|
|
245
246
|
colors=["#597FBD","#74BAD3"]
|
|
246
|
-
|
|
247
|
+
if arrow_kwargs is None:
|
|
248
|
+
arrow_kwargs=dict()
|
|
247
249
|
if region is not None:
|
|
248
250
|
if marker_size == (5,20):
|
|
249
251
|
marker_size=(45,65)
|
|
@@ -262,7 +264,7 @@ def mqqplot(insumstats,
|
|
|
262
264
|
|
|
263
265
|
if region_marker_shapes is None:
|
|
264
266
|
# 9 shapes
|
|
265
|
-
region_marker_shapes = ['o', '
|
|
267
|
+
region_marker_shapes = ['o', '^','s','D','*','P','X','h','8']
|
|
266
268
|
if region_grid_line is None:
|
|
267
269
|
region_grid_line = {"linewidth": 2,"linestyle":"--"}
|
|
268
270
|
if region_lead_grid_line is None:
|
|
@@ -638,6 +640,14 @@ def mqqplot(insumstats,
|
|
|
638
640
|
linewidth=1
|
|
639
641
|
if len(region_ref) == 1:
|
|
640
642
|
palette = {100+i:region_ld_colors[i] for i in range(len(region_ld_colors))}
|
|
643
|
+
scatter_args["markers"]= {(i+1):m for i,m in enumerate(region_marker_shapes[:2])}
|
|
644
|
+
if region_ref[0] is None:
|
|
645
|
+
id_to_hide = sumstats["scaled_P"].idxmax()
|
|
646
|
+
sumstats.loc[id_to_hide,"s"] = -100
|
|
647
|
+
else:
|
|
648
|
+
sumstats.loc[sumstats["SNPID"]==region_ref[0],"s"] = -100
|
|
649
|
+
marker_size=(0,marker_size[1])
|
|
650
|
+
style="SHAPE"
|
|
641
651
|
else:
|
|
642
652
|
palette = {}
|
|
643
653
|
region_color_maps = []
|
|
@@ -650,12 +660,14 @@ def mqqplot(insumstats,
|
|
|
650
660
|
# 1 + 5 + 1
|
|
651
661
|
region_ld_colors_single = [region_ld_colors[0]] + output_hex_colors + [output_hex_colors[-1]]
|
|
652
662
|
region_color_maps.append(region_ld_colors_single)
|
|
653
|
-
|
|
663
|
+
|
|
664
|
+
# gradient color dict
|
|
654
665
|
for i, hex_colors in enumerate(region_color_maps):
|
|
655
666
|
for j, hex_color in enumerate(hex_colors):
|
|
656
667
|
palette[(i+1)*100 + j ] = hex_color
|
|
657
668
|
|
|
658
669
|
edgecolor="none"
|
|
670
|
+
# create a marker shape dict
|
|
659
671
|
scatter_args["markers"]= {(i+1):m for i,m in enumerate(region_marker_shapes[:len(region_ref)])}
|
|
660
672
|
style="SHAPE"
|
|
661
673
|
|
|
@@ -1002,6 +1014,7 @@ def mqqplot(insumstats,
|
|
|
1002
1014
|
region_anno_bbox_args=region_anno_bbox_args,
|
|
1003
1015
|
skip=skip,
|
|
1004
1016
|
anno_height=anno_height,
|
|
1017
|
+
arrow_kwargs=arrow_kwargs,
|
|
1005
1018
|
snpid=snpid,
|
|
1006
1019
|
chrom=chrom,
|
|
1007
1020
|
pos=pos,
|
|
@@ -1393,14 +1406,19 @@ def _process_xtick(ax1, chrom_df, xtick_chr_dict, fontsize, font_family, log=Log
|
|
|
1393
1406
|
|
|
1394
1407
|
def _process_ytick(ax1, fontsize, font_family, ax4, log=Log(),verbose=True):
|
|
1395
1408
|
log.write(" -Processing Y labels...",verbose=verbose)
|
|
1396
|
-
ax1_yticklabels = ax1.get_yticklabels()
|
|
1409
|
+
#ax1_yticklabels = ax1.get_yticklabels()
|
|
1410
|
+
#print(ax1_yticklabels)
|
|
1411
|
+
#plt.draw()
|
|
1412
|
+
#ax1_yticks = ax1.get_yticks()
|
|
1413
|
+
#print(ax1_yticks)
|
|
1397
1414
|
#ax1.set_yticklabels(ax1_yticklabels,fontsize=fontsize,family=font_family)
|
|
1398
|
-
|
|
1399
|
-
ax1.set_yticks(ax1_yticks,ax1_yticklabels,fontsize=fontsize,family=font_family)
|
|
1415
|
+
ax1.tick_params(axis='y', labelsize=fontsize,labelfontfamily=font_family)
|
|
1416
|
+
#ax1.set_yticks(ax1_yticks,ax1_yticklabels,fontsize=fontsize,family=font_family)
|
|
1400
1417
|
if ax4 is not None:
|
|
1401
|
-
ax4_yticklabels = ax4.get_yticklabels()
|
|
1402
|
-
ax4_yticks = ax4.get_yticks()
|
|
1403
|
-
ax4.
|
|
1418
|
+
#ax4_yticklabels = ax4.get_yticklabels()
|
|
1419
|
+
#ax4_yticks = ax4.get_yticks()
|
|
1420
|
+
ax4.tick_params(axis='y', labelsize=fontsize,labelfontfamily=font_family)
|
|
1421
|
+
#ax4.set_yticks(ax4_yticks,ax4_yticklabels, fontsize=fontsize,family=font_family)
|
|
1404
1422
|
return ax1, ax4
|
|
1405
1423
|
|
|
1406
1424
|
def _process_xlabel(region, xlabel, ax1, gtf_path, mode, fontsize, font_family, ax3=None , log=Log(),verbose=True):
|
gwaslab/viz_plot_phe_heatmap.py
CHANGED
gwaslab/viz_plot_regional2.py
CHANGED
|
@@ -93,6 +93,7 @@ def _plot_regional(
|
|
|
93
93
|
ax1, lead_id_single = _pinpoint_lead(sumstats = sumstats,
|
|
94
94
|
ax1 = ax1,
|
|
95
95
|
region_ref=region_ref_single,
|
|
96
|
+
region_ref_total_n = len(region_ref),
|
|
96
97
|
lead_color = palette[(index+1)*100 + len(region_ld_threshold)+2],
|
|
97
98
|
marker_size= marker_size,
|
|
98
99
|
region_marker_shapes=region_marker_shapes,
|
|
@@ -130,7 +131,9 @@ def _plot_regional(
|
|
|
130
131
|
region_ref=region_ref,
|
|
131
132
|
region_ld_threshold=region_ld_threshold,
|
|
132
133
|
region_ref_index_dic=region_ref_index_dic,
|
|
133
|
-
|
|
134
|
+
region_marker_shapes=region_marker_shapes,
|
|
135
|
+
palette=palette,
|
|
136
|
+
fig=fig)
|
|
134
137
|
else:
|
|
135
138
|
cbar=None
|
|
136
139
|
|
|
@@ -300,7 +303,7 @@ def _get_lead_id(sumstats=None, region_ref=None, log=None, verbose=True):
|
|
|
300
303
|
|
|
301
304
|
return lead_id
|
|
302
305
|
|
|
303
|
-
def _pinpoint_lead(sumstats,ax1,region_ref, lead_color, marker_size, log, verbose, region_marker_shapes):
|
|
306
|
+
def _pinpoint_lead(sumstats,ax1,region_ref, region_ref_total_n, lead_color, marker_size, log, verbose, region_marker_shapes):
|
|
304
307
|
|
|
305
308
|
if region_ref is None:
|
|
306
309
|
log.write(" -Extracting lead variant..." , verbose=verbose)
|
|
@@ -308,12 +311,19 @@ def _pinpoint_lead(sumstats,ax1,region_ref, lead_color, marker_size, log, verbos
|
|
|
308
311
|
else:
|
|
309
312
|
lead_id = _get_lead_id(sumstats, region_ref, log, verbose)
|
|
310
313
|
|
|
314
|
+
if region_ref_total_n <2:
|
|
315
|
+
# single-ref mode
|
|
316
|
+
marker_shape = region_marker_shapes[sumstats.loc[lead_id,"SHAPE"]]
|
|
317
|
+
else:
|
|
318
|
+
# multi-ref mode
|
|
319
|
+
marker_shape = region_marker_shapes[sumstats.loc[lead_id,"SHAPE"]-1]
|
|
320
|
+
|
|
311
321
|
if lead_id is not None:
|
|
312
322
|
ax1.scatter(sumstats.loc[lead_id,"i"],sumstats.loc[lead_id,"scaled_P"],
|
|
313
323
|
color=lead_color,
|
|
314
324
|
zorder=3,
|
|
315
|
-
marker=
|
|
316
|
-
s=marker_size[1]
|
|
325
|
+
marker= marker_shape,
|
|
326
|
+
s=marker_size[1]*1.5,
|
|
317
327
|
edgecolor="black")
|
|
318
328
|
|
|
319
329
|
return ax1, lead_id
|
|
@@ -322,14 +332,15 @@ def _add_region_title(region_title, ax1,region_title_args):
|
|
|
322
332
|
ax1.text(0.015,0.97, region_title, transform=ax1.transAxes, va="top", ha="left", region_ref=None, **region_title_args )
|
|
323
333
|
return ax1
|
|
324
334
|
|
|
325
|
-
def _add_ld_legend(sumstats, ax1, region_ld_threshold, region_ref,region_ref_index_dic,palette =None, position=1):
|
|
335
|
+
def _add_ld_legend(sumstats, ax1, region_ld_threshold, region_ref,region_ref_index_dic,region_marker_shapes,fig, palette =None, position=1):
|
|
326
336
|
|
|
327
337
|
width_pct = "11%"
|
|
328
338
|
height_pct = "{}%".format( 14 + 7 * len(region_ref))
|
|
329
339
|
axins1 = inset_axes(ax1,
|
|
330
340
|
width=width_pct, # width = 50% of parent_bbox width
|
|
331
341
|
height=height_pct, # height : 5%
|
|
332
|
-
loc='upper right',
|
|
342
|
+
loc='upper right',
|
|
343
|
+
axes_kwargs={"frameon":True,"facecolor":"white","zorder":999999})
|
|
333
344
|
|
|
334
345
|
ld_ticks = [0]+region_ld_threshold+[1]
|
|
335
346
|
|
|
@@ -345,7 +356,7 @@ def _add_ld_legend(sumstats, ax1, region_ld_threshold, region_ref,region_ref_ind
|
|
|
345
356
|
a = Rectangle((x,y),width, height, fill = True, color = hex_color , linewidth = 2)
|
|
346
357
|
#patches.append(a)
|
|
347
358
|
axins1.add_patch(a)
|
|
348
|
-
|
|
359
|
+
|
|
349
360
|
# y snpid
|
|
350
361
|
yticks_position = 0.1 + 0.2 *np.arange(0,len(region_ref))
|
|
351
362
|
axins1.set_yticks(yticks_position, ["{}".format(x) for x in region_ref])
|
|
@@ -354,9 +365,31 @@ def _add_ld_legend(sumstats, ax1, region_ld_threshold, region_ref,region_ref_ind
|
|
|
354
365
|
# x ld thresholds
|
|
355
366
|
axins1.set_xticks(ticks=ld_ticks)
|
|
356
367
|
axins1.set_xticklabels([str(i) for i in ld_ticks])
|
|
357
|
-
axins1.set_xlim(0,1)
|
|
358
368
|
|
|
369
|
+
xmin, xmax = 0, 1
|
|
370
|
+
axins1.set_xlim(xmin,xmax)
|
|
371
|
+
|
|
372
|
+
############### ##############plot marker ############## ##############
|
|
373
|
+
for group_index, ref in enumerate(region_ref):
|
|
374
|
+
x= -0.1
|
|
375
|
+
y= 0.1 + 0.2 * group_index
|
|
376
|
+
|
|
377
|
+
if len(region_ref) <2:
|
|
378
|
+
# single-ref mode
|
|
379
|
+
marker = region_marker_shapes[group_index+1]
|
|
380
|
+
else:
|
|
381
|
+
# multi-ref mode
|
|
382
|
+
marker = region_marker_shapes[group_index]
|
|
383
|
+
|
|
384
|
+
# ([x0,y0][x1,y1])
|
|
385
|
+
data_to_point =(axins1.bbox.get_points()[1][0]-axins1.bbox.get_points()[0][0]) / (xmax - xmin)
|
|
386
|
+
s = data_to_point * 0.075
|
|
387
|
+
c = palette[(region_ref_index_dic[region_ref[group_index]]+1)*100 + len(ld_ticks)-1]
|
|
388
|
+
axins1.scatter(x, y, s=s, marker=marker,c=c, edgecolors="black", linewidths = 1, clip_on=False, zorder=100)
|
|
389
|
+
|
|
390
|
+
axins1.set_xlim(0,1)
|
|
359
391
|
axins1.set_aspect('equal', adjustable='box')
|
|
392
|
+
axins1.tick_params(axis="y", pad=np.sqrt(data_to_point * 0.11))
|
|
360
393
|
axins1.set_title('LD $r^{2}$ with variant',loc="center",y=-0.2)
|
|
361
394
|
cbar = axins1
|
|
362
395
|
return ax1, cbar
|
|
@@ -664,6 +697,9 @@ def process_vcf(sumstats,
|
|
|
664
697
|
sumstats[final_shape_col] = 1
|
|
665
698
|
sumstats[final_rsq_col] = 0.0
|
|
666
699
|
|
|
700
|
+
if len(region_ref)==1:
|
|
701
|
+
sumstats.loc[lead_id, final_shape_col] +=1
|
|
702
|
+
|
|
667
703
|
for i in range(len(region_ref)):
|
|
668
704
|
ld_single = "LD_{}".format(i)
|
|
669
705
|
current_rsq = "RSQ_{}".format(i)
|
|
@@ -672,7 +708,6 @@ def process_vcf(sumstats,
|
|
|
672
708
|
sumstats.loc[a_ngt_b, final_ld_col] = 100 * (i+1) + sumstats.loc[a_ngt_b, ld_single]
|
|
673
709
|
sumstats.loc[a_ngt_b, final_rsq_col] = sumstats.loc[a_ngt_b, current_rsq]
|
|
674
710
|
sumstats.loc[a_ngt_b, final_shape_col] = i + 1
|
|
675
|
-
|
|
676
711
|
####################################################################################################
|
|
677
712
|
log.write("Finished loading reference genotype successfully!", verbose=verbose)
|
|
678
713
|
return sumstats
|
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
import matplotlib.pyplot as plt
|
|
4
|
+
import scipy.stats as ss
|
|
5
|
+
import seaborn as sns
|
|
6
|
+
import gc
|
|
7
|
+
import math
|
|
8
|
+
import scipy.stats as ss
|
|
9
|
+
from matplotlib.patches import Rectangle
|
|
10
|
+
from adjustText import adjust_text
|
|
11
|
+
from gwaslab.viz_aux_save_figure import save_figure
|
|
12
|
+
from gwaslab.util_in_get_sig import getsig
|
|
13
|
+
from gwaslab.util_in_get_sig import annogene
|
|
14
|
+
from gwaslab.g_Log import Log
|
|
15
|
+
from gwaslab.util_in_correct_winnerscurse import wc_correct
|
|
16
|
+
from gwaslab.util_in_correct_winnerscurse import wc_correct_test
|
|
17
|
+
from gwaslab.g_Sumstats import Sumstats
|
|
18
|
+
from gwaslab.io_process_args import _merge_and_sync_dic
|
|
19
|
+
from gwaslab.io_process_args import _extract_kwargs
|
|
20
|
+
|
|
21
|
+
def scatter(df,
            x,
            y,
            mode="0",
            reg_box=None,
            is_reg=True,
            fdr=False,
            allele_match=False,
            r_se=False,
            is_45_helper_line=False,
            plt_args=None,
            xylabel_prefix="Per-allele effect size in ",
            helper_line_args=None,
            font_args=None,
            fontargs=None,
            build="19",
            r_or_r2="r",
            err_kwargs=None,
            legend_args=None,
            log = Log(),
            save=False,
            reg_xmin=None,
            verbose=True,
            save_args=None,
            scatter_kwargs=None,
            font_kwargs=None,
            plt_kwargs=None,
            null_beta=0,
            engine="plt",
            **kwargs):
    """Draw a scatter plot of two columns of ``df`` and overlay a regression line.

    Parameters
    ----------
    df : table indexed by column name (``df[x]``, ``df[y]``, ``len(df)``).
    x, y : names of the columns plotted on the x and y axes.
    mode : forwarded to ``confire_regression_line``.
    reg_box : bbox properties for the regression text; defaults to a white
        rounded box without an edge.
    is_reg, r_se, is_45_helper_line, helper_line_args, null_beta :
        forwarded to ``confire_regression_line``.
    log, verbose : logger object and its verbosity flag.
    save, save_args : forwarded to ``save_figure``.
    reg_xmin : x position used to anchor the regression line; defaults to
        the minimum of ``df[x]``.
    scatter_kwargs, font_kwargs, plt_kwargs, err_kwargs : keyword dictionaries
        for the scatter call, text, ``plt.subplots`` and error bars.
    engine : ``"plt"`` uses ``ax.scatter``; ``"sns"`` uses
        ``sns.scatterplot``. Any other value draws no points.

    Returns
    -------
    (fig, ax, reg) : the figure, the axes and the regression result returned
        by ``confire_regression_line``.

    Notes
    -----
    ``fdr``, ``allele_match``, ``plt_args``, ``xylabel_prefix``, ``font_args``,
    ``fontargs``, ``build``, ``r_or_r2``, ``legend_args`` and ``**kwargs`` are
    not referenced by name in this body. They are still visible to
    ``_extract_kwargs`` through ``locals()``.
    NOTE(review): presumably ``_extract_kwargs`` merges alias arguments such
    as ``font_args``/``fontargs`` into the ``*_kwargs`` dictionaries - confirm
    against ``gwaslab.io_process_args``.
    """

    # Fill in defaults for every dictionary argument left as None.
    if save_args is None:
        save_args = {"dpi":300,"facecolor":"white"}
    if reg_box is None:
        reg_box = dict(boxstyle='round', facecolor='white', alpha=1,edgecolor="None")
    if err_kwargs is None:
        err_kwargs={"ecolor":"#cccccc","elinewidth":1}
    if font_kwargs is None:
        font_kwargs={'fontsize':12,'family':'sans','fontname':'Arial'}
    if helper_line_args is None:
        helper_line_args={"color":'black', "linestyle":'-',"lw":1}
    if plt_kwargs is None:
        plt_kwargs={"figsize":(8,8),"dpi":300}
    if scatter_kwargs is None:
        scatter_kwargs={"s":20}
    if reg_xmin is None:
        reg_xmin = df[x].min()

    # locals() is handed to the helper, so the set and names of local
    # variables at this point are part of the behaviour; do not rename them.
    # NOTE(review): save_kwargs is computed here but save_figure below still
    # receives save_args - confirm which one is intended.
    save_kwargs = _extract_kwargs("save", save_args, locals())
    err_kwargs = _extract_kwargs("err", err_kwargs, locals())
    plt_kwargs = _extract_kwargs("plt", plt_kwargs, locals())
    scatter_kwargs = _extract_kwargs("scatter", scatter_kwargs, locals())
    font_kwargs = _extract_kwargs("font",font_kwargs, locals())

    log.write("Start to create scatter plot...", verbose=verbose)
    fig,ax = plt.subplots(**plt_kwargs)

    # Axis limits are read before any data is drawn; they are only passed
    # through to confire_regression_line.
    xl,xh=ax.get_xlim()
    yl,yh=ax.get_ylim()

    #ax.axhline(y=0, zorder=1,**helper_line_args)
    #ax.axvline(x=0, zorder=1,**helper_line_args)

    #for spine in ['top', 'right']:
    #    ax.spines[spine].set_visible(False)

    log.write(" -Creating scatter plot : {} - {}...".format(x, y), verbose=verbose)
    if engine=="plt":
        ax.scatter(df[x],df[y],**scatter_kwargs)
    elif engine=="sns":
        sns.scatterplot(data=df,x=x,y=y,ax=ax,**scatter_kwargs)
    # Regression line, helper line and regression text.
    ax, reg = confire_regression_line(x, y,
                                      is_reg,
                                      reg_box,
                                      df,
                                      ax,
                                      mode,
                                      xl,
                                      yl,
                                      xh,
                                      yh,
                                      null_beta,
                                      r_se,
                                      is_45_helper_line,
                                      helper_line_args,
                                      font_kwargs,
                                      log,
                                      verbose, reg_xmin)

    save_figure(fig = fig, save = save, keyword="scatter", save_args=save_args, log = log, verbose=verbose)

    return fig, ax, reg
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def confire_regression_line(x, y, is_reg, reg_box, df, ax, mode,xl,yl,xh,yh, null_beta, r_se,
                            is_45_helper_line,helper_line_args, font_kwargs,
                            log, verbose, reg_xmin):
    """Fit ``df[y] ~ df[x]`` and annotate ``ax`` with the regression result.

    Parameters
    ----------
    x, y : column names in ``df`` used as predictor and response.
    is_reg : the regression is only drawn when this is exactly ``True``.
    reg_box : bbox properties for the regression text.
    df : table holding the two columns.
    ax : axes to draw on.
    mode : the jackknife standard error of r is only considered in mode "0".
    xl, yl, xh, yh, null_beta : accepted for interface compatibility; not
        used in this body.
    r_se : when true (and mode is "0"), append the jackknife SE of r.
    is_45_helper_line, helper_line_args : forwarded to ``create_helper_line``.
    font_kwargs : text properties for the regression string.
    log, verbose : logger object and its verbosity flag.
    reg_xmin : x position anchoring the helper and regression lines.

    Returns
    -------
    (ax, reg) : the axes and the ``scipy.stats.linregress`` result, or
        ``None`` for ``reg`` when no regression was fitted.
    """
    # Bind the result up front: without this the final return raised
    # UnboundLocalError whenever the regression was skipped.
    reg = None

    # A regression on fewer than three rows is not attempted.
    if len(df) < 3:
        is_reg = False

    if is_reg is True:
        # reg indices: slope, intercept, r, p, slope_se
        reg = ss.linregress(df[x], df[y])

        r_se_jackknife_string = ""
        if mode == "0" and r_se == True:
            log.write(" -Estimating SE for rsq using Jackknife method.", verbose=verbose)
            r_se_jackknife = jackknife_r(df, x, y, log, verbose)
            r_se_jackknife_string = " ({:.2f})".format(r_se_jackknife)

        create_reg_log(reg, log, verbose)

        reg_string = create_reg_string(reg,
                                       r_se_jackknife_string)

        # Bottom-right corner of the axes, in axes coordinates.
        ax.text(0.99,0.01, reg_string, va="bottom",ha="right",transform=ax.transAxes,bbox=reg_box,**font_kwargs)

        ax = create_helper_line(ax, reg[0], is_45_helper_line, helper_line_args, reg_xmin=reg_xmin)
        ax = create_reg_line(ax, reg, reg_xmin=reg_xmin)

    return ax, reg
|
|
152
|
+
|
|
153
|
+
#############################################################################################################################################################################
|
|
154
|
+
def create_reg_log(reg,log, verbose):
    """Write the regression summary to the log.

    Parameters
    ----------
    reg : regression result indexed as slope ``[0]``, r ``[2]``,
        p-value ``[3]`` and slope standard error ``[4]``.
    log : object exposing ``write(*parts, verbose=...)``.
    verbose : forwarded to every ``log.write`` call.
    """
    # The p-value logged here is the one stored in the regression result,
    # i.e. the test against a null slope of 0.
    log.write(" -Beta = ", reg[0], verbose=verbose)
    log.write(" -Beta_se = ", reg[4], verbose=verbose)
    log.write(" -H0 beta = 0",", default p = ", "{:.2e}".format(reg[3]), verbose=verbose)
    log.write(" -Pearson correlation coefficient = ", "{:.2f}".format(reg[2]), verbose=verbose)
    log.write(" -r2 = ", "{:.2f}".format(reg[2]**2), verbose=verbose)
|
|
164
|
+
|
|
165
|
+
def create_helper_line(ax,
                       slope,
                       is_45_helper_line,
                       helper_line_args,
                       reg_xmin=0):
    """Optionally draw a diagonal helper line through the current axis range.

    The line is only drawn when ``is_45_helper_line`` is exactly ``True``.
    It follows y = x for a positive ``slope`` and y = -x otherwise, spanning
    from the smaller of the lower axis limits to the larger of the upper
    ones. ``reg_xmin`` is accepted but not used. Returns ``ax``.
    """
    if is_45_helper_line is not True:
        return ax

    x_low, x_high = ax.get_xlim()
    y_low, y_high = ax.get_ylim()
    lower = min(x_low, y_low)
    upper = max(x_high, y_high)

    if slope > 0:
        start, end = [lower, lower], [upper, upper]
    else:
        start, end = [lower, -lower], [upper, -upper]

    ax.axline(start, end, zorder=1, **helper_line_args)
    return ax
|
|
180
|
+
|
|
181
|
+
def create_reg_line(ax, reg, reg_xmin=0):
    """Draw the fitted regression line on ``ax`` as a dashed grey line.

    ``reg[0]`` is the slope and ``reg[1]`` the intercept. The line is
    anchored at ``x = reg_xmin`` and extended with ``ax.axline``.
    Returns ``ax``.
    """
    anchor_y = reg[0] * reg_xmin + reg[1]
    ax.axline(
        xy1=(reg_xmin, anchor_y),
        slope=reg[0],
        color="#cccccc",
        linestyle='--',
        zorder=1,
    )
    return ax
|
|
185
|
+
|
|
186
|
+
def create_reg_string(reg,
                      r_se_jackknife_string):
    """Build the mathtext annotation that summarises a regression.

    Parameters
    ----------
    reg : regression result indexed as slope ``[0]``, intercept ``[1]``,
        r ``[2]`` and p-value ``[3]``.
    r_se_jackknife_string : text appended after the r value (may be empty).

    Returns
    -------
    str : ``"$y =$ <intercept> $+$ <slope> $x$, $p = a \\times 10^{b}$, $r =$<r>"``
        followed by ``r_se_jackknife_string``.
    """
    # The p-value is element 3 of the result; element 2 is the correlation
    # coefficient and is only reported as r below.
    p = reg[3]
    try:
        # "1.23e-03" -> mantissa "1.23", exponent "-3"
        p12, raw_exponent = "{:.2e}".format(p).split("e")
        pe = str(int(raw_exponent))
    except (ValueError, TypeError):
        # nan/inf have no exponent part and None cannot be formatted;
        # fall back to the neutral "0 x 10^0".
        p12 = "0"
        pe = "0"

    p_latex = "$p = " + p12 + " \\times 10^{" + pe + "}$"

    reg_string = "$y =$ "+"{:.2f}".format(reg[1]) +" $+$ "+ "{:.2f}".format(reg[0])+" $x$, "+ p_latex + ", $r =$" +"{:.2f}".format(reg[2])+r_se_jackknife_string

    return reg_string
|
|
202
|
+
|
|
203
|
+
def jackknife_r(df,x,y,log,verbose):
    """Estimate the standard error of Pearson's r by leave-one-out jackknife.

    Parameters
    ----------
    df : table holding the two columns.
    x, y : column names of the predictor and the response.
    log : object exposing ``write(message, verbose=...)``.
    verbose : forwarded to ``log.write``.

    Returns
    -------
    float : the jackknife standard error of r, or ``nan`` when fewer than
        three complete (x, y) pairs are available.
    """
    # Only complete pairs can enter the regression.
    pairs = df.loc[:, [x, y]].dropna()

    # Count rows AFTER dropping missing values. Using the size of the raw
    # frame broke the row numbering as soon as one row had been dropped.
    n = len(pairs)
    if n < 3:
        # Every leave-one-out fit needs at least two points.
        r_se = float("nan")
        log.write(" -R se (jackknife) = {:.2e}".format(r_se), verbose=verbose)
        return r_se

    x_values = pairs[x].to_numpy()
    y_values = pairs[y].to_numpy()
    row_numbers = np.arange(n)

    # r_i: correlation coefficient with record i left out
    r_list = []
    for i in range(n):
        records_to_use = row_numbers != i
        reg_jackknife = ss.linregress(x_values[records_to_use], y_values[records_to_use])
        r_list.append(reg_jackknife[2])

    rs = np.array(r_list)
    # https://en.wikipedia.org/wiki/Jackknife_resampling
    r_se = np.sqrt( (n-1)/n * np.sum((rs - np.mean(rs))**2) )
    log.write(" -R se (jackknife) = {:.2e}".format(r_se), verbose=verbose)
    return r_se
|
|
@@ -143,7 +143,7 @@ def plot_stacked_mqq(objects,
|
|
|
143
143
|
mqq_args_for_each_plot = _sort_args(mqq_args, n_plot)
|
|
144
144
|
##########################################################################################################################################
|
|
145
145
|
# get x axis dict
|
|
146
|
-
if mode=="m":
|
|
146
|
+
if mode=="m" or mode=="r":
|
|
147
147
|
_posdiccul = _get_chrom_dic(sumstats_list,chrom="CHR",pos="POS",chrpad=0.02)
|
|
148
148
|
else:
|
|
149
149
|
_posdiccul=None
|
gwaslab/viz_plot_trumpetplot.py
CHANGED
|
@@ -281,7 +281,7 @@ def plottrumpet(mysumstats,
|
|
|
281
281
|
sumstats["ABS_BETA"] = sumstats[beta].abs()
|
|
282
282
|
|
|
283
283
|
##################################################################################################
|
|
284
|
-
size_norm = (sumstats[
|
|
284
|
+
size_norm = (sumstats[size].min(), sumstats[size].max())
|
|
285
285
|
## if highlight ##################################################################################################
|
|
286
286
|
dots = sns.scatterplot(data=sumstats,
|
|
287
287
|
x=maf,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: gwaslab
|
|
3
|
-
Version: 3.5.
|
|
3
|
+
Version: 3.5.2
|
|
4
4
|
Summary: A collection of handy tools for GWAS SumStats
|
|
5
5
|
Author-email: Yunye <yunye@gwaslab.com>
|
|
6
6
|
Project-URL: Homepage, https://cloufield.github.io/gwaslab/
|
|
@@ -20,7 +20,7 @@ Requires-Dist: scipy>=1.12
|
|
|
20
20
|
Requires-Dist: pySAM==0.22.1
|
|
21
21
|
Requires-Dist: Biopython>=1.79
|
|
22
22
|
Requires-Dist: adjustText<=0.8,>=0.7.3
|
|
23
|
-
Requires-Dist: liftover
|
|
23
|
+
Requires-Dist: liftover<=1.3.1,>=1.1.13
|
|
24
24
|
Requires-Dist: scikit-allel>=1.3.5
|
|
25
25
|
Requires-Dist: pyensembl==2.2.3
|
|
26
26
|
Requires-Dist: gtfparse==1.3.0
|
|
@@ -51,7 +51,7 @@ Warning: Known issues of GWASLab are summarized in [https://cloufield.github.io/
|
|
|
51
51
|
### install via pip
|
|
52
52
|
|
|
53
53
|
```
|
|
54
|
-
pip install gwaslab==3.
|
|
54
|
+
pip install gwaslab==3.5.0
|
|
55
55
|
```
|
|
56
56
|
|
|
57
57
|
```python
|
|
@@ -1,22 +1,23 @@
|
|
|
1
|
-
gwaslab/__init__.py,sha256=
|
|
1
|
+
gwaslab/__init__.py,sha256=pP_OQwkaXMJokVVU_o6AXnJEBs2HtaMtpcHIls3ezO8,2486
|
|
2
2
|
gwaslab/bd_common_data.py,sha256=2voBqMrIsII1TN5T6uvyDax90fWcJK1Stmo1ZHNGGsE,13898
|
|
3
3
|
gwaslab/bd_config.py,sha256=TP-r-DPhJD3XnRYZbw9bQHXaDIkiRgK8bG9HCt-UaLc,580
|
|
4
4
|
gwaslab/bd_download.py,sha256=cDDk2C5IvjeAzvPvVYGTkI4Ss33DUtEDjGo8eAbQRvY,15663
|
|
5
|
-
gwaslab/bd_get_hapmap3.py,sha256=
|
|
5
|
+
gwaslab/bd_get_hapmap3.py,sha256=FQpwbhWUPFT152QtiLevEkkN4YcVDIeKzoK0Uz1NlRo,4108
|
|
6
6
|
gwaslab/cache_manager.py,sha256=HOTnSkCOyGEPLRl90WT8D_6pAdI8d8AzenMIDGuCeWc,28113
|
|
7
7
|
gwaslab/g_Log.py,sha256=C3Zv-_6c3C9ms8bgQ-ytplz22sjk7euqXYkWr9zNeAs,1573
|
|
8
8
|
gwaslab/g_Phenotypes.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
-
gwaslab/g_Sumstats.py,sha256=
|
|
9
|
+
gwaslab/g_Sumstats.py,sha256=eqEpHEH5fnBMsOIufVzwaRp0_vCsuHvGEUe5OzNL41s,36969
|
|
10
10
|
gwaslab/g_SumstatsPair.py,sha256=20snPb4SlI6ftMGVjgxAuyxsxYRQF-GzzlBSnoB-3Lo,8851
|
|
11
11
|
gwaslab/g_SumstatsT.py,sha256=u_DighLMnMxwTLnqm-B58pA0G6WXRj6pudPyKMVKjSU,2133
|
|
12
12
|
gwaslab/g_Sumstats_summary.py,sha256=FECvvFXJVKaCX5dggBvvk9YvJ6AbdbcLfjltysX7wEE,6380
|
|
13
13
|
gwaslab/g_meta.py,sha256=htWlgURWclm9R6UqFcX1a93WN27xny7lGUeyJZOtszQ,2583
|
|
14
14
|
gwaslab/g_vchange_status.py,sha256=w3zsYYOcCaI3PTeboonvkQjudzUAfVIgATzRdiPViZs,1939
|
|
15
|
-
gwaslab/g_version.py,sha256=
|
|
15
|
+
gwaslab/g_version.py,sha256=GO-TBEpIUgE6esSkU-I4E8yS1MrCnzKwbYKM7htcJcw,1885
|
|
16
16
|
gwaslab/hm_casting.py,sha256=FqP4EQl83Q2OKLw004OgLIvUH795TVCGwziLk5jsHqY,11368
|
|
17
17
|
gwaslab/hm_harmonize_sumstats.py,sha256=_sZ8soikAxDokw-dcr_CLguBB8OmTmPPS04MfmsJc_Q,79509
|
|
18
18
|
gwaslab/hm_rsid_to_chrpos.py,sha256=ODWREO0jPN0RAfNzL5fRzSRANfhiksOvUVPuEsFZQqA,6552
|
|
19
19
|
gwaslab/io_preformat_input.py,sha256=J8Ny4OPMaLVdo2nP8lTM-c5A8LSdqphSrp9G4i9JjDQ,24097
|
|
20
|
+
gwaslab/io_process_args.py,sha256=KnQWMBwEZjQpCsVMKPrR1qQzeXCg542YdXsP0KwKat8,906
|
|
20
21
|
gwaslab/io_read_ldsc.py,sha256=wsYXpH50IchBKd2dhYloSqc4YgnDkiwMsAweaCoN5Eo,12471
|
|
21
22
|
gwaslab/io_read_tabular.py,sha256=EG-C6KhCutt4J4LlOMgXnqzJvU-EZXzVhMvaDFnHrMM,2380
|
|
22
23
|
gwaslab/io_to_formats.py,sha256=8FmbQjWUIsz_V1Lb80TuwRIXKBgs5t42j25Znougk1Y,29401
|
|
@@ -28,7 +29,7 @@ gwaslab/ldsc_parse.py,sha256=MBnfgcWlV4oHp9MoDRh1mpilaHhAR15Af77hMFn4-5k,10564
|
|
|
28
29
|
gwaslab/ldsc_regressions.py,sha256=yzbGjgNV7u-SWXNPsh9S8y9mK97Bim_Nmad9G9V18ZU,30078
|
|
29
30
|
gwaslab/ldsc_sumstats.py,sha256=O0olsDxKlh1MJ1gAuEN1t40rxhajOEwOQ20ak7xoDrI,26245
|
|
30
31
|
gwaslab/qc_check_datatype.py,sha256=kW68uk4dTLOU2b1dHoVat6n0loundDysAjIqxsXW28Q,3379
|
|
31
|
-
gwaslab/qc_fix_sumstats.py,sha256=
|
|
32
|
+
gwaslab/qc_fix_sumstats.py,sha256=u0YfC70zop2roUfq6mLMNL49m8AHPF2G-j8dKqW25yY,98261
|
|
32
33
|
gwaslab/run_script.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
33
34
|
gwaslab/util_abf_finemapping.py,sha256=LRcopjtkT-iXtKPAJIzR4qjPdhD7nrS_BGit4EW89FM,3054
|
|
34
35
|
gwaslab/util_ex_calculate_ldmatrix.py,sha256=Z_spxbq6SHDS0v84I59YTTF40iyLQIOZbt0dmEcNJjw,15417
|
|
@@ -53,24 +54,27 @@ gwaslab/util_in_get_density.py,sha256=kpKXH69acMkeYVG5vs-VbJC3COhmuLBfYco-wuOxgj
|
|
|
53
54
|
gwaslab/util_in_get_sig.py,sha256=53NOh7KueLY3vJPTNhhb37KPAIgLEfcP3k2zIV61lc4,39845
|
|
54
55
|
gwaslab/util_in_meta.py,sha256=5K9lIZcIgUy0AERqHy1GvMN2X6dp45JUUgopuDLgt4o,11284
|
|
55
56
|
gwaslab/util_in_snphwe.py,sha256=-KpIDx6vn_nah6H55IkV2OyjXQVXV13XyBL069WE1wM,1751
|
|
56
|
-
gwaslab/
|
|
57
|
+
gwaslab/vis_plot_credible sets.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
58
|
+
gwaslab/viz_aux_annotate_plot.py,sha256=gA-s8a90dsl3MB5CIapdI_DecD9h2FmuqMgy07kMYJI,25207
|
|
57
59
|
gwaslab/viz_aux_chromatin.py,sha256=7cGmej5EkKO7fxR1b5w8r1oRRl9ofVzFRG52SCYWtz0,4109
|
|
58
|
-
gwaslab/
|
|
60
|
+
gwaslab/viz_aux_property.py,sha256=UIaivghnLXYpTwkKnXRK0F28Jbn9L6OaICk3K73WZaU,33
|
|
61
|
+
gwaslab/viz_aux_quickfix.py,sha256=cGX5i3WBmvKIiqck8V00caDg-pvKOO709Ux3DBXsUrM,18693
|
|
59
62
|
gwaslab/viz_aux_reposition_text.py,sha256=iRIP-Rkltlei068HekJcVubiqPrunBqvAoSQ1eHk04M,4304
|
|
60
|
-
gwaslab/viz_aux_save_figure.py,sha256=
|
|
63
|
+
gwaslab/viz_aux_save_figure.py,sha256=x_b4DlTSmHJddfQgoYoReCi4QQbQEtcwCWTKfGetfTA,2768
|
|
61
64
|
gwaslab/viz_plot_compare_af.py,sha256=qtXW45-Sq_ugK8ZfqBYMpmf58SKi3lB3YyHnzn_akcE,5344
|
|
62
|
-
gwaslab/viz_plot_compare_effect.py,sha256=
|
|
65
|
+
gwaslab/viz_plot_compare_effect.py,sha256=kq-rVWygHEeTBMOtd_jk8nK85ClZHU-ADSf4nI2gTKo,66604
|
|
63
66
|
gwaslab/viz_plot_forestplot.py,sha256=xgOnefh737CgdQxu5naVyRNBX1NQXPFKzf51fbh6afs,6771
|
|
64
67
|
gwaslab/viz_plot_miamiplot.py,sha256=rCFEp7VNuVqeBBG3WRkmFAtFklbF79BvIQQYiSY70VY,31238
|
|
65
68
|
gwaslab/viz_plot_miamiplot2.py,sha256=xiFCgFX8hEySmCJORpEurMVER9eEXQyy_Ik7mLkbi9g,16015
|
|
66
|
-
gwaslab/viz_plot_mqqplot.py,sha256=
|
|
67
|
-
gwaslab/viz_plot_phe_heatmap.py,sha256=
|
|
69
|
+
gwaslab/viz_plot_mqqplot.py,sha256=mfmHseYHIFoEfSKBX46ps6abSQ6t9xDNahLDLLj4K8I,67924
|
|
70
|
+
gwaslab/viz_plot_phe_heatmap.py,sha256=qoXVeFTIm-n8IinNbDdPFVBSz2yGCGK6QzTstXv6aj4,9532
|
|
68
71
|
gwaslab/viz_plot_qqplot.py,sha256=psQgVpP29686CEZkzQz0iRbApzqy7aE3GGiBcazVvNw,7247
|
|
69
|
-
gwaslab/viz_plot_regional2.py,sha256=
|
|
72
|
+
gwaslab/viz_plot_regional2.py,sha256=g9cGI5sPAH32WR0ICvZB1wMG5butMgpdxYtxZUfG6fE,38314
|
|
70
73
|
gwaslab/viz_plot_regionalplot.py,sha256=8u-5-yfy-UaXhaxVVz3Y5k2kBAoqzczUw1hyyD450iI,37983
|
|
71
74
|
gwaslab/viz_plot_rg_heatmap.py,sha256=PidUsgOiEVt6MfBPCF3_yDhOEytZ-I1q-ZD6_0pFrV4,13713
|
|
72
|
-
gwaslab/
|
|
73
|
-
gwaslab/
|
|
75
|
+
gwaslab/viz_plot_scatter_with_reg.py,sha256=PmUZDQl2q4Dme3HLPXEwf_TrMjwJADA-uFXNDBWUEa4,8333
|
|
76
|
+
gwaslab/viz_plot_stackedregional.py,sha256=HfNUhwxevbwSoauE0ysG020U7YFVy4111nkIWdaJ4Q8,16664
|
|
77
|
+
gwaslab/viz_plot_trumpetplot.py,sha256=uuEdHNr2ZBXJkOMA6uu0OzKFh0de-BxOnPsUAYqwqOU,42660
|
|
74
78
|
gwaslab/data/formatbook.json,sha256=N2nJs80HH98Rsu9FxaSvIQO9J5yIV97WEtAKjRqYwiY,38207
|
|
75
79
|
gwaslab/data/reference.json,sha256=IrjwFnXjrpVUp3zYfcYClpibJE9Y-94gtrC1Aw8sXxg,12332
|
|
76
80
|
gwaslab/data/chrx_par/chrx_par_hg19.bed.gz,sha256=LocZg_ozhZjQiIpgWCO4EYCW9xgkEKpRy1m-YdIpzQs,83
|
|
@@ -79,9 +83,9 @@ gwaslab/data/hapmap3_SNPs/hapmap3_db150_hg19.snplist.gz,sha256=qD9RsC5S2h6l-OdpW
|
|
|
79
83
|
gwaslab/data/hapmap3_SNPs/hapmap3_db151_hg38.snplist.gz,sha256=Y8ZT2FIAhbhlgCJdE9qQVAiwnV_fcsPt72usBa7RSBM,10225828
|
|
80
84
|
gwaslab/data/high_ld/high_ld_hla_hg19.bed.gz,sha256=R7IkssKu0L4WwkU9SrS84xCMdrkkKL0gnTNO_OKbG0Y,219
|
|
81
85
|
gwaslab/data/high_ld/high_ld_hla_hg38.bed.gz,sha256=76CIU0pibDJ72Y6UY-TbIKE9gEPwTELAaIbCXyjm80Q,470
|
|
82
|
-
gwaslab-3.5.
|
|
83
|
-
gwaslab-3.5.
|
|
84
|
-
gwaslab-3.5.
|
|
85
|
-
gwaslab-3.5.
|
|
86
|
-
gwaslab-3.5.
|
|
87
|
-
gwaslab-3.5.
|
|
86
|
+
gwaslab-3.5.2.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
87
|
+
gwaslab-3.5.2.dist-info/LICENSE_before_v3.4.39,sha256=GhLOU_1UDEKeOacYhsRN_m9u-eIuVTazSndZPeNcTZA,1066
|
|
88
|
+
gwaslab-3.5.2.dist-info/METADATA,sha256=KYa_HwVvvyGlZeookgHHJAUgfde0d5YqitViCSmEU8M,7758
|
|
89
|
+
gwaslab-3.5.2.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
|
90
|
+
gwaslab-3.5.2.dist-info/top_level.txt,sha256=PyY6hWtrALpv2MAN3kjkIAzJNmmBTH5a2risz9KwH08,8
|
|
91
|
+
gwaslab-3.5.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|