gwaslab 3.4.37__py3-none-any.whl → 3.4.39__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gwaslab might be problematic. Click here for more details.
- gwaslab/bd_common_data.py +6 -3
- gwaslab/bd_download.py +9 -9
- gwaslab/bd_get_hapmap3.py +43 -9
- gwaslab/data/formatbook.json +722 -721
- gwaslab/g_Log.py +22 -5
- gwaslab/g_Sumstats.py +110 -163
- gwaslab/g_SumstatsPair.py +76 -25
- gwaslab/g_SumstatsT.py +2 -2
- gwaslab/g_Sumstats_summary.py +3 -3
- gwaslab/g_version.py +10 -10
- gwaslab/hm_casting.py +36 -17
- gwaslab/hm_harmonize_sumstats.py +354 -221
- gwaslab/hm_rsid_to_chrpos.py +1 -1
- gwaslab/io_preformat_input.py +49 -43
- gwaslab/io_read_ldsc.py +49 -1
- gwaslab/io_to_formats.py +428 -295
- gwaslab/ldsc_irwls.py +198 -0
- gwaslab/ldsc_jackknife.py +514 -0
- gwaslab/ldsc_ldscore.py +417 -0
- gwaslab/ldsc_parse.py +294 -0
- gwaslab/ldsc_regressions.py +747 -0
- gwaslab/ldsc_sumstats.py +629 -0
- gwaslab/qc_check_datatype.py +3 -3
- gwaslab/qc_fix_sumstats.py +891 -778
- gwaslab/util_ex_calculate_ldmatrix.py +31 -13
- gwaslab/util_ex_gwascatalog.py +25 -25
- gwaslab/util_ex_ldproxyfinder.py +10 -10
- gwaslab/util_ex_ldsc.py +189 -0
- gwaslab/util_ex_process_ref.py +3 -3
- gwaslab/util_ex_run_coloc.py +26 -4
- gwaslab/util_in_calculate_gc.py +6 -6
- gwaslab/util_in_calculate_power.py +42 -43
- gwaslab/util_in_convert_h2.py +8 -8
- gwaslab/util_in_fill_data.py +30 -30
- gwaslab/util_in_filter_value.py +201 -74
- gwaslab/util_in_get_density.py +10 -10
- gwaslab/util_in_get_sig.py +445 -71
- gwaslab/viz_aux_annotate_plot.py +12 -12
- gwaslab/viz_aux_quickfix.py +42 -37
- gwaslab/viz_aux_reposition_text.py +10 -7
- gwaslab/viz_aux_save_figure.py +18 -8
- gwaslab/viz_plot_compare_af.py +32 -33
- gwaslab/viz_plot_compare_effect.py +63 -71
- gwaslab/viz_plot_miamiplot2.py +34 -26
- gwaslab/viz_plot_mqqplot.py +126 -75
- gwaslab/viz_plot_qqplot.py +11 -8
- gwaslab/viz_plot_regionalplot.py +36 -33
- gwaslab/viz_plot_rg_heatmap.py +28 -26
- gwaslab/viz_plot_stackedregional.py +40 -21
- gwaslab/viz_plot_trumpetplot.py +65 -61
- gwaslab-3.4.39.dist-info/LICENSE +674 -0
- {gwaslab-3.4.37.dist-info → gwaslab-3.4.39.dist-info}/METADATA +5 -4
- gwaslab-3.4.39.dist-info/RECORD +80 -0
- gwaslab-3.4.37.dist-info/RECORD +0 -72
- /gwaslab-3.4.37.dist-info/LICENSE → /gwaslab-3.4.39.dist-info/LICENSE_before_v3.4.39 +0 -0
- {gwaslab-3.4.37.dist-info → gwaslab-3.4.39.dist-info}/WHEEL +0 -0
- {gwaslab-3.4.37.dist-info → gwaslab-3.4.39.dist-info}/top_level.txt +0 -0
gwaslab/viz_plot_mqqplot.py
CHANGED
|
@@ -174,6 +174,8 @@ def mqqplot(insumstats,
|
|
|
174
174
|
include_chrXYMT = True,
|
|
175
175
|
ylim=None,
|
|
176
176
|
xpad=None,
|
|
177
|
+
xpadl=None,
|
|
178
|
+
xpadr=None,
|
|
177
179
|
chrpad=0.03,
|
|
178
180
|
drop_chr_start=False,
|
|
179
181
|
title =None,
|
|
@@ -213,7 +215,6 @@ def mqqplot(insumstats,
|
|
|
213
215
|
chr_dict = get_chr_to_number()
|
|
214
216
|
if xtick_chr_dict is None:
|
|
215
217
|
xtick_chr_dict = get_number_to_chr()
|
|
216
|
-
|
|
217
218
|
if gtf_chr_dict is None:
|
|
218
219
|
gtf_chr_dict = get_number_to_chr()
|
|
219
220
|
if rr_chr_dict is None:
|
|
@@ -304,40 +305,42 @@ def mqqplot(insumstats,
|
|
|
304
305
|
scatter_args["rasterized"]=True
|
|
305
306
|
qq_scatter_args["rasterized"]=True
|
|
306
307
|
|
|
307
|
-
|
|
308
|
-
|
|
308
|
+
log.write("Start to create MQQ plot...{}:".format(_get_version()),verbose=verbose)
|
|
309
|
+
log.write(" -Genomic coordinates version: {}...".format(build),verbose=verbose)
|
|
309
310
|
if build is None or build=="99":
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
311
|
+
log.warning("Genomic coordinates version is unknown.")
|
|
312
|
+
log.write(" -Genome-wide significance level to plot is set to "+str(sig_level_plot)+" ...",verbose=verbose)
|
|
313
|
+
log.write(" -Raw input contains "+str(len(insumstats))+" variants...",verbose=verbose)
|
|
314
|
+
log.write(" -MQQ plot layout mode is : "+mode,verbose=verbose)
|
|
315
|
+
|
|
314
316
|
if len(anno_set)>0 and ("m" in mode):
|
|
315
|
-
|
|
317
|
+
log.write(" -Variants to annotate : "+",".join(anno_set),verbose=verbose)
|
|
318
|
+
|
|
316
319
|
if len(highlight)>0 and ("m" in mode):
|
|
317
320
|
if pd.api.types.is_list_like(highlight[0]):
|
|
318
321
|
if highlight_chrpos==False:
|
|
319
|
-
if len(highlight
|
|
320
|
-
log.
|
|
322
|
+
if len(highlight) != len(highlight_color):
|
|
323
|
+
log.warning("Number of locus groups in the list does not match number of provided colors.")
|
|
321
324
|
for i, highlight_set in enumerate(highlight):
|
|
322
|
-
|
|
325
|
+
log.write(" -Set {} loci to highlight ({}) : ".format(i+1, highlight_color[i%len(highlight_color)])+",".join(highlight_set),verbose=verbose)
|
|
323
326
|
else:
|
|
324
|
-
|
|
325
|
-
|
|
327
|
+
log.write(" -Loci to highlight ({}): {}".format(highlight_color,highlight),verbose=verbose)
|
|
328
|
+
log.write(" -highlight_windowkb is set to: ", highlight_windowkb, " kb",verbose=verbose)
|
|
326
329
|
else:
|
|
327
|
-
|
|
328
|
-
|
|
330
|
+
log.write(" -Loci to highlight ({}): ".format(highlight_color)+",".join(highlight),verbose=verbose)
|
|
331
|
+
log.write(" -highlight_windowkb is set to: ", highlight_windowkb, " kb",verbose=verbose)
|
|
329
332
|
|
|
330
333
|
if len(pinpoint)>0 :
|
|
331
334
|
if pd.api.types.is_list_like(pinpoint[0]):
|
|
332
|
-
if len(pinpoint
|
|
333
|
-
log.
|
|
335
|
+
if len(pinpoint) != len(pinpoint_color):
|
|
336
|
+
log.warning("Number of variant groups in the list does not match number of provided colors.")
|
|
334
337
|
for i, pinpoint_set in enumerate(pinpoint):
|
|
335
|
-
|
|
338
|
+
log.write(" -Set {} variants to pinpoint ({}) : ".format(i+1,pinpoint_color[i%len(pinpoint_color)])+",".join(pinpoint_set),verbose=verbose)
|
|
336
339
|
else:
|
|
337
|
-
|
|
340
|
+
log.write(" -Variants to pinpoint ({}) : ".format(pinpoint_color)+",".join(pinpoint),verbose=verbose)
|
|
338
341
|
|
|
339
342
|
if region is not None:
|
|
340
|
-
|
|
343
|
+
log.write(" -Region to plot : chr"+str(region[0])+":"+str(region[1])+"-"+str(region[2])+".",verbose=verbose)
|
|
341
344
|
|
|
342
345
|
# construct line series for conversion
|
|
343
346
|
if additional_line is None:
|
|
@@ -399,7 +402,7 @@ def mqqplot(insumstats,
|
|
|
399
402
|
pinpoint=pinpoint,
|
|
400
403
|
density_color=density_color)
|
|
401
404
|
|
|
402
|
-
sumstats = insumstats
|
|
405
|
+
sumstats = insumstats[usecols].copy()
|
|
403
406
|
|
|
404
407
|
#################################################################################################
|
|
405
408
|
|
|
@@ -408,7 +411,7 @@ def mqqplot(insumstats,
|
|
|
408
411
|
if (anno == "GENENAME"):
|
|
409
412
|
anno_sig=True
|
|
410
413
|
elif (anno is not None) and (anno is not True):
|
|
411
|
-
sumstats["Annotation"]=sumstats
|
|
414
|
+
sumstats["Annotation"]=sumstats[anno].astype("string")
|
|
412
415
|
|
|
413
416
|
## P value
|
|
414
417
|
## m, qq, r
|
|
@@ -432,15 +435,15 @@ def mqqplot(insumstats,
|
|
|
432
435
|
region_start = region[1]
|
|
433
436
|
region_end = region[2]
|
|
434
437
|
marker_size=(25,45)
|
|
435
|
-
|
|
438
|
+
log.write(" -Extract SNPs in region : chr{}:{}-{}...".format(region_chr, region[1], region[2]),verbose=verbose)
|
|
436
439
|
|
|
437
|
-
in_region_snp = (sumstats[chrom]==region_chr) &(sumstats[pos]<region_end) &(sumstats[pos]>region_start)
|
|
440
|
+
in_region_snp = (sumstats[chrom]==region_chr) & (sumstats[pos]<region_end) & (sumstats[pos]>region_start)
|
|
438
441
|
|
|
439
|
-
|
|
442
|
+
log.write(" -Extract SNPs in specified regions: "+str(sum(in_region_snp)),verbose=verbose)
|
|
440
443
|
sumstats = sumstats.loc[in_region_snp,:]
|
|
441
444
|
|
|
442
445
|
if len(sumstats)==0:
|
|
443
|
-
log.
|
|
446
|
+
log.warning("No valid data! Please check the input.")
|
|
444
447
|
return None
|
|
445
448
|
|
|
446
449
|
## EAF
|
|
@@ -454,11 +457,11 @@ def mqqplot(insumstats,
|
|
|
454
457
|
sumstats["HUE"] = pd.NA
|
|
455
458
|
sumstats["HUE"] = sumstats["HUE"].astype("Int64")
|
|
456
459
|
|
|
457
|
-
|
|
460
|
+
log.write("Finished loading specified columns from the sumstats.",verbose=verbose)
|
|
458
461
|
|
|
459
462
|
|
|
460
463
|
#sanity check############################################################################################################
|
|
461
|
-
log.write("Start conversion and sanity check:",verbose=verbose)
|
|
464
|
+
log.write("Start data conversion and sanity check:",verbose=verbose)
|
|
462
465
|
|
|
463
466
|
if _if_quick_qc == False:
|
|
464
467
|
log.write(" -Sanity check will be skipped.", verbose=verbose)
|
|
@@ -527,15 +530,19 @@ def mqqplot(insumstats,
|
|
|
527
530
|
lines_to_plot=lines_to_plot,
|
|
528
531
|
log = log)
|
|
529
532
|
except:
|
|
530
|
-
log.
|
|
533
|
+
log.warning("No valid data! Please check the input.")
|
|
531
534
|
return None
|
|
532
535
|
|
|
536
|
+
log.write("Finished data conversion and sanity check.",verbose=verbose)
|
|
537
|
+
|
|
533
538
|
# Manhattan plot ##########################################################################################################
|
|
539
|
+
log.write("Start to create MQQ plot with "+str(len(sumstats))+" variants...",verbose=verbose)
|
|
534
540
|
## regional plot ->rsq
|
|
535
541
|
#calculate rsq]
|
|
536
542
|
if vcf_path is not None:
|
|
537
543
|
if tabix is None:
|
|
538
544
|
tabix = which("tabix")
|
|
545
|
+
log.write(" -tabix will be used: {}".format(tabix),verbose=verbose)
|
|
539
546
|
sumstats = process_vcf(sumstats=sumstats,
|
|
540
547
|
vcf_path=vcf_path,
|
|
541
548
|
region=region,
|
|
@@ -568,8 +575,6 @@ def mqqplot(insumstats,
|
|
|
568
575
|
|
|
569
576
|
if vcf_path is not None:
|
|
570
577
|
sumstats["chr_hue"]=sumstats["LD"]
|
|
571
|
-
|
|
572
|
-
if verbose:log.write("Start to create MQQ plot with "+str(len(sumstats))+" variants:")
|
|
573
578
|
## default settings
|
|
574
579
|
|
|
575
580
|
palette = sns.color_palette(colors,n_colors=sumstats[chrom].nunique())
|
|
@@ -601,6 +606,7 @@ def mqqplot(insumstats,
|
|
|
601
606
|
## if highlight
|
|
602
607
|
highlight_i = pd.DataFrame()
|
|
603
608
|
if len(highlight) >0:
|
|
609
|
+
log.write(" -Creating background plot...",verbose=verbose)
|
|
604
610
|
plot = sns.scatterplot(data=sumstats, x='i', y='scaled_P',
|
|
605
611
|
hue='chr_hue',
|
|
606
612
|
palette=palette,
|
|
@@ -612,8 +618,7 @@ def mqqplot(insumstats,
|
|
|
612
618
|
zorder=2,ax=ax1,edgecolor=edgecolor, **scatter_args)
|
|
613
619
|
if pd.api.types.is_list_like(highlight[0]) and highlight_chrpos==False:
|
|
614
620
|
for i, highlight_set in enumerate(highlight):
|
|
615
|
-
|
|
616
|
-
print(sumstats["HUE"].dtype)
|
|
621
|
+
log.write(" -Highlighting set {} target loci...".format(i+1),verbose=verbose)
|
|
617
622
|
sns.scatterplot(data=sumstats.loc[sumstats["HUE"]==i], x='i', y='scaled_P',
|
|
618
623
|
hue="HUE",
|
|
619
624
|
palette={i:highlight_color[i%len(highlight_color)]},
|
|
@@ -625,7 +630,7 @@ def mqqplot(insumstats,
|
|
|
625
630
|
zorder=3+i,ax=ax1,edgecolor=edgecolor,**scatter_args)
|
|
626
631
|
highlight_i = sumstats.loc[~sumstats["HUE"].isna(),"i"].values
|
|
627
632
|
else:
|
|
628
|
-
|
|
633
|
+
log.write(" -Highlighting target loci...",verbose=verbose)
|
|
629
634
|
sns.scatterplot(data=sumstats.loc[sumstats["HUE"]==0], x='i', y='scaled_P',
|
|
630
635
|
hue="HUE",
|
|
631
636
|
palette={0:highlight_color},
|
|
@@ -674,6 +679,7 @@ def mqqplot(insumstats,
|
|
|
674
679
|
hue = 'chr_hue'
|
|
675
680
|
hue_norm=None
|
|
676
681
|
to_plot = sumstats
|
|
682
|
+
log.write(" -Creating background plot...",verbose=verbose)
|
|
677
683
|
plot = sns.scatterplot(data=to_plot, x='i', y='scaled_P',
|
|
678
684
|
hue=hue,
|
|
679
685
|
palette= palette,
|
|
@@ -693,17 +699,17 @@ def mqqplot(insumstats,
|
|
|
693
699
|
for i, pinpoint_set in enumerate(pinpoint):
|
|
694
700
|
if sum(sumstats[snpid].isin(pinpoint_set))>0:
|
|
695
701
|
to_pinpoint = sumstats.loc[sumstats[snpid].isin(pinpoint_set),:]
|
|
696
|
-
|
|
702
|
+
log.write(" -Pinpointing set {} target vairants...".format(i+1),verbose=verbose)
|
|
697
703
|
ax1.scatter(to_pinpoint["i"],to_pinpoint["scaled_P"],color=pinpoint_color[i%len(pinpoint_color)],zorder=100,s=marker_size[1]+1)
|
|
698
704
|
else:
|
|
699
|
-
|
|
705
|
+
log.write(" -Target vairants to pinpoint were not found. Skip pinpointing process...",verbose=verbose)
|
|
700
706
|
else:
|
|
701
707
|
if sum(sumstats[snpid].isin(pinpoint))>0:
|
|
702
708
|
to_pinpoint = sumstats.loc[sumstats[snpid].isin(pinpoint),:]
|
|
703
|
-
|
|
709
|
+
log.write(" -Pinpointing target vairants...",verbose=verbose)
|
|
704
710
|
ax1.scatter(to_pinpoint["i"],to_pinpoint["scaled_P"],color=pinpoint_color,zorder=100,s=marker_size[1]+1)
|
|
705
711
|
else:
|
|
706
|
-
|
|
712
|
+
log.write(" -Target vairants to pinpoint were not found. Skip pinpointing process...",verbose=verbose)
|
|
707
713
|
|
|
708
714
|
|
|
709
715
|
|
|
@@ -767,12 +773,15 @@ def mqqplot(insumstats,
|
|
|
767
773
|
lead_snp_i= None
|
|
768
774
|
lead_snp_i2=None
|
|
769
775
|
|
|
776
|
+
log.write("Finished creating MQQ plot successfully!",verbose=verbose)
|
|
777
|
+
|
|
770
778
|
# Get top variants for annotation #######################################################
|
|
779
|
+
log.write("Start to extract variants for annotation...",verbose=verbose)
|
|
771
780
|
if (anno and anno!=True) or (len(anno_set)>0):
|
|
772
781
|
if len(anno_set)>0:
|
|
773
782
|
to_annotate=sumstats.loc[sumstats[snpid].isin(anno_set),:]
|
|
774
783
|
if to_annotate.empty is not True:
|
|
775
|
-
|
|
784
|
+
log.write(" -Found "+str(len(to_annotate))+" specified variants to annotate...",verbose=verbose)
|
|
776
785
|
else:
|
|
777
786
|
to_annotate=getsig(sumstats.loc[sumstats["scaled_P"]> float(-np.log10(sig_level_lead)),:],
|
|
778
787
|
snpid,
|
|
@@ -785,7 +794,7 @@ def mqqplot(insumstats,
|
|
|
785
794
|
mlog10p="scaled_P",
|
|
786
795
|
verbose=False)
|
|
787
796
|
if (to_annotate.empty is not True) and ("b" not in mode):
|
|
788
|
-
|
|
797
|
+
log.write(" -Found "+str(len(to_annotate))+" significant variants with a sliding window size of "+str(windowsizekb)+" kb...",verbose=verbose)
|
|
789
798
|
else:
|
|
790
799
|
to_annotate=getsig(sumstats.loc[sumstats["scaled_P"]> float(-np.log10(sig_level_lead)),:],
|
|
791
800
|
"i",
|
|
@@ -798,7 +807,7 @@ def mqqplot(insumstats,
|
|
|
798
807
|
mlog10p="scaled_P",
|
|
799
808
|
sig_level=sig_level_lead)
|
|
800
809
|
if (to_annotate.empty is not True) and ("b" not in mode):
|
|
801
|
-
|
|
810
|
+
log.write(" -Found "+str(len(to_annotate))+" significant variants with a sliding window size of "+str(windowsizekb)+" kb...",verbose=verbose)
|
|
802
811
|
if (to_annotate.empty is not True) and anno=="GENENAME":
|
|
803
812
|
to_annotate = annogene(to_annotate,
|
|
804
813
|
id=snpid,
|
|
@@ -808,16 +817,21 @@ def mqqplot(insumstats,
|
|
|
808
817
|
build=build,
|
|
809
818
|
source=anno_source,
|
|
810
819
|
verbose=verbose).rename(columns={"GENE":"Annotation"})
|
|
820
|
+
log.write("Finished extracting variants for annotation...",verbose=verbose)
|
|
811
821
|
|
|
812
822
|
# Configure X, Y axes #######################################################
|
|
823
|
+
log.write("Start to process figure arts.",verbose=verbose)
|
|
813
824
|
if region is None:
|
|
814
825
|
# if Manhattan plot
|
|
826
|
+
|
|
815
827
|
ax1 = _process_xtick(ax1=ax1,
|
|
816
828
|
chrom_df=chrom_df,
|
|
817
829
|
xtick_chr_dict=xtick_chr_dict,
|
|
818
830
|
fontsize = fontsize,
|
|
819
|
-
font_family=font_family
|
|
820
|
-
|
|
831
|
+
font_family=font_family,
|
|
832
|
+
log=log,
|
|
833
|
+
verbose=verbose)
|
|
834
|
+
|
|
821
835
|
ax1, ax3 = _process_xlabel(region=region,
|
|
822
836
|
xlabel=xlabel,
|
|
823
837
|
ax1=ax1,
|
|
@@ -825,7 +839,9 @@ def mqqplot(insumstats,
|
|
|
825
839
|
mode=mode,
|
|
826
840
|
fontsize=fontsize,
|
|
827
841
|
font_family=font_family,
|
|
828
|
-
ax3=ax3
|
|
842
|
+
ax3=ax3,
|
|
843
|
+
log=log,
|
|
844
|
+
verbose=verbose)
|
|
829
845
|
|
|
830
846
|
ax1, ax4 = _process_ylabel(ylabel=ylabel,
|
|
831
847
|
ax1=ax1,
|
|
@@ -833,8 +849,11 @@ def mqqplot(insumstats,
|
|
|
833
849
|
bwindowsizekb=bwindowsizekb,
|
|
834
850
|
fontsize=fontsize,
|
|
835
851
|
font_family=font_family,
|
|
836
|
-
ax4=ax4
|
|
852
|
+
ax4=ax4,
|
|
853
|
+
log=log,
|
|
854
|
+
verbose=verbose)
|
|
837
855
|
|
|
856
|
+
|
|
838
857
|
ax1 = _set_yticklabels(cut=cut,
|
|
839
858
|
cutfactor=cutfactor,
|
|
840
859
|
cut_log=cut_log,
|
|
@@ -849,19 +868,28 @@ def mqqplot(insumstats,
|
|
|
849
868
|
font_family=font_family,
|
|
850
869
|
ytick3=ytick3,
|
|
851
870
|
ylabels=ylabels,
|
|
852
|
-
ylabels_converted=ylabels_converted
|
|
853
|
-
|
|
871
|
+
ylabels_converted=ylabels_converted,
|
|
872
|
+
log=log,
|
|
873
|
+
verbose=verbose)
|
|
854
874
|
|
|
855
875
|
ax1, ax4 = _process_ytick(ax1=ax1,
|
|
856
876
|
fontsize=fontsize,
|
|
857
877
|
font_family=font_family,
|
|
858
|
-
ax4=ax4
|
|
878
|
+
ax4=ax4,
|
|
879
|
+
log=log,
|
|
880
|
+
verbose=verbose)
|
|
859
881
|
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
cbar = _process_cbar(cbar,
|
|
882
|
+
# regional plot cbar
|
|
883
|
+
if cbar is not None:
|
|
884
|
+
cbar = _process_cbar(cbar,
|
|
885
|
+
cbar_fontsize=fontsize,
|
|
886
|
+
cbar_font_family=font_family,
|
|
887
|
+
cbar_title=cbar_title,
|
|
888
|
+
log=log,
|
|
889
|
+
verbose=verbose)
|
|
863
890
|
|
|
864
891
|
ax1 = _process_spine(ax1, mode)
|
|
892
|
+
|
|
865
893
|
# genomewide significant line
|
|
866
894
|
ax1 = _process_line(ax1,
|
|
867
895
|
sig_line,
|
|
@@ -874,7 +902,9 @@ def mqqplot(insumstats,
|
|
|
874
902
|
additional_line_color,
|
|
875
903
|
mode,
|
|
876
904
|
bmean,
|
|
877
|
-
bmedian
|
|
905
|
+
bmedian,
|
|
906
|
+
log=log,
|
|
907
|
+
verbose=verbose )
|
|
878
908
|
|
|
879
909
|
|
|
880
910
|
if mtitle and anno and len(to_annotate)>0:
|
|
@@ -882,8 +912,10 @@ def mqqplot(insumstats,
|
|
|
882
912
|
ax1.set_title(mtitle,pad=pad,fontsize=title_fontsize,family=font_family)
|
|
883
913
|
elif mtitle:
|
|
884
914
|
ax1.set_title(mtitle,fontsize=title_fontsize,family=font_family)
|
|
885
|
-
|
|
915
|
+
log.write("Finished processing figure arts.",verbose=verbose)
|
|
916
|
+
|
|
886
917
|
# Add annotation arrows and texts
|
|
918
|
+
log.write("Start to annotate variants...",verbose=verbose)
|
|
887
919
|
ax1 = annotate_single(
|
|
888
920
|
sumstats=sumstats,
|
|
889
921
|
anno=anno,
|
|
@@ -917,6 +949,7 @@ def mqqplot(insumstats,
|
|
|
917
949
|
log=log,
|
|
918
950
|
_invert=_invert
|
|
919
951
|
)
|
|
952
|
+
log.write("Finished annotating variants.",verbose=verbose)
|
|
920
953
|
# Manhattan-like plot Finished #####################################################################
|
|
921
954
|
|
|
922
955
|
# QQ plot #########################################################################################################
|
|
@@ -961,9 +994,9 @@ def mqqplot(insumstats,
|
|
|
961
994
|
|
|
962
995
|
# Y axis jagged
|
|
963
996
|
if jagged==True:
|
|
964
|
-
ax1 = _jagged_y(cut=cut,skip=skip,ax1=ax1,mode=1,mqqratio=mqqratio,jagged_len=jagged_len,jagged_wid=jagged_wid)
|
|
997
|
+
ax1 = _jagged_y(cut=cut,skip=skip,ax1=ax1,mode=1,mqqratio=mqqratio,jagged_len=jagged_len,jagged_wid=jagged_wid,log=log, verbose=verbose)
|
|
965
998
|
if "qq" in mode:
|
|
966
|
-
ax2 = _jagged_y(cut=cut,skip=skip,ax1=ax2,mode=2,mqqratio=mqqratio,jagged_len=jagged_len,jagged_wid=jagged_wid)
|
|
999
|
+
ax2 = _jagged_y(cut=cut,skip=skip,ax1=ax2,mode=2,mqqratio=mqqratio,jagged_len=jagged_len,jagged_wid=jagged_wid,log=log, verbose=verbose)
|
|
967
1000
|
|
|
968
1001
|
# XY lim
|
|
969
1002
|
if ylim is not None:
|
|
@@ -971,8 +1004,7 @@ def mqqplot(insumstats,
|
|
|
971
1004
|
if "qq" in mode:
|
|
972
1005
|
ax2.set_ylim(ylim)
|
|
973
1006
|
|
|
974
|
-
|
|
975
|
-
ax1.set_xlim([0 - xpad* sumstats["i"].max(),(1+xpad)*sumstats["i"].max()])
|
|
1007
|
+
ax1 = _add_pad_to_x_axis(ax1, xpad, xpadl, xpadr, sumstats)
|
|
976
1008
|
|
|
977
1009
|
# Titles
|
|
978
1010
|
if title and anno and len(to_annotate)>0:
|
|
@@ -989,7 +1021,7 @@ def mqqplot(insumstats,
|
|
|
989
1021
|
if _get_region_lead==True:
|
|
990
1022
|
return fig, log, lead_snp_i, lead_snp_i2
|
|
991
1023
|
|
|
992
|
-
|
|
1024
|
+
log.write("Finished creating plot successfully!",verbose=verbose)
|
|
993
1025
|
return fig, log
|
|
994
1026
|
|
|
995
1027
|
##############################################################################################################################################################################
|
|
@@ -997,8 +1029,21 @@ def mqqplot(insumstats,
|
|
|
997
1029
|
|
|
998
1030
|
|
|
999
1031
|
|
|
1032
|
+
def _add_pad_to_x_axis(ax1, xpad, xpadl, xpadr, sumstats):
    """Widen the x-axis limits of ``ax1`` by fractions of the largest ``i`` value.

    Each padding argument is a fraction that is multiplied by
    ``sumstats["i"].max()`` to obtain the padding width in axis units.

    Parameters
    ----------
    ax1 : axes-like object or None
        Object exposing ``get_xlim()`` and ``set_xlim()``. ``None`` is
        returned unchanged.
    xpad : float or None
        Padding fraction applied to both the left and the right side.
    xpadl : float or None
        Padding fraction for the left side only; takes precedence over
        ``xpad`` on that side.
    xpadr : float or None
        Padding fraction for the right side only; takes precedence over
        ``xpad`` on that side.
    sumstats : mapping
        ``sumstats["i"]`` must provide a ``max()`` method.

    Returns
    -------
    The same ``ax1`` object that was passed in.
    """
    # Defensive guard: nothing to pad without an axes object.
    if ax1 is None:
        return ax1

    # Leave the limits untouched when no padding was requested, so that
    # set_xlim() is not called needlessly.
    if xpad is None and xpadl is None and xpadr is None:
        return ax1

    xmin, xmax = ax1.get_xlim()
    i_max = sumstats["i"].max()

    # Resolve the effective fraction per side: a side-specific value wins
    # over the symmetric one.
    left_frac = xpadl if xpadl is not None else xpad
    right_frac = xpadr if xpadr is not None else xpad

    # Both sides are derived from the original limits and applied in a
    # single call, so left and right padding can be combined. The right
    # limit is extended from xmax (the previous code used xmin here, which
    # collapsed the axis when only xpad was given).
    new_xmin = xmin if left_frac is None else xmin - left_frac * i_max
    new_xmax = xmax if right_frac is None else xmax + right_frac * i_max

    ax1.set_xlim([new_xmin, new_xmax])
    return ax1
|
|
1002
1047
|
|
|
1003
1048
|
|
|
1004
1049
|
|
|
@@ -1084,22 +1129,22 @@ def _sanity_check(sumstats, mode, chrom, pos, stratified, _if_quick_qc, log, ver
|
|
|
1084
1129
|
#sanity check : drop variants with na values in chr and pos df
|
|
1085
1130
|
sumstats = sumstats.dropna(subset=[chrom,pos])
|
|
1086
1131
|
after_number=len(sumstats)
|
|
1087
|
-
|
|
1132
|
+
log.write(" -Removed "+ str(pre_number-after_number) +" variants with nan in CHR or POS column ...",verbose=verbose)
|
|
1088
1133
|
out_of_range_chr = sumstats[chrom]<=0
|
|
1089
|
-
|
|
1134
|
+
log.write(" -Removed {} variants with CHR <=0...".format(sum(out_of_range_chr)),verbose=verbose)
|
|
1090
1135
|
sumstats = sumstats.loc[~out_of_range_chr,:]
|
|
1091
1136
|
|
|
1092
1137
|
if stratified is True and _if_quick_qc:
|
|
1093
1138
|
pre_number=len(sumstats)
|
|
1094
1139
|
sumstats = sumstats.dropna(subset=["MAF"])
|
|
1095
1140
|
after_number=len(sumstats)
|
|
1096
|
-
|
|
1141
|
+
log.write(" -Removed "+ str(pre_number-after_number) +" variants with nan in EAF column ...",verbose=verbose)
|
|
1097
1142
|
|
|
1098
1143
|
if "b" not in mode and _if_quick_qc:
|
|
1099
1144
|
pre_number=len(sumstats)
|
|
1100
1145
|
sumstats = sumstats.dropna(subset=["raw_P"])
|
|
1101
1146
|
after_number=len(sumstats)
|
|
1102
|
-
|
|
1147
|
+
log.write(" -Removed "+ str(pre_number-after_number) +" variants with nan in P column ...",verbose=verbose)
|
|
1103
1148
|
return sumstats
|
|
1104
1149
|
|
|
1105
1150
|
def _process_p_value(sumstats, mode,p, mlog10p, scaled, log, verbose ):
|
|
@@ -1108,7 +1153,7 @@ def _process_p_value(sumstats, mode,p, mlog10p, scaled, log, verbose ):
|
|
|
1108
1153
|
sumstats["scaled_P"] = sumstats["DENSITY"].copy()
|
|
1109
1154
|
sumstats["raw_P"] = -np.log10(sumstats["DENSITY"].copy()+2)
|
|
1110
1155
|
elif scaled is True:
|
|
1111
|
-
|
|
1156
|
+
log.write(" -P values are already converted to -log10(P)!",verbose=verbose)
|
|
1112
1157
|
sumstats["scaled_P"] = sumstats["raw_P"].copy()
|
|
1113
1158
|
sumstats["raw_P"] = np.power(10,-sumstats["scaled_P"].astype("float64"))
|
|
1114
1159
|
else:
|
|
@@ -1156,7 +1201,7 @@ def _process_highlight(sumstats, highlight, highlight_chrpos, highlight_windowkb
|
|
|
1156
1201
|
sumstats.loc[right_chr&up_pos&low_pos,"HUE"]=0
|
|
1157
1202
|
else:
|
|
1158
1203
|
# highlight for one set
|
|
1159
|
-
|
|
1204
|
+
to_highlight = sumstats.loc[sumstats[snpid].isin(highlight),:]
|
|
1160
1205
|
#assign colors: 0 is highlight color
|
|
1161
1206
|
for index,row in to_highlight.iterrows():
|
|
1162
1207
|
target_chr = int(row[chrom])
|
|
@@ -1169,7 +1214,7 @@ def _process_highlight(sumstats, highlight, highlight_chrpos, highlight_windowkb
|
|
|
1169
1214
|
|
|
1170
1215
|
def _process_density(sumstats, mode, bwindowsizekb, chrom, pos, verbose, log):
|
|
1171
1216
|
if "b" in mode and "DENSITY" not in sumstats.columns:
|
|
1172
|
-
|
|
1217
|
+
log.write(" -Calculating DENSITY with windowsize of ",bwindowsizekb ," kb",verbose=verbose)
|
|
1173
1218
|
large_number = _get_largenumber(sumstats[pos].max(),log=log)
|
|
1174
1219
|
|
|
1175
1220
|
stack=[]
|
|
@@ -1190,11 +1235,12 @@ def _process_density(sumstats, mode, bwindowsizekb, chrom, pos, verbose, log):
|
|
|
1190
1235
|
elif "b" in mode and "DENSITY" in sumstats.columns:
|
|
1191
1236
|
bmean=sumstats["DENSITY"].mean()
|
|
1192
1237
|
bmedian=sumstats["DENSITY"].median()
|
|
1193
|
-
|
|
1238
|
+
log.write(" -DENSITY column exists. Skipping calculation...",verbose=verbose)
|
|
1194
1239
|
return sumstats, bmean, bmedian
|
|
1195
1240
|
|
|
1196
|
-
def _process_line(ax1, sig_line, suggestive_sig_line, additional_line, lines_to_plot , sc_linewidth, sig_line_color, suggestive_sig_line_color, additional_line_color, mode, bmean, bmedian ):
|
|
1241
|
+
def _process_line(ax1, sig_line, suggestive_sig_line, additional_line, lines_to_plot , sc_linewidth, sig_line_color, suggestive_sig_line_color, additional_line_color, mode, bmean, bmedian , log=Log(),verbose=True):
|
|
1197
1242
|
# genomewide significant line
|
|
1243
|
+
log.write(" -Processing lines...",verbose=verbose)
|
|
1198
1244
|
if sig_line is True:
|
|
1199
1245
|
sigline = ax1.axhline(y=lines_to_plot[0],
|
|
1200
1246
|
linewidth = sc_linewidth,
|
|
@@ -1220,8 +1266,9 @@ def _process_line(ax1, sig_line, suggestive_sig_line, additional_line, lines_to_
|
|
|
1220
1266
|
medianline = ax1.axhline(y=bmedian, linewidth = sc_linewidth,linestyle="--",color=sig_line_color,zorder=1000)
|
|
1221
1267
|
return ax1
|
|
1222
1268
|
|
|
1223
|
-
def _process_cbar(cbar, cbar_fontsize, cbar_font_family, cbar_title):
|
|
1224
|
-
|
|
1269
|
+
def _process_cbar(cbar, cbar_fontsize, cbar_font_family, cbar_title, log=Log(),verbose=True):
|
|
1270
|
+
log.write(" -Processing color bar...",verbose=verbose)
|
|
1271
|
+
if type(cbar) == list:
|
|
1225
1272
|
for cbar_single in cbar:
|
|
1226
1273
|
cbar_yticklabels = cbar_single.ax.get_yticklabels()
|
|
1227
1274
|
cbar_single.ax.set_yticklabels(cbar_yticklabels, fontsize=cbar_fontsize, family=cbar_font_family )
|
|
@@ -1232,12 +1279,14 @@ def _process_cbar(cbar, cbar_fontsize, cbar_font_family, cbar_title):
|
|
|
1232
1279
|
cbar.ax.set_title(cbar_title, fontsize=cbar_fontsize, family=cbar_font_family, loc="center",y=-0.2 )
|
|
1233
1280
|
return cbar
|
|
1234
1281
|
|
|
1235
|
-
def _process_xtick(ax1, chrom_df, xtick_chr_dict, fontsize, font_family):
|
|
1282
|
+
def _process_xtick(ax1, chrom_df, xtick_chr_dict, fontsize, font_family, log=Log(),verbose=True):
|
|
1283
|
+
log.write(" -Processing X ticks...",verbose=verbose)
|
|
1236
1284
|
ax1.set_xticks(chrom_df.astype("float64"))
|
|
1237
1285
|
ax1.set_xticklabels(chrom_df.index.astype("Int64").map(xtick_chr_dict),fontsize=fontsize,family=font_family)
|
|
1238
1286
|
return ax1
|
|
1239
1287
|
|
|
1240
|
-
def _process_ytick(ax1, fontsize, font_family, ax4):
|
|
1288
|
+
def _process_ytick(ax1, fontsize, font_family, ax4, log=Log(),verbose=True):
|
|
1289
|
+
log.write(" -Processing Y labels...",verbose=verbose)
|
|
1241
1290
|
ax1_yticklabels = ax1.get_yticklabels()
|
|
1242
1291
|
#ax1.set_yticklabels(ax1_yticklabels,fontsize=fontsize,family=font_family)
|
|
1243
1292
|
ax1_yticks = ax1.get_yticks()
|
|
@@ -1248,7 +1297,8 @@ def _process_ytick(ax1, fontsize, font_family, ax4):
|
|
|
1248
1297
|
ax4.set_yticks(ax4_yticks,ax4_yticklabels, fontsize=fontsize,family=font_family)
|
|
1249
1298
|
return ax1, ax4
|
|
1250
1299
|
|
|
1251
|
-
def _process_xlabel(region, xlabel, ax1, gtf_path, mode, fontsize, font_family, ax3=None ):
|
|
1300
|
+
def _process_xlabel(region, xlabel, ax1, gtf_path, mode, fontsize, font_family, ax3=None , log=Log(),verbose=True):
|
|
1301
|
+
log.write(" -Processing X labels...",verbose=verbose)
|
|
1252
1302
|
if region is not None:
|
|
1253
1303
|
if xlabel is None:
|
|
1254
1304
|
xlabel = "Chromosome "+str(region[0])+" (MB)"
|
|
@@ -1262,7 +1312,8 @@ def _process_xlabel(region, xlabel, ax1, gtf_path, mode, fontsize, font_family,
|
|
|
1262
1312
|
ax1.set_xlabel(xlabel,fontsize=fontsize,family=font_family)
|
|
1263
1313
|
return ax1, ax3
|
|
1264
1314
|
|
|
1265
|
-
def _process_ylabel(ylabel, ax1, mode, bwindowsizekb, fontsize, font_family, ax4=None):
|
|
1315
|
+
def _process_ylabel(ylabel, ax1, mode, bwindowsizekb, fontsize, font_family, ax4=None, log=Log(),verbose=True):
|
|
1316
|
+
log.write(" -Processing Y labels...",verbose=verbose)
|
|
1266
1317
|
if "b" in mode:
|
|
1267
1318
|
if ylabel is None:
|
|
1268
1319
|
ylabel ="Density of GWAS \n SNPs within "+str(bwindowsizekb)+" kb"
|
|
@@ -1336,4 +1387,4 @@ def _process_layout(mode, figax, fig_args, mqqratio, region_hspace):
|
|
|
1336
1387
|
raise ValueError("Please select one from the 5 modes: mqq/qqm/m/qq/r/b")
|
|
1337
1388
|
ax4=None
|
|
1338
1389
|
cbar=None
|
|
1339
|
-
return fig, ax1, ax2, ax3, ax4, cbar
|
|
1390
|
+
return fig, ax1, ax2, ax3, ax4, cbar
|
gwaslab/viz_plot_qqplot.py
CHANGED
|
@@ -45,7 +45,7 @@ def _plot_qq(
|
|
|
45
45
|
|
|
46
46
|
# QQ plot #########################################################################################################
|
|
47
47
|
# ax2 qqplot
|
|
48
|
-
|
|
48
|
+
log.write("Start to create QQ plot with "+str(len(sumstats))+" variants:",verbose=verbose )
|
|
49
49
|
|
|
50
50
|
# plotting qq plots using processed data after cut and skip
|
|
51
51
|
|
|
@@ -59,6 +59,7 @@ def _plot_qq(
|
|
|
59
59
|
upper_bound_p = np.power(10.0, -expected_min_mlog10p)
|
|
60
60
|
|
|
61
61
|
if stratified is False:
|
|
62
|
+
log.write(" -Plotting all variants...",verbose=verbose)
|
|
62
63
|
# sort x,y for qq plot
|
|
63
64
|
# high to low
|
|
64
65
|
observed = p_toplot.sort_values(ascending=False)
|
|
@@ -68,13 +69,13 @@ def _plot_qq(
|
|
|
68
69
|
|
|
69
70
|
expected_all = -np.log10(np.linspace(minit,upper_bound_p,len(p_toplot_raw)))[:len(observed)]
|
|
70
71
|
|
|
71
|
-
|
|
72
|
+
log.write(" -Expected range of P: (0,{})".format(upper_bound_p),verbose=verbose)
|
|
72
73
|
#p_toplot = sumstats["scaled_P"]
|
|
73
74
|
ax2.scatter(expected_all,observed,s=marker_size[1],color=colors[0],**qq_scatter_args)
|
|
74
75
|
|
|
75
76
|
else:
|
|
76
77
|
# stratified qq plot
|
|
77
|
-
|
|
78
|
+
log.write(" -Plotting variants stratified by MAF...",verbose=verbose)
|
|
78
79
|
observed = p_toplot.sort_values(ascending=False)
|
|
79
80
|
expected_all = -np.log10(np.linspace(minit,upper_bound_p,len(p_toplot_raw)))[:len(observed)]
|
|
80
81
|
|
|
@@ -115,15 +116,15 @@ def _plot_qq(
|
|
|
115
116
|
|
|
116
117
|
if expected_min_mlog10p!=0:
|
|
117
118
|
level = 1 - np.power(10.0,-np.nanmedian(expected_all))
|
|
118
|
-
|
|
119
|
+
log.write(" -Level for calculating lambda GC : {}".format(1 - level),verbose=verbose)
|
|
119
120
|
|
|
120
|
-
if
|
|
121
|
+
if not include_chrXYMT : log.write(" -Excluding chrX,Y, MT from calculation of lambda GC.",verbose=verbose)
|
|
121
122
|
lambdagc = lambdaGC(p_toplot_raw,
|
|
122
123
|
mode="MLOG10P",
|
|
123
124
|
level=level,
|
|
124
125
|
include_chrXYMT=include_chrXYMT,
|
|
125
126
|
log=log,
|
|
126
|
-
verbose=
|
|
127
|
+
verbose=verbose)
|
|
127
128
|
|
|
128
129
|
# annotate lambda gc to qq plot
|
|
129
130
|
ax2.text(0.10, 1.03,"$\\lambda_{GC}$ = "+"{:.4f}".format(lambdagc),
|
|
@@ -147,7 +148,9 @@ def _plot_qq(
|
|
|
147
148
|
font_family=font_family,
|
|
148
149
|
ylabels=ylabels,
|
|
149
150
|
ytick3=ytick3,
|
|
150
|
-
ylabels_converted=ylabels_converted
|
|
151
|
+
ylabels_converted=ylabels_converted,
|
|
152
|
+
log=log,
|
|
153
|
+
verbose=verbose
|
|
151
154
|
)
|
|
152
155
|
|
|
153
156
|
#if cut == 0:
|
|
@@ -181,7 +184,7 @@ def _plot_qq(
|
|
|
181
184
|
if qtitle:
|
|
182
185
|
ax2.set_title(qtitle,fontsize=title_fontsize,pad=10,family=font_family)
|
|
183
186
|
|
|
184
|
-
|
|
187
|
+
log.write("Finished creating QQ plot successfully!",verbose=verbose)
|
|
185
188
|
|
|
186
189
|
# Creating QQ plot Finished #############################################################################################
|
|
187
190
|
return ax2
|