gwaslab 3.4.37__py3-none-any.whl → 3.4.38__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gwaslab might be problematic. Click here for more details.
- gwaslab/data/formatbook.json +722 -721
- gwaslab/g_Log.py +8 -0
- gwaslab/g_Sumstats.py +26 -147
- gwaslab/g_SumstatsPair.py +6 -2
- gwaslab/g_Sumstats_summary.py +3 -3
- gwaslab/g_version.py +2 -2
- gwaslab/hm_casting.py +29 -15
- gwaslab/hm_harmonize_sumstats.py +291 -163
- gwaslab/hm_rsid_to_chrpos.py +1 -1
- gwaslab/io_preformat_input.py +43 -37
- gwaslab/io_to_formats.py +428 -295
- gwaslab/qc_check_datatype.py +3 -3
- gwaslab/qc_fix_sumstats.py +793 -682
- gwaslab/util_ex_calculate_ldmatrix.py +29 -11
- gwaslab/util_ex_gwascatalog.py +1 -1
- gwaslab/util_ex_ldproxyfinder.py +1 -1
- gwaslab/util_ex_process_ref.py +3 -3
- gwaslab/util_ex_run_coloc.py +26 -4
- gwaslab/util_in_convert_h2.py +1 -1
- gwaslab/util_in_fill_data.py +2 -2
- gwaslab/util_in_filter_value.py +122 -34
- gwaslab/util_in_get_density.py +2 -2
- gwaslab/util_in_get_sig.py +41 -9
- gwaslab/viz_aux_quickfix.py +24 -19
- gwaslab/viz_aux_reposition_text.py +7 -4
- gwaslab/viz_aux_save_figure.py +6 -5
- gwaslab/viz_plot_compare_af.py +5 -5
- gwaslab/viz_plot_miamiplot2.py +28 -20
- gwaslab/viz_plot_mqqplot.py +109 -72
- gwaslab/viz_plot_qqplot.py +11 -8
- gwaslab/viz_plot_regionalplot.py +3 -1
- gwaslab/viz_plot_trumpetplot.py +15 -6
- {gwaslab-3.4.37.dist-info → gwaslab-3.4.38.dist-info}/METADATA +2 -2
- {gwaslab-3.4.37.dist-info → gwaslab-3.4.38.dist-info}/RECORD +37 -37
- {gwaslab-3.4.37.dist-info → gwaslab-3.4.38.dist-info}/LICENSE +0 -0
- {gwaslab-3.4.37.dist-info → gwaslab-3.4.38.dist-info}/WHEEL +0 -0
- {gwaslab-3.4.37.dist-info → gwaslab-3.4.38.dist-info}/top_level.txt +0 -0
gwaslab/viz_plot_mqqplot.py
CHANGED
|
@@ -213,7 +213,6 @@ def mqqplot(insumstats,
|
|
|
213
213
|
chr_dict = get_chr_to_number()
|
|
214
214
|
if xtick_chr_dict is None:
|
|
215
215
|
xtick_chr_dict = get_number_to_chr()
|
|
216
|
-
|
|
217
216
|
if gtf_chr_dict is None:
|
|
218
217
|
gtf_chr_dict = get_number_to_chr()
|
|
219
218
|
if rr_chr_dict is None:
|
|
@@ -304,40 +303,42 @@ def mqqplot(insumstats,
|
|
|
304
303
|
scatter_args["rasterized"]=True
|
|
305
304
|
qq_scatter_args["rasterized"]=True
|
|
306
305
|
|
|
307
|
-
|
|
308
|
-
|
|
306
|
+
log.write("Start to create MQQ plot...{}:".format(_get_version()),verbose=verbose)
|
|
307
|
+
log.write(" -Genomic coordinates version: {}...".format(build),verbose=verbose)
|
|
309
308
|
if build is None or build=="99":
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
309
|
+
log.warning("Genomic coordinates version is unknown.")
|
|
310
|
+
log.write(" -Genome-wide significance level to plot is set to "+str(sig_level_plot)+" ...",verbose=verbose)
|
|
311
|
+
log.write(" -Raw input contains "+str(len(insumstats))+" variants...",verbose=verbose)
|
|
312
|
+
log.write(" -MQQ plot layout mode is : "+mode,verbose=verbose)
|
|
313
|
+
|
|
314
314
|
if len(anno_set)>0 and ("m" in mode):
|
|
315
|
-
|
|
315
|
+
log.write(" -Variants to annotate : "+",".join(anno_set),verbose=verbose)
|
|
316
|
+
|
|
316
317
|
if len(highlight)>0 and ("m" in mode):
|
|
317
318
|
if pd.api.types.is_list_like(highlight[0]):
|
|
318
319
|
if highlight_chrpos==False:
|
|
319
|
-
if len(highlight
|
|
320
|
-
log.
|
|
320
|
+
if len(highlight) != len(highlight_color):
|
|
321
|
+
log.warning("Number of locus groups in the list does not match number of provided colors.")
|
|
321
322
|
for i, highlight_set in enumerate(highlight):
|
|
322
|
-
|
|
323
|
+
log.write(" -Set {} loci to highlight ({}) : ".format(i+1, highlight_color[i%len(highlight_color)])+",".join(highlight_set),verbose=verbose)
|
|
323
324
|
else:
|
|
324
|
-
|
|
325
|
-
|
|
325
|
+
log.write(" -Loci to highlight ({}): {}".format(highlight_color,highlight),verbose=verbose)
|
|
326
|
+
log.write(" -highlight_windowkb is set to: ", highlight_windowkb, " kb",verbose=verbose)
|
|
326
327
|
else:
|
|
327
|
-
|
|
328
|
-
|
|
328
|
+
log.write(" -Loci to highlight ({}): ".format(highlight_color)+",".join(highlight),verbose=verbose)
|
|
329
|
+
log.write(" -highlight_windowkb is set to: ", highlight_windowkb, " kb",verbose=verbose)
|
|
329
330
|
|
|
330
331
|
if len(pinpoint)>0 :
|
|
331
332
|
if pd.api.types.is_list_like(pinpoint[0]):
|
|
332
|
-
if len(pinpoint
|
|
333
|
-
log.
|
|
333
|
+
if len(pinpoint) != len(pinpoint_color):
|
|
334
|
+
log.warning("Number of variant groups in the list does not match number of provided colors.")
|
|
334
335
|
for i, pinpoint_set in enumerate(pinpoint):
|
|
335
|
-
|
|
336
|
+
log.write(" -Set {} variants to pinpoint ({}) : ".format(i+1,pinpoint_color[i%len(pinpoint_color)])+",".join(pinpoint_set),verbose=verbose)
|
|
336
337
|
else:
|
|
337
|
-
|
|
338
|
+
log.write(" -Variants to pinpoint ({}) : ".format(pinpoint_color)+",".join(pinpoint),verbose=verbose)
|
|
338
339
|
|
|
339
340
|
if region is not None:
|
|
340
|
-
|
|
341
|
+
log.write(" -Region to plot : chr"+str(region[0])+":"+str(region[1])+"-"+str(region[2])+".",verbose=verbose)
|
|
341
342
|
|
|
342
343
|
# construct line series for coversion
|
|
343
344
|
if additional_line is None:
|
|
@@ -399,7 +400,7 @@ def mqqplot(insumstats,
|
|
|
399
400
|
pinpoint=pinpoint,
|
|
400
401
|
density_color=density_color)
|
|
401
402
|
|
|
402
|
-
sumstats = insumstats
|
|
403
|
+
sumstats = insumstats[usecols].copy()
|
|
403
404
|
|
|
404
405
|
#################################################################################################
|
|
405
406
|
|
|
@@ -408,7 +409,7 @@ def mqqplot(insumstats,
|
|
|
408
409
|
if (anno == "GENENAME"):
|
|
409
410
|
anno_sig=True
|
|
410
411
|
elif (anno is not None) and (anno is not True):
|
|
411
|
-
sumstats["Annotation"]=sumstats
|
|
412
|
+
sumstats["Annotation"]=sumstats[anno].astype("string")
|
|
412
413
|
|
|
413
414
|
## P value
|
|
414
415
|
## m, qq, r
|
|
@@ -432,15 +433,15 @@ def mqqplot(insumstats,
|
|
|
432
433
|
region_start = region[1]
|
|
433
434
|
region_end = region[2]
|
|
434
435
|
marker_size=(25,45)
|
|
435
|
-
|
|
436
|
+
log.write(" -Extract SNPs in region : chr{}:{}-{}...".format(region_chr, region[1], region[2]),verbose=verbose)
|
|
436
437
|
|
|
437
|
-
in_region_snp = (sumstats[chrom]==region_chr) &(sumstats[pos]<region_end) &(sumstats[pos]>region_start)
|
|
438
|
+
in_region_snp = (sumstats[chrom]==region_chr) & (sumstats[pos]<region_end) & (sumstats[pos]>region_start)
|
|
438
439
|
|
|
439
|
-
|
|
440
|
+
log.write(" -Extract SNPs in specified regions: "+str(sum(in_region_snp)),verbose=verbose)
|
|
440
441
|
sumstats = sumstats.loc[in_region_snp,:]
|
|
441
442
|
|
|
442
443
|
if len(sumstats)==0:
|
|
443
|
-
log.
|
|
444
|
+
log.warning("No valid data! Please check the input.")
|
|
444
445
|
return None
|
|
445
446
|
|
|
446
447
|
## EAF
|
|
@@ -454,11 +455,11 @@ def mqqplot(insumstats,
|
|
|
454
455
|
sumstats["HUE"] = pd.NA
|
|
455
456
|
sumstats["HUE"] = sumstats["HUE"].astype("Int64")
|
|
456
457
|
|
|
457
|
-
|
|
458
|
+
log.write("Finished loading specified columns from the sumstats.",verbose=verbose)
|
|
458
459
|
|
|
459
460
|
|
|
460
461
|
#sanity check############################################################################################################
|
|
461
|
-
log.write("Start conversion and sanity check:",verbose=verbose)
|
|
462
|
+
log.write("Start data conversion and sanity check:",verbose=verbose)
|
|
462
463
|
|
|
463
464
|
if _if_quick_qc == False:
|
|
464
465
|
log.write(" -Sanity check will be skipped.", verbose=verbose)
|
|
@@ -527,15 +528,19 @@ def mqqplot(insumstats,
|
|
|
527
528
|
lines_to_plot=lines_to_plot,
|
|
528
529
|
log = log)
|
|
529
530
|
except:
|
|
530
|
-
log.
|
|
531
|
+
log.warning("No valid data! Please check the input.")
|
|
531
532
|
return None
|
|
532
533
|
|
|
534
|
+
log.write("Finished data conversion and sanity check.",verbose=verbose)
|
|
535
|
+
|
|
533
536
|
# Manhattan plot ##########################################################################################################
|
|
537
|
+
log.write("Start to create MQQ plot with "+str(len(sumstats))+" variants...",verbose=verbose)
|
|
534
538
|
## regional plot ->rsq
|
|
535
539
|
#calculate rsq]
|
|
536
540
|
if vcf_path is not None:
|
|
537
541
|
if tabix is None:
|
|
538
542
|
tabix = which("tabix")
|
|
543
|
+
log.write(" -tabix will be used: {}".format(tabix),verbose=verbose)
|
|
539
544
|
sumstats = process_vcf(sumstats=sumstats,
|
|
540
545
|
vcf_path=vcf_path,
|
|
541
546
|
region=region,
|
|
@@ -568,8 +573,6 @@ def mqqplot(insumstats,
|
|
|
568
573
|
|
|
569
574
|
if vcf_path is not None:
|
|
570
575
|
sumstats["chr_hue"]=sumstats["LD"]
|
|
571
|
-
|
|
572
|
-
if verbose:log.write("Start to create MQQ plot with "+str(len(sumstats))+" variants:")
|
|
573
576
|
## default seetings
|
|
574
577
|
|
|
575
578
|
palette = sns.color_palette(colors,n_colors=sumstats[chrom].nunique())
|
|
@@ -601,6 +604,7 @@ def mqqplot(insumstats,
|
|
|
601
604
|
## if highlight
|
|
602
605
|
highlight_i = pd.DataFrame()
|
|
603
606
|
if len(highlight) >0:
|
|
607
|
+
log.write(" -Creating background plot...",verbose=verbose)
|
|
604
608
|
plot = sns.scatterplot(data=sumstats, x='i', y='scaled_P',
|
|
605
609
|
hue='chr_hue',
|
|
606
610
|
palette=palette,
|
|
@@ -612,8 +616,7 @@ def mqqplot(insumstats,
|
|
|
612
616
|
zorder=2,ax=ax1,edgecolor=edgecolor, **scatter_args)
|
|
613
617
|
if pd.api.types.is_list_like(highlight[0]) and highlight_chrpos==False:
|
|
614
618
|
for i, highlight_set in enumerate(highlight):
|
|
615
|
-
|
|
616
|
-
print(sumstats["HUE"].dtype)
|
|
619
|
+
log.write(" -Highlighting set {} target loci...".format(i+1),verbose=verbose)
|
|
617
620
|
sns.scatterplot(data=sumstats.loc[sumstats["HUE"]==i], x='i', y='scaled_P',
|
|
618
621
|
hue="HUE",
|
|
619
622
|
palette={i:highlight_color[i%len(highlight_color)]},
|
|
@@ -625,7 +628,7 @@ def mqqplot(insumstats,
|
|
|
625
628
|
zorder=3+i,ax=ax1,edgecolor=edgecolor,**scatter_args)
|
|
626
629
|
highlight_i = sumstats.loc[~sumstats["HUE"].isna(),"i"].values
|
|
627
630
|
else:
|
|
628
|
-
|
|
631
|
+
log.write(" -Highlighting target loci...",verbose=verbose)
|
|
629
632
|
sns.scatterplot(data=sumstats.loc[sumstats["HUE"]==0], x='i', y='scaled_P',
|
|
630
633
|
hue="HUE",
|
|
631
634
|
palette={0:highlight_color},
|
|
@@ -674,6 +677,7 @@ def mqqplot(insumstats,
|
|
|
674
677
|
hue = 'chr_hue'
|
|
675
678
|
hue_norm=None
|
|
676
679
|
to_plot = sumstats
|
|
680
|
+
log.write(" -Creating background plot...",verbose=verbose)
|
|
677
681
|
plot = sns.scatterplot(data=to_plot, x='i', y='scaled_P',
|
|
678
682
|
hue=hue,
|
|
679
683
|
palette= palette,
|
|
@@ -693,17 +697,17 @@ def mqqplot(insumstats,
|
|
|
693
697
|
for i, pinpoint_set in enumerate(pinpoint):
|
|
694
698
|
if sum(sumstats[snpid].isin(pinpoint_set))>0:
|
|
695
699
|
to_pinpoint = sumstats.loc[sumstats[snpid].isin(pinpoint_set),:]
|
|
696
|
-
|
|
700
|
+
log.write(" -Pinpointing set {} target vairants...".format(i+1),verbose=verbose)
|
|
697
701
|
ax1.scatter(to_pinpoint["i"],to_pinpoint["scaled_P"],color=pinpoint_color[i%len(pinpoint_color)],zorder=100,s=marker_size[1]+1)
|
|
698
702
|
else:
|
|
699
|
-
|
|
703
|
+
log.write(" -Target vairants to pinpoint were not found. Skip pinpointing process...",verbose=verbose)
|
|
700
704
|
else:
|
|
701
705
|
if sum(sumstats[snpid].isin(pinpoint))>0:
|
|
702
706
|
to_pinpoint = sumstats.loc[sumstats[snpid].isin(pinpoint),:]
|
|
703
|
-
|
|
707
|
+
log.write(" -Pinpointing target vairants...",verbose=verbose)
|
|
704
708
|
ax1.scatter(to_pinpoint["i"],to_pinpoint["scaled_P"],color=pinpoint_color,zorder=100,s=marker_size[1]+1)
|
|
705
709
|
else:
|
|
706
|
-
|
|
710
|
+
log.write(" -Target vairants to pinpoint were not found. Skip pinpointing process...",verbose=verbose)
|
|
707
711
|
|
|
708
712
|
|
|
709
713
|
|
|
@@ -767,12 +771,15 @@ def mqqplot(insumstats,
|
|
|
767
771
|
lead_snp_i= None
|
|
768
772
|
lead_snp_i2=None
|
|
769
773
|
|
|
774
|
+
log.write("Finished creating MQQ plot successfully!",verbose=verbose)
|
|
775
|
+
|
|
770
776
|
# Get top variants for annotation #######################################################
|
|
777
|
+
log.write("Start to extract variants for annotation...",verbose=verbose)
|
|
771
778
|
if (anno and anno!=True) or (len(anno_set)>0):
|
|
772
779
|
if len(anno_set)>0:
|
|
773
780
|
to_annotate=sumstats.loc[sumstats[snpid].isin(anno_set),:]
|
|
774
781
|
if to_annotate.empty is not True:
|
|
775
|
-
|
|
782
|
+
log.write(" -Found "+str(len(to_annotate))+" specified variants to annotate...",verbose=verbose)
|
|
776
783
|
else:
|
|
777
784
|
to_annotate=getsig(sumstats.loc[sumstats["scaled_P"]> float(-np.log10(sig_level_lead)),:],
|
|
778
785
|
snpid,
|
|
@@ -785,7 +792,7 @@ def mqqplot(insumstats,
|
|
|
785
792
|
mlog10p="scaled_P",
|
|
786
793
|
verbose=False)
|
|
787
794
|
if (to_annotate.empty is not True) and ("b" not in mode):
|
|
788
|
-
|
|
795
|
+
log.write(" -Found "+str(len(to_annotate))+" significant variants with a sliding window size of "+str(windowsizekb)+" kb...",verbose=verbose)
|
|
789
796
|
else:
|
|
790
797
|
to_annotate=getsig(sumstats.loc[sumstats["scaled_P"]> float(-np.log10(sig_level_lead)),:],
|
|
791
798
|
"i",
|
|
@@ -798,7 +805,7 @@ def mqqplot(insumstats,
|
|
|
798
805
|
mlog10p="scaled_P",
|
|
799
806
|
sig_level=sig_level_lead)
|
|
800
807
|
if (to_annotate.empty is not True) and ("b" not in mode):
|
|
801
|
-
|
|
808
|
+
log.write(" -Found "+str(len(to_annotate))+" significant variants with a sliding window size of "+str(windowsizekb)+" kb...",verbose=verbose)
|
|
802
809
|
if (to_annotate.empty is not True) and anno=="GENENAME":
|
|
803
810
|
to_annotate = annogene(to_annotate,
|
|
804
811
|
id=snpid,
|
|
@@ -808,16 +815,21 @@ def mqqplot(insumstats,
|
|
|
808
815
|
build=build,
|
|
809
816
|
source=anno_source,
|
|
810
817
|
verbose=verbose).rename(columns={"GENE":"Annotation"})
|
|
818
|
+
log.write("Finished extracting variants for annotation...",verbose=verbose)
|
|
811
819
|
|
|
812
820
|
# Configure X, Y axes #######################################################
|
|
821
|
+
log.write("Start to process figure arts.",verbose=verbose)
|
|
813
822
|
if region is None:
|
|
814
823
|
# if Manhattan plot
|
|
824
|
+
|
|
815
825
|
ax1 = _process_xtick(ax1=ax1,
|
|
816
826
|
chrom_df=chrom_df,
|
|
817
827
|
xtick_chr_dict=xtick_chr_dict,
|
|
818
828
|
fontsize = fontsize,
|
|
819
|
-
font_family=font_family
|
|
820
|
-
|
|
829
|
+
font_family=font_family,
|
|
830
|
+
log=log,
|
|
831
|
+
verbose=verbose)
|
|
832
|
+
|
|
821
833
|
ax1, ax3 = _process_xlabel(region=region,
|
|
822
834
|
xlabel=xlabel,
|
|
823
835
|
ax1=ax1,
|
|
@@ -825,7 +837,9 @@ def mqqplot(insumstats,
|
|
|
825
837
|
mode=mode,
|
|
826
838
|
fontsize=fontsize,
|
|
827
839
|
font_family=font_family,
|
|
828
|
-
ax3=ax3
|
|
840
|
+
ax3=ax3,
|
|
841
|
+
log=log,
|
|
842
|
+
verbose=verbose)
|
|
829
843
|
|
|
830
844
|
ax1, ax4 = _process_ylabel(ylabel=ylabel,
|
|
831
845
|
ax1=ax1,
|
|
@@ -833,8 +847,11 @@ def mqqplot(insumstats,
|
|
|
833
847
|
bwindowsizekb=bwindowsizekb,
|
|
834
848
|
fontsize=fontsize,
|
|
835
849
|
font_family=font_family,
|
|
836
|
-
ax4=ax4
|
|
850
|
+
ax4=ax4,
|
|
851
|
+
log=log,
|
|
852
|
+
verbose=verbose)
|
|
837
853
|
|
|
854
|
+
|
|
838
855
|
ax1 = _set_yticklabels(cut=cut,
|
|
839
856
|
cutfactor=cutfactor,
|
|
840
857
|
cut_log=cut_log,
|
|
@@ -849,19 +866,28 @@ def mqqplot(insumstats,
|
|
|
849
866
|
font_family=font_family,
|
|
850
867
|
ytick3=ytick3,
|
|
851
868
|
ylabels=ylabels,
|
|
852
|
-
ylabels_converted=ylabels_converted
|
|
853
|
-
|
|
869
|
+
ylabels_converted=ylabels_converted,
|
|
870
|
+
log=log,
|
|
871
|
+
verbose=verbose)
|
|
854
872
|
|
|
855
873
|
ax1, ax4 = _process_ytick(ax1=ax1,
|
|
856
874
|
fontsize=fontsize,
|
|
857
875
|
font_family=font_family,
|
|
858
|
-
ax4=ax4
|
|
876
|
+
ax4=ax4,
|
|
877
|
+
log=log,
|
|
878
|
+
verbose=verbose)
|
|
859
879
|
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
cbar = _process_cbar(cbar,
|
|
880
|
+
# regional plot cbar
|
|
881
|
+
if cbar is not None:
|
|
882
|
+
cbar = _process_cbar(cbar,
|
|
883
|
+
cbar_fontsize=fontsize,
|
|
884
|
+
cbar_font_family=font_family,
|
|
885
|
+
cbar_title=cbar_title,
|
|
886
|
+
log=log,
|
|
887
|
+
verbose=verbose)
|
|
863
888
|
|
|
864
889
|
ax1 = _process_spine(ax1, mode)
|
|
890
|
+
|
|
865
891
|
# genomewide significant line
|
|
866
892
|
ax1 = _process_line(ax1,
|
|
867
893
|
sig_line,
|
|
@@ -874,7 +900,9 @@ def mqqplot(insumstats,
|
|
|
874
900
|
additional_line_color,
|
|
875
901
|
mode,
|
|
876
902
|
bmean,
|
|
877
|
-
bmedian
|
|
903
|
+
bmedian,
|
|
904
|
+
log=log,
|
|
905
|
+
verbose=verbose )
|
|
878
906
|
|
|
879
907
|
|
|
880
908
|
if mtitle and anno and len(to_annotate)>0:
|
|
@@ -882,8 +910,10 @@ def mqqplot(insumstats,
|
|
|
882
910
|
ax1.set_title(mtitle,pad=pad,fontsize=title_fontsize,family=font_family)
|
|
883
911
|
elif mtitle:
|
|
884
912
|
ax1.set_title(mtitle,fontsize=title_fontsize,family=font_family)
|
|
885
|
-
|
|
913
|
+
log.write("Finished processing figure arts.",verbose=verbose)
|
|
914
|
+
|
|
886
915
|
# Add annotation arrows and texts
|
|
916
|
+
log.write("Start to annotate variants...",verbose=verbose)
|
|
887
917
|
ax1 = annotate_single(
|
|
888
918
|
sumstats=sumstats,
|
|
889
919
|
anno=anno,
|
|
@@ -917,6 +947,7 @@ def mqqplot(insumstats,
|
|
|
917
947
|
log=log,
|
|
918
948
|
_invert=_invert
|
|
919
949
|
)
|
|
950
|
+
log.write("Finished annotating variants.",verbose=verbose)
|
|
920
951
|
# Manhatann-like plot Finished #####################################################################
|
|
921
952
|
|
|
922
953
|
# QQ plot #########################################################################################################
|
|
@@ -961,9 +992,9 @@ def mqqplot(insumstats,
|
|
|
961
992
|
|
|
962
993
|
# Y axis jagged
|
|
963
994
|
if jagged==True:
|
|
964
|
-
ax1 = _jagged_y(cut=cut,skip=skip,ax1=ax1,mode=1,mqqratio=mqqratio,jagged_len=jagged_len,jagged_wid=jagged_wid)
|
|
995
|
+
ax1 = _jagged_y(cut=cut,skip=skip,ax1=ax1,mode=1,mqqratio=mqqratio,jagged_len=jagged_len,jagged_wid=jagged_wid,log=log, verbose=verbose)
|
|
965
996
|
if "qq" in mode:
|
|
966
|
-
ax2 = _jagged_y(cut=cut,skip=skip,ax1=ax2,mode=2,mqqratio=mqqratio,jagged_len=jagged_len,jagged_wid=jagged_wid)
|
|
997
|
+
ax2 = _jagged_y(cut=cut,skip=skip,ax1=ax2,mode=2,mqqratio=mqqratio,jagged_len=jagged_len,jagged_wid=jagged_wid,log=log, verbose=verbose)
|
|
967
998
|
|
|
968
999
|
# XY lim
|
|
969
1000
|
if ylim is not None:
|
|
@@ -989,7 +1020,7 @@ def mqqplot(insumstats,
|
|
|
989
1020
|
if _get_region_lead==True:
|
|
990
1021
|
return fig, log, lead_snp_i, lead_snp_i2
|
|
991
1022
|
|
|
992
|
-
|
|
1023
|
+
log.write("Finished creating plot successfully!",verbose=verbose)
|
|
993
1024
|
return fig, log
|
|
994
1025
|
|
|
995
1026
|
##############################################################################################################################################################################
|
|
@@ -1084,22 +1115,22 @@ def _sanity_check(sumstats, mode, chrom, pos, stratified, _if_quick_qc, log, ver
|
|
|
1084
1115
|
#sanity check : drop variants with na values in chr and pos df
|
|
1085
1116
|
sumstats = sumstats.dropna(subset=[chrom,pos])
|
|
1086
1117
|
after_number=len(sumstats)
|
|
1087
|
-
|
|
1118
|
+
log.write(" -Removed "+ str(pre_number-after_number) +" variants with nan in CHR or POS column ...",verbose=verbose)
|
|
1088
1119
|
out_of_range_chr = sumstats[chrom]<=0
|
|
1089
|
-
|
|
1120
|
+
log.write(" -Removed {} variants with CHR <=0...".format(sum(out_of_range_chr)),verbose=verbose)
|
|
1090
1121
|
sumstats = sumstats.loc[~out_of_range_chr,:]
|
|
1091
1122
|
|
|
1092
1123
|
if stratified is True and _if_quick_qc:
|
|
1093
1124
|
pre_number=len(sumstats)
|
|
1094
1125
|
sumstats = sumstats.dropna(subset=["MAF"])
|
|
1095
1126
|
after_number=len(sumstats)
|
|
1096
|
-
|
|
1127
|
+
log.write(" -Removed "+ str(pre_number-after_number) +" variants with nan in EAF column ...",verbose=verbose)
|
|
1097
1128
|
|
|
1098
1129
|
if "b" not in mode and _if_quick_qc:
|
|
1099
1130
|
pre_number=len(sumstats)
|
|
1100
1131
|
sumstats = sumstats.dropna(subset=["raw_P"])
|
|
1101
1132
|
after_number=len(sumstats)
|
|
1102
|
-
|
|
1133
|
+
log.write(" -Removed "+ str(pre_number-after_number) +" variants with nan in P column ...",verbose=verbose)
|
|
1103
1134
|
return sumstats
|
|
1104
1135
|
|
|
1105
1136
|
def _process_p_value(sumstats, mode,p, mlog10p, scaled, log, verbose ):
|
|
@@ -1108,7 +1139,7 @@ def _process_p_value(sumstats, mode,p, mlog10p, scaled, log, verbose ):
|
|
|
1108
1139
|
sumstats["scaled_P"] = sumstats["DENSITY"].copy()
|
|
1109
1140
|
sumstats["raw_P"] = -np.log10(sumstats["DENSITY"].copy()+2)
|
|
1110
1141
|
elif scaled is True:
|
|
1111
|
-
|
|
1142
|
+
log.write(" -P values are already converted to -log10(P)!",verbose=verbose)
|
|
1112
1143
|
sumstats["scaled_P"] = sumstats["raw_P"].copy()
|
|
1113
1144
|
sumstats["raw_P"] = np.power(10,-sumstats["scaled_P"].astype("float64"))
|
|
1114
1145
|
else:
|
|
@@ -1156,7 +1187,7 @@ def _process_highlight(sumstats, highlight, highlight_chrpos, highlight_windowkb
|
|
|
1156
1187
|
sumstats.loc[right_chr&up_pos&low_pos,"HUE"]=0
|
|
1157
1188
|
else:
|
|
1158
1189
|
# highlight for one set
|
|
1159
|
-
|
|
1190
|
+
to_highlight = sumstats.loc[sumstats[snpid].isin(highlight),:]
|
|
1160
1191
|
#assign colors: 0 is hightlight color
|
|
1161
1192
|
for index,row in to_highlight.iterrows():
|
|
1162
1193
|
target_chr = int(row[chrom])
|
|
@@ -1169,7 +1200,7 @@ def _process_highlight(sumstats, highlight, highlight_chrpos, highlight_windowkb
|
|
|
1169
1200
|
|
|
1170
1201
|
def _process_density(sumstats, mode, bwindowsizekb, chrom, pos, verbose, log):
|
|
1171
1202
|
if "b" in mode and "DENSITY" not in sumstats.columns:
|
|
1172
|
-
|
|
1203
|
+
log.write(" -Calculating DENSITY with windowsize of ",bwindowsizekb ," kb",verbose=verbose)
|
|
1173
1204
|
large_number = _get_largenumber(sumstats[pos].max(),log=log)
|
|
1174
1205
|
|
|
1175
1206
|
stack=[]
|
|
@@ -1190,11 +1221,12 @@ def _process_density(sumstats, mode, bwindowsizekb, chrom, pos, verbose, log):
|
|
|
1190
1221
|
elif "b" in mode and "DENSITY" in sumstats.columns:
|
|
1191
1222
|
bmean=sumstats["DENSITY"].mean()
|
|
1192
1223
|
bmedian=sumstats["DENSITY"].median()
|
|
1193
|
-
|
|
1224
|
+
log.write(" -DENSITY column exists. Skipping calculation...",verbose=verbose)
|
|
1194
1225
|
return sumstats, bmean, bmedian
|
|
1195
1226
|
|
|
1196
|
-
def _process_line(ax1, sig_line, suggestive_sig_line, additional_line, lines_to_plot , sc_linewidth, sig_line_color, suggestive_sig_line_color, additional_line_color, mode, bmean, bmedian ):
|
|
1227
|
+
def _process_line(ax1, sig_line, suggestive_sig_line, additional_line, lines_to_plot , sc_linewidth, sig_line_color, suggestive_sig_line_color, additional_line_color, mode, bmean, bmedian , log=Log(),verbose=True):
|
|
1197
1228
|
# genomewide significant line
|
|
1229
|
+
log.write(" -Processing lines...",verbose=verbose)
|
|
1198
1230
|
if sig_line is True:
|
|
1199
1231
|
sigline = ax1.axhline(y=lines_to_plot[0],
|
|
1200
1232
|
linewidth = sc_linewidth,
|
|
@@ -1220,8 +1252,9 @@ def _process_line(ax1, sig_line, suggestive_sig_line, additional_line, lines_to_
|
|
|
1220
1252
|
medianline = ax1.axhline(y=bmedian, linewidth = sc_linewidth,linestyle="--",color=sig_line_color,zorder=1000)
|
|
1221
1253
|
return ax1
|
|
1222
1254
|
|
|
1223
|
-
def _process_cbar(cbar, cbar_fontsize, cbar_font_family, cbar_title):
|
|
1224
|
-
|
|
1255
|
+
def _process_cbar(cbar, cbar_fontsize, cbar_font_family, cbar_title, log=Log(),verbose=True):
|
|
1256
|
+
log.write(" -Processing color bar...",verbose=verbose)
|
|
1257
|
+
if type(cbar) == list:
|
|
1225
1258
|
for cbar_single in cbar:
|
|
1226
1259
|
cbar_yticklabels = cbar_single.ax.get_yticklabels()
|
|
1227
1260
|
cbar_single.ax.set_yticklabels(cbar_yticklabels, fontsize=cbar_fontsize, family=cbar_font_family )
|
|
@@ -1232,12 +1265,14 @@ def _process_cbar(cbar, cbar_fontsize, cbar_font_family, cbar_title):
|
|
|
1232
1265
|
cbar.ax.set_title(cbar_title, fontsize=cbar_fontsize, family=cbar_font_family, loc="center",y=-0.2 )
|
|
1233
1266
|
return cbar
|
|
1234
1267
|
|
|
1235
|
-
def _process_xtick(ax1, chrom_df, xtick_chr_dict, fontsize, font_family):
|
|
1268
|
+
def _process_xtick(ax1, chrom_df, xtick_chr_dict, fontsize, font_family, log=Log(),verbose=True):
|
|
1269
|
+
log.write(" -Processing X ticks...",verbose=verbose)
|
|
1236
1270
|
ax1.set_xticks(chrom_df.astype("float64"))
|
|
1237
1271
|
ax1.set_xticklabels(chrom_df.index.astype("Int64").map(xtick_chr_dict),fontsize=fontsize,family=font_family)
|
|
1238
1272
|
return ax1
|
|
1239
1273
|
|
|
1240
|
-
def _process_ytick(ax1, fontsize, font_family, ax4):
|
|
1274
|
+
def _process_ytick(ax1, fontsize, font_family, ax4, log=Log(),verbose=True):
|
|
1275
|
+
log.write(" -Processing Y labels...",verbose=verbose)
|
|
1241
1276
|
ax1_yticklabels = ax1.get_yticklabels()
|
|
1242
1277
|
#ax1.set_yticklabels(ax1_yticklabels,fontsize=fontsize,family=font_family)
|
|
1243
1278
|
ax1_yticks = ax1.get_yticks()
|
|
@@ -1248,7 +1283,8 @@ def _process_ytick(ax1, fontsize, font_family, ax4):
|
|
|
1248
1283
|
ax4.set_yticks(ax4_yticks,ax4_yticklabels, fontsize=fontsize,family=font_family)
|
|
1249
1284
|
return ax1, ax4
|
|
1250
1285
|
|
|
1251
|
-
def _process_xlabel(region, xlabel, ax1, gtf_path, mode, fontsize, font_family, ax3=None ):
|
|
1286
|
+
def _process_xlabel(region, xlabel, ax1, gtf_path, mode, fontsize, font_family, ax3=None , log=Log(),verbose=True):
|
|
1287
|
+
log.write(" -Processing X labels...",verbose=verbose)
|
|
1252
1288
|
if region is not None:
|
|
1253
1289
|
if xlabel is None:
|
|
1254
1290
|
xlabel = "Chromosome "+str(region[0])+" (MB)"
|
|
@@ -1262,7 +1298,8 @@ def _process_xlabel(region, xlabel, ax1, gtf_path, mode, fontsize, font_family,
|
|
|
1262
1298
|
ax1.set_xlabel(xlabel,fontsize=fontsize,family=font_family)
|
|
1263
1299
|
return ax1, ax3
|
|
1264
1300
|
|
|
1265
|
-
def _process_ylabel(ylabel, ax1, mode, bwindowsizekb, fontsize, font_family, ax4=None):
|
|
1301
|
+
def _process_ylabel(ylabel, ax1, mode, bwindowsizekb, fontsize, font_family, ax4=None, log=Log(),verbose=True):
|
|
1302
|
+
log.write(" -Processing Y labels...",verbose=verbose)
|
|
1266
1303
|
if "b" in mode:
|
|
1267
1304
|
if ylabel is None:
|
|
1268
1305
|
ylabel ="Density of GWAS \n SNPs within "+str(bwindowsizekb)+" kb"
|
|
@@ -1336,4 +1373,4 @@ def _process_layout(mode, figax, fig_args, mqqratio, region_hspace):
|
|
|
1336
1373
|
raise ValueError("Please select one from the 5 modes: mqq/qqm/m/qq/r/b")
|
|
1337
1374
|
ax4=None
|
|
1338
1375
|
cbar=None
|
|
1339
|
-
return fig, ax1, ax2, ax3, ax4, cbar
|
|
1376
|
+
return fig, ax1, ax2, ax3, ax4, cbar
|
gwaslab/viz_plot_qqplot.py
CHANGED
|
@@ -45,7 +45,7 @@ def _plot_qq(
|
|
|
45
45
|
|
|
46
46
|
# QQ plot #########################################################################################################
|
|
47
47
|
# ax2 qqplot
|
|
48
|
-
|
|
48
|
+
log.write("Start to create QQ plot with "+str(len(sumstats))+" variants:",verbose=verbose )
|
|
49
49
|
|
|
50
50
|
# plotting qq plots using processed data after cut and skip
|
|
51
51
|
|
|
@@ -59,6 +59,7 @@ def _plot_qq(
|
|
|
59
59
|
upper_bound_p = np.power(10.0, -expected_min_mlog10p)
|
|
60
60
|
|
|
61
61
|
if stratified is False:
|
|
62
|
+
log.write(" -Plotting all variants...",verbose=verbose)
|
|
62
63
|
# sort x,y for qq plot
|
|
63
64
|
# high to low
|
|
64
65
|
observed = p_toplot.sort_values(ascending=False)
|
|
@@ -68,13 +69,13 @@ def _plot_qq(
|
|
|
68
69
|
|
|
69
70
|
expected_all = -np.log10(np.linspace(minit,upper_bound_p,len(p_toplot_raw)))[:len(observed)]
|
|
70
71
|
|
|
71
|
-
|
|
72
|
+
log.write(" -Expected range of P: (0,{})".format(upper_bound_p),verbose=verbose)
|
|
72
73
|
#p_toplot = sumstats["scaled_P"]
|
|
73
74
|
ax2.scatter(expected_all,observed,s=marker_size[1],color=colors[0],**qq_scatter_args)
|
|
74
75
|
|
|
75
76
|
else:
|
|
76
77
|
# stratified qq plot
|
|
77
|
-
|
|
78
|
+
log.write(" -Plotting variants stratified by MAF...",verbose=verbose)
|
|
78
79
|
observed = p_toplot.sort_values(ascending=False)
|
|
79
80
|
expected_all = -np.log10(np.linspace(minit,upper_bound_p,len(p_toplot_raw)))[:len(observed)]
|
|
80
81
|
|
|
@@ -115,15 +116,15 @@ def _plot_qq(
|
|
|
115
116
|
|
|
116
117
|
if expected_min_mlog10p!=0:
|
|
117
118
|
level = 1 - np.power(10.0,-np.nanmedian(expected_all))
|
|
118
|
-
|
|
119
|
+
log.write(" -Level for calculating lambda GC : {}".format(1 - level),verbose=verbose)
|
|
119
120
|
|
|
120
|
-
if verbose and not include_chrXYMT : log.write(" -Excluding chrX,Y, MT from calculation of lambda GC.")
|
|
121
|
+
if verbose and not include_chrXYMT : log.write(" -Excluding chrX,Y, MT from calculation of lambda GC.",verbose=verbose)
|
|
121
122
|
lambdagc = lambdaGC(p_toplot_raw,
|
|
122
123
|
mode="MLOG10P",
|
|
123
124
|
level=level,
|
|
124
125
|
include_chrXYMT=include_chrXYMT,
|
|
125
126
|
log=log,
|
|
126
|
-
verbose=
|
|
127
|
+
verbose=verbose)
|
|
127
128
|
|
|
128
129
|
# annotate lambda gc to qq plot
|
|
129
130
|
ax2.text(0.10, 1.03,"$\\lambda_{GC}$ = "+"{:.4f}".format(lambdagc),
|
|
@@ -147,7 +148,9 @@ def _plot_qq(
|
|
|
147
148
|
font_family=font_family,
|
|
148
149
|
ylabels=ylabels,
|
|
149
150
|
ytick3=ytick3,
|
|
150
|
-
ylabels_converted=ylabels_converted
|
|
151
|
+
ylabels_converted=ylabels_converted,
|
|
152
|
+
log=log,
|
|
153
|
+
verbose=verbose
|
|
151
154
|
)
|
|
152
155
|
|
|
153
156
|
#if cut == 0:
|
|
@@ -181,7 +184,7 @@ def _plot_qq(
|
|
|
181
184
|
if qtitle:
|
|
182
185
|
ax2.set_title(qtitle,fontsize=title_fontsize,pad=10,family=font_family)
|
|
183
186
|
|
|
184
|
-
|
|
187
|
+
log.write("Finished creating QQ plot successfully!",verbose=verbose)
|
|
185
188
|
|
|
186
189
|
# Creating QQ plot Finished #############################################################################################
|
|
187
190
|
return ax2
|
gwaslab/viz_plot_regionalplot.py
CHANGED
|
@@ -122,6 +122,8 @@ def _plot_regional(
|
|
|
122
122
|
region_ld_colors=region_ld_colors2,
|
|
123
123
|
position=2)
|
|
124
124
|
cbar = [cbar1, cbar2]
|
|
125
|
+
else:
|
|
126
|
+
cbar=None
|
|
125
127
|
if region_title is not None:
|
|
126
128
|
ax1 = _add_region_title(region_title, ax1=ax1,region_title_args=region_title_args )
|
|
127
129
|
## recombinnation rate ##################################################
|
|
@@ -540,7 +542,7 @@ def process_vcf(sumstats, vcf_path, region,region_ref, region_ref_second, log, v
|
|
|
540
542
|
# no position match
|
|
541
543
|
return None
|
|
542
544
|
if verbose: log.write(" -Matching variants using POS, NEA, EA ...")
|
|
543
|
-
sumstats["REFINDEX"] = sumstats
|
|
545
|
+
sumstats["REFINDEX"] = sumstats[[pos,nea,ea]].apply(lambda x: match_varaint(x),axis=1)
|
|
544
546
|
#############################################################################################
|
|
545
547
|
#sumstats["REFINDEX"] = sumstats[pos].apply(lambda x: np.where(ref_genotype["variants/POS"] == x )[0][0] if np.any(ref_genotype["variants/POS"] == x) else None)
|
|
546
548
|
|