gwaslab 3.4.36__py3-none-any.whl → 3.4.38__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gwaslab might be problematic. Click here for more details.
- gwaslab/__init__.py +1 -1
- gwaslab/data/formatbook.json +722 -721
- gwaslab/g_Log.py +8 -0
- gwaslab/g_Sumstats.py +80 -178
- gwaslab/g_SumstatsPair.py +6 -2
- gwaslab/g_Sumstats_summary.py +3 -3
- gwaslab/g_meta.py +13 -3
- gwaslab/g_version.py +2 -2
- gwaslab/hm_casting.py +29 -15
- gwaslab/hm_harmonize_sumstats.py +312 -159
- gwaslab/hm_rsid_to_chrpos.py +1 -1
- gwaslab/io_preformat_input.py +46 -37
- gwaslab/io_to_formats.py +428 -295
- gwaslab/qc_check_datatype.py +15 -1
- gwaslab/qc_fix_sumstats.py +956 -719
- gwaslab/util_ex_calculate_ldmatrix.py +29 -11
- gwaslab/util_ex_gwascatalog.py +1 -1
- gwaslab/util_ex_ldproxyfinder.py +1 -1
- gwaslab/util_ex_process_h5.py +26 -17
- gwaslab/util_ex_process_ref.py +3 -3
- gwaslab/util_ex_run_coloc.py +26 -4
- gwaslab/util_in_convert_h2.py +1 -1
- gwaslab/util_in_fill_data.py +44 -5
- gwaslab/util_in_filter_value.py +122 -34
- gwaslab/util_in_get_density.py +2 -2
- gwaslab/util_in_get_sig.py +41 -9
- gwaslab/viz_aux_quickfix.py +26 -21
- gwaslab/viz_aux_reposition_text.py +7 -4
- gwaslab/viz_aux_save_figure.py +6 -5
- gwaslab/viz_plot_compare_af.py +5 -5
- gwaslab/viz_plot_compare_effect.py +22 -5
- gwaslab/viz_plot_miamiplot2.py +28 -20
- gwaslab/viz_plot_mqqplot.py +214 -98
- gwaslab/viz_plot_qqplot.py +11 -8
- gwaslab/viz_plot_regionalplot.py +16 -9
- gwaslab/viz_plot_trumpetplot.py +15 -6
- {gwaslab-3.4.36.dist-info → gwaslab-3.4.38.dist-info}/METADATA +3 -3
- gwaslab-3.4.38.dist-info/RECORD +72 -0
- gwaslab-3.4.36.dist-info/RECORD +0 -72
- {gwaslab-3.4.36.dist-info → gwaslab-3.4.38.dist-info}/LICENSE +0 -0
- {gwaslab-3.4.36.dist-info → gwaslab-3.4.38.dist-info}/WHEEL +0 -0
- {gwaslab-3.4.36.dist-info → gwaslab-3.4.38.dist-info}/top_level.txt +0 -0
gwaslab/viz_plot_mqqplot.py
CHANGED
|
@@ -104,6 +104,15 @@ def mqqplot(insumstats,
|
|
|
104
104
|
region_protein_coding = True,
|
|
105
105
|
region_flank_factor = 0.05,
|
|
106
106
|
region_anno_bbox_args = None,
|
|
107
|
+
cbar_title='LD $r^{2}$',
|
|
108
|
+
cbar_fontsize = None,
|
|
109
|
+
cbar_font_family = None,
|
|
110
|
+
track_n=4,
|
|
111
|
+
track_n_offset=0,
|
|
112
|
+
track_fontsize_ratio=0.95,
|
|
113
|
+
track_exon_ratio=1,
|
|
114
|
+
track_text_offset=1,
|
|
115
|
+
track_font_family = None,
|
|
107
116
|
taf = None,
|
|
108
117
|
# track_n, track_n_offset,font_ratio,exon_ratio,text_offset
|
|
109
118
|
tabix=None,
|
|
@@ -204,7 +213,6 @@ def mqqplot(insumstats,
|
|
|
204
213
|
chr_dict = get_chr_to_number()
|
|
205
214
|
if xtick_chr_dict is None:
|
|
206
215
|
xtick_chr_dict = get_number_to_chr()
|
|
207
|
-
|
|
208
216
|
if gtf_chr_dict is None:
|
|
209
217
|
gtf_chr_dict = get_number_to_chr()
|
|
210
218
|
if rr_chr_dict is None:
|
|
@@ -243,8 +251,14 @@ def mqqplot(insumstats,
|
|
|
243
251
|
region_ld_colors2 = ["#E4E4E4","#D8E2F2","#AFCBE3","#86B3D4","#5D98C4","#367EB7","#367EB7"]
|
|
244
252
|
if region_title_args is None:
|
|
245
253
|
region_title_args = {"size":10}
|
|
254
|
+
if cbar_fontsize is None:
|
|
255
|
+
cbar_fontsize = fontsize
|
|
256
|
+
if cbar_font_family is None:
|
|
257
|
+
cbar_font_family = font_family
|
|
258
|
+
if track_font_family is None:
|
|
259
|
+
track_font_family = font_family
|
|
246
260
|
if taf is None:
|
|
247
|
-
taf = [
|
|
261
|
+
taf = [track_n,track_n_offset,track_fontsize_ratio,track_exon_ratio,track_text_offset]
|
|
248
262
|
if maf_bins is None:
|
|
249
263
|
maf_bins=[(0, 0.01), (0.01, 0.05), (0.05, 0.25),(0.25,0.5)]
|
|
250
264
|
if maf_bin_colors is None:
|
|
@@ -289,40 +303,42 @@ def mqqplot(insumstats,
|
|
|
289
303
|
scatter_args["rasterized"]=True
|
|
290
304
|
qq_scatter_args["rasterized"]=True
|
|
291
305
|
|
|
292
|
-
|
|
293
|
-
|
|
306
|
+
log.write("Start to create MQQ plot...{}:".format(_get_version()),verbose=verbose)
|
|
307
|
+
log.write(" -Genomic coordinates version: {}...".format(build),verbose=verbose)
|
|
294
308
|
if build is None or build=="99":
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
309
|
+
log.warning("Genomic coordinates version is unknown.")
|
|
310
|
+
log.write(" -Genome-wide significance level to plot is set to "+str(sig_level_plot)+" ...",verbose=verbose)
|
|
311
|
+
log.write(" -Raw input contains "+str(len(insumstats))+" variants...",verbose=verbose)
|
|
312
|
+
log.write(" -MQQ plot layout mode is : "+mode,verbose=verbose)
|
|
313
|
+
|
|
299
314
|
if len(anno_set)>0 and ("m" in mode):
|
|
300
|
-
|
|
315
|
+
log.write(" -Variants to annotate : "+",".join(anno_set),verbose=verbose)
|
|
316
|
+
|
|
301
317
|
if len(highlight)>0 and ("m" in mode):
|
|
302
318
|
if pd.api.types.is_list_like(highlight[0]):
|
|
303
319
|
if highlight_chrpos==False:
|
|
304
|
-
if len(highlight
|
|
305
|
-
log.
|
|
320
|
+
if len(highlight) != len(highlight_color):
|
|
321
|
+
log.warning("Number of locus groups in the list does not match number of provided colors.")
|
|
306
322
|
for i, highlight_set in enumerate(highlight):
|
|
307
|
-
|
|
323
|
+
log.write(" -Set {} loci to highlight ({}) : ".format(i+1, highlight_color[i%len(highlight_color)])+",".join(highlight_set),verbose=verbose)
|
|
308
324
|
else:
|
|
309
|
-
|
|
310
|
-
|
|
325
|
+
log.write(" -Loci to highlight ({}): {}".format(highlight_color,highlight),verbose=verbose)
|
|
326
|
+
log.write(" -highlight_windowkb is set to: ", highlight_windowkb, " kb",verbose=verbose)
|
|
311
327
|
else:
|
|
312
|
-
|
|
313
|
-
|
|
328
|
+
log.write(" -Loci to highlight ({}): ".format(highlight_color)+",".join(highlight),verbose=verbose)
|
|
329
|
+
log.write(" -highlight_windowkb is set to: ", highlight_windowkb, " kb",verbose=verbose)
|
|
314
330
|
|
|
315
331
|
if len(pinpoint)>0 :
|
|
316
332
|
if pd.api.types.is_list_like(pinpoint[0]):
|
|
317
|
-
if len(pinpoint
|
|
318
|
-
log.
|
|
333
|
+
if len(pinpoint) != len(pinpoint_color):
|
|
334
|
+
log.warning("Number of variant groups in the list does not match number of provided colors.")
|
|
319
335
|
for i, pinpoint_set in enumerate(pinpoint):
|
|
320
|
-
|
|
336
|
+
log.write(" -Set {} variants to pinpoint ({}) : ".format(i+1,pinpoint_color[i%len(pinpoint_color)])+",".join(pinpoint_set),verbose=verbose)
|
|
321
337
|
else:
|
|
322
|
-
|
|
338
|
+
log.write(" -Variants to pinpoint ({}) : ".format(pinpoint_color)+",".join(pinpoint),verbose=verbose)
|
|
323
339
|
|
|
324
340
|
if region is not None:
|
|
325
|
-
|
|
341
|
+
log.write(" -Region to plot : chr"+str(region[0])+":"+str(region[1])+"-"+str(region[2])+".",verbose=verbose)
|
|
326
342
|
|
|
327
343
|
# construct line series for conversion
|
|
328
344
|
if additional_line is None:
|
|
@@ -340,13 +356,14 @@ def mqqplot(insumstats,
|
|
|
340
356
|
# ax2 : qq plot
|
|
341
357
|
# ax3 : gene track
|
|
342
358
|
# ax4 : recombination rate
|
|
359
|
+
# cbar : color bar
|
|
343
360
|
# ax5 : miami plot lower panel
|
|
344
361
|
|
|
345
362
|
# "m" : Manhattan plot
|
|
346
363
|
# "qq": QQ plot
|
|
347
364
|
# "r" : regional plot
|
|
348
365
|
|
|
349
|
-
fig, ax1, ax2, ax3 = _process_layout(mode=mode,
|
|
366
|
+
fig, ax1, ax2, ax3, ax4, cbar = _process_layout(mode=mode,
|
|
350
367
|
figax=figax,
|
|
351
368
|
fig_args=fig_args,
|
|
352
369
|
mqqratio=mqqratio,
|
|
@@ -383,7 +400,7 @@ def mqqplot(insumstats,
|
|
|
383
400
|
pinpoint=pinpoint,
|
|
384
401
|
density_color=density_color)
|
|
385
402
|
|
|
386
|
-
sumstats = insumstats
|
|
403
|
+
sumstats = insumstats[usecols].copy()
|
|
387
404
|
|
|
388
405
|
#################################################################################################
|
|
389
406
|
|
|
@@ -392,7 +409,7 @@ def mqqplot(insumstats,
|
|
|
392
409
|
if (anno == "GENENAME"):
|
|
393
410
|
anno_sig=True
|
|
394
411
|
elif (anno is not None) and (anno is not True):
|
|
395
|
-
sumstats["Annotation"]=sumstats
|
|
412
|
+
sumstats["Annotation"]=sumstats[anno].astype("string")
|
|
396
413
|
|
|
397
414
|
## P value
|
|
398
415
|
## m, qq, r
|
|
@@ -416,15 +433,15 @@ def mqqplot(insumstats,
|
|
|
416
433
|
region_start = region[1]
|
|
417
434
|
region_end = region[2]
|
|
418
435
|
marker_size=(25,45)
|
|
419
|
-
|
|
436
|
+
log.write(" -Extract SNPs in region : chr{}:{}-{}...".format(region_chr, region[1], region[2]),verbose=verbose)
|
|
420
437
|
|
|
421
|
-
in_region_snp = (sumstats[chrom]==region_chr) &(sumstats[pos]<region_end) &(sumstats[pos]>region_start)
|
|
438
|
+
in_region_snp = (sumstats[chrom]==region_chr) & (sumstats[pos]<region_end) & (sumstats[pos]>region_start)
|
|
422
439
|
|
|
423
|
-
|
|
440
|
+
log.write(" -Extract SNPs in specified regions: "+str(sum(in_region_snp)),verbose=verbose)
|
|
424
441
|
sumstats = sumstats.loc[in_region_snp,:]
|
|
425
442
|
|
|
426
443
|
if len(sumstats)==0:
|
|
427
|
-
log.
|
|
444
|
+
log.warning("No valid data! Please check the input.")
|
|
428
445
|
return None
|
|
429
446
|
|
|
430
447
|
## EAF
|
|
@@ -438,11 +455,11 @@ def mqqplot(insumstats,
|
|
|
438
455
|
sumstats["HUE"] = pd.NA
|
|
439
456
|
sumstats["HUE"] = sumstats["HUE"].astype("Int64")
|
|
440
457
|
|
|
441
|
-
|
|
458
|
+
log.write("Finished loading specified columns from the sumstats.",verbose=verbose)
|
|
442
459
|
|
|
443
460
|
|
|
444
461
|
#sanity check############################################################################################################
|
|
445
|
-
log.write("Start conversion and sanity check:",verbose=verbose)
|
|
462
|
+
log.write("Start data conversion and sanity check:",verbose=verbose)
|
|
446
463
|
|
|
447
464
|
if _if_quick_qc == False:
|
|
448
465
|
log.write(" -Sanity check will be skipped.", verbose=verbose)
|
|
@@ -511,15 +528,19 @@ def mqqplot(insumstats,
|
|
|
511
528
|
lines_to_plot=lines_to_plot,
|
|
512
529
|
log = log)
|
|
513
530
|
except:
|
|
514
|
-
log.
|
|
531
|
+
log.warning("No valid data! Please check the input.")
|
|
515
532
|
return None
|
|
516
533
|
|
|
534
|
+
log.write("Finished data conversion and sanity check.",verbose=verbose)
|
|
535
|
+
|
|
517
536
|
# Manhattan plot ##########################################################################################################
|
|
537
|
+
log.write("Start to create MQQ plot with "+str(len(sumstats))+" variants...",verbose=verbose)
|
|
518
538
|
## regional plot ->rsq
|
|
519
539
|
#calculate rsq
|
|
520
540
|
if vcf_path is not None:
|
|
521
541
|
if tabix is None:
|
|
522
542
|
tabix = which("tabix")
|
|
543
|
+
log.write(" -tabix will be used: {}".format(tabix),verbose=verbose)
|
|
523
544
|
sumstats = process_vcf(sumstats=sumstats,
|
|
524
545
|
vcf_path=vcf_path,
|
|
525
546
|
region=region,
|
|
@@ -552,8 +573,6 @@ def mqqplot(insumstats,
|
|
|
552
573
|
|
|
553
574
|
if vcf_path is not None:
|
|
554
575
|
sumstats["chr_hue"]=sumstats["LD"]
|
|
555
|
-
|
|
556
|
-
if verbose:log.write("Start to create manhattan plot with "+str(len(sumstats))+" variants:")
|
|
557
576
|
## default settings
|
|
558
577
|
|
|
559
578
|
palette = sns.color_palette(colors,n_colors=sumstats[chrom].nunique())
|
|
@@ -585,6 +604,7 @@ def mqqplot(insumstats,
|
|
|
585
604
|
## if highlight
|
|
586
605
|
highlight_i = pd.DataFrame()
|
|
587
606
|
if len(highlight) >0:
|
|
607
|
+
log.write(" -Creating background plot...",verbose=verbose)
|
|
588
608
|
plot = sns.scatterplot(data=sumstats, x='i', y='scaled_P',
|
|
589
609
|
hue='chr_hue',
|
|
590
610
|
palette=palette,
|
|
@@ -596,8 +616,7 @@ def mqqplot(insumstats,
|
|
|
596
616
|
zorder=2,ax=ax1,edgecolor=edgecolor, **scatter_args)
|
|
597
617
|
if pd.api.types.is_list_like(highlight[0]) and highlight_chrpos==False:
|
|
598
618
|
for i, highlight_set in enumerate(highlight):
|
|
599
|
-
|
|
600
|
-
print(sumstats["HUE"].dtype)
|
|
619
|
+
log.write(" -Highlighting set {} target loci...".format(i+1),verbose=verbose)
|
|
601
620
|
sns.scatterplot(data=sumstats.loc[sumstats["HUE"]==i], x='i', y='scaled_P',
|
|
602
621
|
hue="HUE",
|
|
603
622
|
palette={i:highlight_color[i%len(highlight_color)]},
|
|
@@ -609,7 +628,7 @@ def mqqplot(insumstats,
|
|
|
609
628
|
zorder=3+i,ax=ax1,edgecolor=edgecolor,**scatter_args)
|
|
610
629
|
highlight_i = sumstats.loc[~sumstats["HUE"].isna(),"i"].values
|
|
611
630
|
else:
|
|
612
|
-
|
|
631
|
+
log.write(" -Highlighting target loci...",verbose=verbose)
|
|
613
632
|
sns.scatterplot(data=sumstats.loc[sumstats["HUE"]==0], x='i', y='scaled_P',
|
|
614
633
|
hue="HUE",
|
|
615
634
|
palette={0:highlight_color},
|
|
@@ -658,6 +677,7 @@ def mqqplot(insumstats,
|
|
|
658
677
|
hue = 'chr_hue'
|
|
659
678
|
hue_norm=None
|
|
660
679
|
to_plot = sumstats
|
|
680
|
+
log.write(" -Creating background plot...",verbose=verbose)
|
|
661
681
|
plot = sns.scatterplot(data=to_plot, x='i', y='scaled_P',
|
|
662
682
|
hue=hue,
|
|
663
683
|
palette= palette,
|
|
@@ -677,17 +697,17 @@ def mqqplot(insumstats,
|
|
|
677
697
|
for i, pinpoint_set in enumerate(pinpoint):
|
|
678
698
|
if sum(sumstats[snpid].isin(pinpoint_set))>0:
|
|
679
699
|
to_pinpoint = sumstats.loc[sumstats[snpid].isin(pinpoint_set),:]
|
|
680
|
-
|
|
700
|
+
log.write(" -Pinpointing set {} target vairants...".format(i+1),verbose=verbose)
|
|
681
701
|
ax1.scatter(to_pinpoint["i"],to_pinpoint["scaled_P"],color=pinpoint_color[i%len(pinpoint_color)],zorder=100,s=marker_size[1]+1)
|
|
682
702
|
else:
|
|
683
|
-
|
|
703
|
+
log.write(" -Target vairants to pinpoint were not found. Skip pinpointing process...",verbose=verbose)
|
|
684
704
|
else:
|
|
685
705
|
if sum(sumstats[snpid].isin(pinpoint))>0:
|
|
686
706
|
to_pinpoint = sumstats.loc[sumstats[snpid].isin(pinpoint),:]
|
|
687
|
-
|
|
707
|
+
log.write(" -Pinpointing target vairants...",verbose=verbose)
|
|
688
708
|
ax1.scatter(to_pinpoint["i"],to_pinpoint["scaled_P"],color=pinpoint_color,zorder=100,s=marker_size[1]+1)
|
|
689
709
|
else:
|
|
690
|
-
|
|
710
|
+
log.write(" -Target vairants to pinpoint were not found. Skip pinpointing process...",verbose=verbose)
|
|
691
711
|
|
|
692
712
|
|
|
693
713
|
|
|
@@ -697,7 +717,7 @@ def mqqplot(insumstats,
|
|
|
697
717
|
# if regional plot : pinpoint lead , add color bar ##################################################
|
|
698
718
|
if (region is not None) and ("r" in mode):
|
|
699
719
|
|
|
700
|
-
ax1, ax3, lead_snp_i, lead_snp_i2 =_plot_regional(
|
|
720
|
+
ax1, ax3, ax4, cbar, lead_snp_i, lead_snp_i2 =_plot_regional(
|
|
701
721
|
sumstats=sumstats,
|
|
702
722
|
fig=fig,
|
|
703
723
|
ax1=ax1,
|
|
@@ -738,6 +758,7 @@ def mqqplot(insumstats,
|
|
|
738
758
|
region_recombination = region_recombination,
|
|
739
759
|
region_protein_coding=region_protein_coding,
|
|
740
760
|
region_flank_factor =region_flank_factor,
|
|
761
|
+
track_font_family=track_font_family,
|
|
741
762
|
taf=taf,
|
|
742
763
|
tabix=tabix,
|
|
743
764
|
chrom=chrom,
|
|
@@ -745,50 +766,20 @@ def mqqplot(insumstats,
|
|
|
745
766
|
verbose=verbose,
|
|
746
767
|
log=log
|
|
747
768
|
)
|
|
769
|
+
|
|
748
770
|
else:
|
|
749
771
|
lead_snp_i= None
|
|
750
772
|
lead_snp_i2=None
|
|
751
|
-
|
|
752
|
-
if region is None:
|
|
753
|
-
ax1 = _process_xtick(ax1, chrom_df, xtick_chr_dict, fontsize, font_family)
|
|
754
|
-
|
|
755
|
-
# genomewide significant line
|
|
756
|
-
ax1 = _process_line(ax1,
|
|
757
|
-
sig_line,
|
|
758
|
-
suggestive_sig_line,
|
|
759
|
-
additional_line,
|
|
760
|
-
lines_to_plot ,
|
|
761
|
-
sc_linewidth,
|
|
762
|
-
sig_line_color,
|
|
763
|
-
suggestive_sig_line_color,
|
|
764
|
-
additional_line_color,
|
|
765
|
-
mode,
|
|
766
|
-
bmean,
|
|
767
|
-
bmedian )
|
|
768
773
|
|
|
769
|
-
|
|
770
|
-
cutfactor=cutfactor,
|
|
771
|
-
cut_log=cut_log,
|
|
772
|
-
ax1=ax1,
|
|
773
|
-
skip=skip,
|
|
774
|
-
maxy=maxy,
|
|
775
|
-
maxticker=maxticker,
|
|
776
|
-
ystep=ystep,
|
|
777
|
-
sc_linewidth=sc_linewidth,
|
|
778
|
-
cut_line_color=cut_line_color,
|
|
779
|
-
fontsize=fontsize,
|
|
780
|
-
font_family=font_family,
|
|
781
|
-
ytick3=ytick3,
|
|
782
|
-
ylabels=ylabels,
|
|
783
|
-
ylabels_converted=ylabels_converted
|
|
784
|
-
)
|
|
774
|
+
log.write("Finished creating MQQ plot successfully!",verbose=verbose)
|
|
785
775
|
|
|
786
776
|
# Get top variants for annotation #######################################################
|
|
777
|
+
log.write("Start to extract variants for annotation...",verbose=verbose)
|
|
787
778
|
if (anno and anno!=True) or (len(anno_set)>0):
|
|
788
779
|
if len(anno_set)>0:
|
|
789
780
|
to_annotate=sumstats.loc[sumstats[snpid].isin(anno_set),:]
|
|
790
781
|
if to_annotate.empty is not True:
|
|
791
|
-
|
|
782
|
+
log.write(" -Found "+str(len(to_annotate))+" specified variants to annotate...",verbose=verbose)
|
|
792
783
|
else:
|
|
793
784
|
to_annotate=getsig(sumstats.loc[sumstats["scaled_P"]> float(-np.log10(sig_level_lead)),:],
|
|
794
785
|
snpid,
|
|
@@ -801,7 +792,7 @@ def mqqplot(insumstats,
|
|
|
801
792
|
mlog10p="scaled_P",
|
|
802
793
|
verbose=False)
|
|
803
794
|
if (to_annotate.empty is not True) and ("b" not in mode):
|
|
804
|
-
|
|
795
|
+
log.write(" -Found "+str(len(to_annotate))+" significant variants with a sliding window size of "+str(windowsizekb)+" kb...",verbose=verbose)
|
|
805
796
|
else:
|
|
806
797
|
to_annotate=getsig(sumstats.loc[sumstats["scaled_P"]> float(-np.log10(sig_level_lead)),:],
|
|
807
798
|
"i",
|
|
@@ -814,7 +805,7 @@ def mqqplot(insumstats,
|
|
|
814
805
|
mlog10p="scaled_P",
|
|
815
806
|
sig_level=sig_level_lead)
|
|
816
807
|
if (to_annotate.empty is not True) and ("b" not in mode):
|
|
817
|
-
|
|
808
|
+
log.write(" -Found "+str(len(to_annotate))+" significant variants with a sliding window size of "+str(windowsizekb)+" kb...",verbose=verbose)
|
|
818
809
|
if (to_annotate.empty is not True) and anno=="GENENAME":
|
|
819
810
|
to_annotate = annogene(to_annotate,
|
|
820
811
|
id=snpid,
|
|
@@ -824,21 +815,105 @@ def mqqplot(insumstats,
|
|
|
824
815
|
build=build,
|
|
825
816
|
source=anno_source,
|
|
826
817
|
verbose=verbose).rename(columns={"GENE":"Annotation"})
|
|
818
|
+
log.write("Finished extracting variants for annotation...",verbose=verbose)
|
|
827
819
|
|
|
828
820
|
# Configure X, Y axes #######################################################
|
|
829
|
-
|
|
830
|
-
|
|
821
|
+
log.write("Start to process figure arts.",verbose=verbose)
|
|
822
|
+
if region is None:
|
|
823
|
+
# if Manhattan plot
|
|
824
|
+
|
|
825
|
+
ax1 = _process_xtick(ax1=ax1,
|
|
826
|
+
chrom_df=chrom_df,
|
|
827
|
+
xtick_chr_dict=xtick_chr_dict,
|
|
828
|
+
fontsize = fontsize,
|
|
829
|
+
font_family=font_family,
|
|
830
|
+
log=log,
|
|
831
|
+
verbose=verbose)
|
|
832
|
+
|
|
833
|
+
ax1, ax3 = _process_xlabel(region=region,
|
|
834
|
+
xlabel=xlabel,
|
|
835
|
+
ax1=ax1,
|
|
836
|
+
gtf_path=gtf_path,
|
|
837
|
+
mode=mode,
|
|
838
|
+
fontsize=fontsize,
|
|
839
|
+
font_family=font_family,
|
|
840
|
+
ax3=ax3,
|
|
841
|
+
log=log,
|
|
842
|
+
verbose=verbose)
|
|
843
|
+
|
|
844
|
+
ax1, ax4 = _process_ylabel(ylabel=ylabel,
|
|
845
|
+
ax1=ax1,
|
|
846
|
+
mode=mode,
|
|
847
|
+
bwindowsizekb=bwindowsizekb,
|
|
848
|
+
fontsize=fontsize,
|
|
849
|
+
font_family=font_family,
|
|
850
|
+
ax4=ax4,
|
|
851
|
+
log=log,
|
|
852
|
+
verbose=verbose)
|
|
853
|
+
|
|
854
|
+
|
|
855
|
+
ax1 = _set_yticklabels(cut=cut,
|
|
856
|
+
cutfactor=cutfactor,
|
|
857
|
+
cut_log=cut_log,
|
|
858
|
+
ax1=ax1,
|
|
859
|
+
skip=skip,
|
|
860
|
+
maxy=maxy,
|
|
861
|
+
maxticker=maxticker,
|
|
862
|
+
ystep=ystep,
|
|
863
|
+
sc_linewidth=sc_linewidth,
|
|
864
|
+
cut_line_color=cut_line_color,
|
|
865
|
+
fontsize=fontsize,
|
|
866
|
+
font_family=font_family,
|
|
867
|
+
ytick3=ytick3,
|
|
868
|
+
ylabels=ylabels,
|
|
869
|
+
ylabels_converted=ylabels_converted,
|
|
870
|
+
log=log,
|
|
871
|
+
verbose=verbose)
|
|
872
|
+
|
|
873
|
+
ax1, ax4 = _process_ytick(ax1=ax1,
|
|
874
|
+
fontsize=fontsize,
|
|
875
|
+
font_family=font_family,
|
|
876
|
+
ax4=ax4,
|
|
877
|
+
log=log,
|
|
878
|
+
verbose=verbose)
|
|
879
|
+
|
|
880
|
+
# regional plot cbar
|
|
881
|
+
if cbar is not None:
|
|
882
|
+
cbar = _process_cbar(cbar,
|
|
883
|
+
cbar_fontsize=fontsize,
|
|
884
|
+
cbar_font_family=font_family,
|
|
885
|
+
cbar_title=cbar_title,
|
|
886
|
+
log=log,
|
|
887
|
+
verbose=verbose)
|
|
888
|
+
|
|
831
889
|
ax1 = _process_spine(ax1, mode)
|
|
832
890
|
|
|
833
|
-
|
|
891
|
+
# genomewide significant line
|
|
892
|
+
ax1 = _process_line(ax1,
|
|
893
|
+
sig_line,
|
|
894
|
+
suggestive_sig_line,
|
|
895
|
+
additional_line,
|
|
896
|
+
lines_to_plot ,
|
|
897
|
+
sc_linewidth,
|
|
898
|
+
sig_line_color,
|
|
899
|
+
suggestive_sig_line_color,
|
|
900
|
+
additional_line_color,
|
|
901
|
+
mode,
|
|
902
|
+
bmean,
|
|
903
|
+
bmedian,
|
|
904
|
+
log=log,
|
|
905
|
+
verbose=verbose )
|
|
906
|
+
|
|
834
907
|
|
|
835
908
|
if mtitle and anno and len(to_annotate)>0:
|
|
836
909
|
pad=(ax1.transData.transform((skip, title_pad*maxy))[1]-ax1.transData.transform((skip, maxy)))[1]
|
|
837
910
|
ax1.set_title(mtitle,pad=pad,fontsize=title_fontsize,family=font_family)
|
|
838
911
|
elif mtitle:
|
|
839
912
|
ax1.set_title(mtitle,fontsize=title_fontsize,family=font_family)
|
|
840
|
-
|
|
913
|
+
log.write("Finished processing figure arts.",verbose=verbose)
|
|
914
|
+
|
|
841
915
|
# Add annotation arrows and texts
|
|
916
|
+
log.write("Start to annotate variants...",verbose=verbose)
|
|
842
917
|
ax1 = annotate_single(
|
|
843
918
|
sumstats=sumstats,
|
|
844
919
|
anno=anno,
|
|
@@ -872,7 +947,8 @@ def mqqplot(insumstats,
|
|
|
872
947
|
log=log,
|
|
873
948
|
_invert=_invert
|
|
874
949
|
)
|
|
875
|
-
|
|
950
|
+
log.write("Finished annotating variants.",verbose=verbose)
|
|
951
|
+
# Manhattan-like plot Finished #####################################################################
|
|
876
952
|
|
|
877
953
|
# QQ plot #########################################################################################################
|
|
878
954
|
if "qq" in mode:
|
|
@@ -916,9 +992,9 @@ def mqqplot(insumstats,
|
|
|
916
992
|
|
|
917
993
|
# Y axis jagged
|
|
918
994
|
if jagged==True:
|
|
919
|
-
ax1 = _jagged_y(cut=cut,skip=skip,ax1=ax1,mode=1,mqqratio=mqqratio,jagged_len=jagged_len,jagged_wid=jagged_wid)
|
|
995
|
+
ax1 = _jagged_y(cut=cut,skip=skip,ax1=ax1,mode=1,mqqratio=mqqratio,jagged_len=jagged_len,jagged_wid=jagged_wid,log=log, verbose=verbose)
|
|
920
996
|
if "qq" in mode:
|
|
921
|
-
ax2 = _jagged_y(cut=cut,skip=skip,ax1=ax2,mode=2,mqqratio=mqqratio,jagged_len=jagged_len,jagged_wid=jagged_wid)
|
|
997
|
+
ax2 = _jagged_y(cut=cut,skip=skip,ax1=ax2,mode=2,mqqratio=mqqratio,jagged_len=jagged_len,jagged_wid=jagged_wid,log=log, verbose=verbose)
|
|
922
998
|
|
|
923
999
|
# XY lim
|
|
924
1000
|
if ylim is not None:
|
|
@@ -943,6 +1019,8 @@ def mqqplot(insumstats,
|
|
|
943
1019
|
# Return matplotlib figure object #######################################################################################
|
|
944
1020
|
if _get_region_lead==True:
|
|
945
1021
|
return fig, log, lead_snp_i, lead_snp_i2
|
|
1022
|
+
|
|
1023
|
+
log.write("Finished creating plot successfully!",verbose=verbose)
|
|
946
1024
|
return fig, log
|
|
947
1025
|
|
|
948
1026
|
##############################################################################################################################################################################
|
|
@@ -1037,22 +1115,22 @@ def _sanity_check(sumstats, mode, chrom, pos, stratified, _if_quick_qc, log, ver
|
|
|
1037
1115
|
#sanity check : drop variants with na values in chr and pos df
|
|
1038
1116
|
sumstats = sumstats.dropna(subset=[chrom,pos])
|
|
1039
1117
|
after_number=len(sumstats)
|
|
1040
|
-
|
|
1118
|
+
log.write(" -Removed "+ str(pre_number-after_number) +" variants with nan in CHR or POS column ...",verbose=verbose)
|
|
1041
1119
|
out_of_range_chr = sumstats[chrom]<=0
|
|
1042
|
-
|
|
1120
|
+
log.write(" -Removed {} variants with CHR <=0...".format(sum(out_of_range_chr)),verbose=verbose)
|
|
1043
1121
|
sumstats = sumstats.loc[~out_of_range_chr,:]
|
|
1044
1122
|
|
|
1045
1123
|
if stratified is True and _if_quick_qc:
|
|
1046
1124
|
pre_number=len(sumstats)
|
|
1047
1125
|
sumstats = sumstats.dropna(subset=["MAF"])
|
|
1048
1126
|
after_number=len(sumstats)
|
|
1049
|
-
|
|
1127
|
+
log.write(" -Removed "+ str(pre_number-after_number) +" variants with nan in EAF column ...",verbose=verbose)
|
|
1050
1128
|
|
|
1051
1129
|
if "b" not in mode and _if_quick_qc:
|
|
1052
1130
|
pre_number=len(sumstats)
|
|
1053
1131
|
sumstats = sumstats.dropna(subset=["raw_P"])
|
|
1054
1132
|
after_number=len(sumstats)
|
|
1055
|
-
|
|
1133
|
+
log.write(" -Removed "+ str(pre_number-after_number) +" variants with nan in P column ...",verbose=verbose)
|
|
1056
1134
|
return sumstats
|
|
1057
1135
|
|
|
1058
1136
|
def _process_p_value(sumstats, mode,p, mlog10p, scaled, log, verbose ):
|
|
@@ -1061,7 +1139,7 @@ def _process_p_value(sumstats, mode,p, mlog10p, scaled, log, verbose ):
|
|
|
1061
1139
|
sumstats["scaled_P"] = sumstats["DENSITY"].copy()
|
|
1062
1140
|
sumstats["raw_P"] = -np.log10(sumstats["DENSITY"].copy()+2)
|
|
1063
1141
|
elif scaled is True:
|
|
1064
|
-
|
|
1142
|
+
log.write(" -P values are already converted to -log10(P)!",verbose=verbose)
|
|
1065
1143
|
sumstats["scaled_P"] = sumstats["raw_P"].copy()
|
|
1066
1144
|
sumstats["raw_P"] = np.power(10,-sumstats["scaled_P"].astype("float64"))
|
|
1067
1145
|
else:
|
|
@@ -1109,7 +1187,7 @@ def _process_highlight(sumstats, highlight, highlight_chrpos, highlight_windowkb
|
|
|
1109
1187
|
sumstats.loc[right_chr&up_pos&low_pos,"HUE"]=0
|
|
1110
1188
|
else:
|
|
1111
1189
|
# highlight for one set
|
|
1112
|
-
|
|
1190
|
+
to_highlight = sumstats.loc[sumstats[snpid].isin(highlight),:]
|
|
1113
1191
|
#assign colors: 0 is highlight color
|
|
1114
1192
|
for index,row in to_highlight.iterrows():
|
|
1115
1193
|
target_chr = int(row[chrom])
|
|
@@ -1122,7 +1200,7 @@ def _process_highlight(sumstats, highlight, highlight_chrpos, highlight_windowkb
|
|
|
1122
1200
|
|
|
1123
1201
|
def _process_density(sumstats, mode, bwindowsizekb, chrom, pos, verbose, log):
|
|
1124
1202
|
if "b" in mode and "DENSITY" not in sumstats.columns:
|
|
1125
|
-
|
|
1203
|
+
log.write(" -Calculating DENSITY with windowsize of ",bwindowsizekb ," kb",verbose=verbose)
|
|
1126
1204
|
large_number = _get_largenumber(sumstats[pos].max(),log=log)
|
|
1127
1205
|
|
|
1128
1206
|
stack=[]
|
|
@@ -1143,11 +1221,12 @@ def _process_density(sumstats, mode, bwindowsizekb, chrom, pos, verbose, log):
|
|
|
1143
1221
|
elif "b" in mode and "DENSITY" in sumstats.columns:
|
|
1144
1222
|
bmean=sumstats["DENSITY"].mean()
|
|
1145
1223
|
bmedian=sumstats["DENSITY"].median()
|
|
1146
|
-
|
|
1224
|
+
log.write(" -DENSITY column exists. Skipping calculation...",verbose=verbose)
|
|
1147
1225
|
return sumstats, bmean, bmedian
|
|
1148
1226
|
|
|
1149
|
-
def _process_line(ax1, sig_line, suggestive_sig_line, additional_line, lines_to_plot , sc_linewidth, sig_line_color, suggestive_sig_line_color, additional_line_color, mode, bmean, bmedian ):
|
|
1227
|
+
def _process_line(ax1, sig_line, suggestive_sig_line, additional_line, lines_to_plot , sc_linewidth, sig_line_color, suggestive_sig_line_color, additional_line_color, mode, bmean, bmedian , log=Log(),verbose=True):
|
|
1150
1228
|
# genomewide significant line
|
|
1229
|
+
log.write(" -Processing lines...",verbose=verbose)
|
|
1151
1230
|
if sig_line is True:
|
|
1152
1231
|
sigline = ax1.axhline(y=lines_to_plot[0],
|
|
1153
1232
|
linewidth = sc_linewidth,
|
|
@@ -1173,12 +1252,39 @@ def _process_line(ax1, sig_line, suggestive_sig_line, additional_line, lines_to_
|
|
|
1173
1252
|
medianline = ax1.axhline(y=bmedian, linewidth = sc_linewidth,linestyle="--",color=sig_line_color,zorder=1000)
|
|
1174
1253
|
return ax1
|
|
1175
1254
|
|
|
1176
|
-
def
|
|
1255
|
+
def _process_cbar(cbar, cbar_fontsize, cbar_font_family, cbar_title, log=Log(),verbose=True):
|
|
1256
|
+
log.write(" -Processing color bar...",verbose=verbose)
|
|
1257
|
+
if type(cbar) == list:
|
|
1258
|
+
for cbar_single in cbar:
|
|
1259
|
+
cbar_yticklabels = cbar_single.ax.get_yticklabels()
|
|
1260
|
+
cbar_single.ax.set_yticklabels(cbar_yticklabels, fontsize=cbar_fontsize, family=cbar_font_family )
|
|
1261
|
+
cbar_single.ax.set_title(cbar_title, fontsize=cbar_fontsize, family=cbar_font_family, loc="center",y=-0.2 )
|
|
1262
|
+
else:
|
|
1263
|
+
cbar_yticklabels = cbar.ax.get_yticklabels()
|
|
1264
|
+
cbar.ax.set_yticklabels(cbar_yticklabels, fontsize=cbar_fontsize, family=cbar_font_family )
|
|
1265
|
+
cbar.ax.set_title(cbar_title, fontsize=cbar_fontsize, family=cbar_font_family, loc="center",y=-0.2 )
|
|
1266
|
+
return cbar
|
|
1267
|
+
|
|
1268
|
+
def _process_xtick(ax1, chrom_df, xtick_chr_dict, fontsize, font_family, log=Log(),verbose=True):
|
|
1269
|
+
log.write(" -Processing X ticks...",verbose=verbose)
|
|
1177
1270
|
ax1.set_xticks(chrom_df.astype("float64"))
|
|
1178
1271
|
ax1.set_xticklabels(chrom_df.index.astype("Int64").map(xtick_chr_dict),fontsize=fontsize,family=font_family)
|
|
1179
1272
|
return ax1
|
|
1180
1273
|
|
|
1181
|
-
def
|
|
1274
|
+
def _process_ytick(ax1, fontsize, font_family, ax4, log=Log(),verbose=True):
|
|
1275
|
+
log.write(" -Processing Y labels...",verbose=verbose)
|
|
1276
|
+
ax1_yticklabels = ax1.get_yticklabels()
|
|
1277
|
+
#ax1.set_yticklabels(ax1_yticklabels,fontsize=fontsize,family=font_family)
|
|
1278
|
+
ax1_yticks = ax1.get_yticks()
|
|
1279
|
+
ax1.set_yticks(ax1_yticks,ax1_yticklabels,fontsize=fontsize,family=font_family)
|
|
1280
|
+
if ax4 is not None:
|
|
1281
|
+
ax4_yticklabels = ax4.get_yticklabels()
|
|
1282
|
+
ax4_yticks = ax4.get_yticks()
|
|
1283
|
+
ax4.set_yticks(ax4_yticks,ax4_yticklabels, fontsize=fontsize,family=font_family)
|
|
1284
|
+
return ax1, ax4
|
|
1285
|
+
|
|
1286
|
+
def _process_xlabel(region, xlabel, ax1, gtf_path, mode, fontsize, font_family, ax3=None , log=Log(),verbose=True):
|
|
1287
|
+
log.write(" -Processing X labels...",verbose=verbose)
|
|
1182
1288
|
if region is not None:
|
|
1183
1289
|
if xlabel is None:
|
|
1184
1290
|
xlabel = "Chromosome "+str(region[0])+" (MB)"
|
|
@@ -1192,7 +1298,8 @@ def _process_xlabel(region, xlabel, ax1, gtf_path, mode, fontsize, font_family,
|
|
|
1192
1298
|
ax1.set_xlabel(xlabel,fontsize=fontsize,family=font_family)
|
|
1193
1299
|
return ax1, ax3
|
|
1194
1300
|
|
|
1195
|
-
def _process_ylabel(ylabel, ax1, mode, bwindowsizekb, fontsize, font_family):
|
|
1301
|
+
def _process_ylabel(ylabel, ax1, mode, bwindowsizekb, fontsize, font_family, ax4=None, log=Log(),verbose=True):
|
|
1302
|
+
log.write(" -Processing Y labels...",verbose=verbose)
|
|
1196
1303
|
if "b" in mode:
|
|
1197
1304
|
if ylabel is None:
|
|
1198
1305
|
ylabel ="Density of GWAS \n SNPs within "+str(bwindowsizekb)+" kb"
|
|
@@ -1201,7 +1308,10 @@ def _process_ylabel(ylabel, ax1, mode, bwindowsizekb, fontsize, font_family):
|
|
|
1201
1308
|
if ylabel is None:
|
|
1202
1309
|
ylabel ="$-log_{10}(P)$"
|
|
1203
1310
|
ax1.set_ylabel(ylabel,fontsize=fontsize,family=font_family)
|
|
1204
|
-
|
|
1311
|
+
if ax4 is not None:
|
|
1312
|
+
ax4_ylabel = ax4.get_ylabel()
|
|
1313
|
+
ax4.set_ylabel(ax4_ylabel, fontsize=fontsize, family=font_family )
|
|
1314
|
+
return ax1, ax4
|
|
1205
1315
|
|
|
1206
1316
|
def _process_spine(ax1, mode):
|
|
1207
1317
|
ax1.spines["top"].set_visible(False)
|
|
@@ -1218,6 +1328,7 @@ def _process_layout(mode, figax, fig_args, mqqratio, region_hspace):
|
|
|
1218
1328
|
if mode=="qqm":
|
|
1219
1329
|
fig, (ax2, ax1) = plt.subplots(1, 2,gridspec_kw={'width_ratios': [1, mqqratio]},**fig_args)
|
|
1220
1330
|
ax3 = None
|
|
1331
|
+
|
|
1221
1332
|
elif mode=="mqq":
|
|
1222
1333
|
if figax is not None:
|
|
1223
1334
|
fig = figax[0]
|
|
@@ -1226,6 +1337,7 @@ def _process_layout(mode, figax, fig_args, mqqratio, region_hspace):
|
|
|
1226
1337
|
else:
|
|
1227
1338
|
fig, (ax1, ax2) = plt.subplots(1, 2,gridspec_kw={'width_ratios': [mqqratio, 1]},**fig_args)
|
|
1228
1339
|
ax3 = None
|
|
1340
|
+
|
|
1229
1341
|
elif mode=="m":
|
|
1230
1342
|
if figax is not None:
|
|
1231
1343
|
fig = figax[0]
|
|
@@ -1234,10 +1346,12 @@ def _process_layout(mode, figax, fig_args, mqqratio, region_hspace):
|
|
|
1234
1346
|
fig, ax1 = plt.subplots(1, 1,**fig_args)
|
|
1235
1347
|
ax2 = None
|
|
1236
1348
|
ax3 = None
|
|
1349
|
+
|
|
1237
1350
|
elif mode=="qq":
|
|
1238
1351
|
fig, ax2 = plt.subplots(1, 1,**fig_args)
|
|
1239
1352
|
ax1=None
|
|
1240
1353
|
ax3=None
|
|
1354
|
+
|
|
1241
1355
|
elif mode=="r":
|
|
1242
1356
|
if figax is not None:
|
|
1243
1357
|
fig = figax[0]
|
|
@@ -1257,4 +1371,6 @@ def _process_layout(mode, figax, fig_args, mqqratio, region_hspace):
|
|
|
1257
1371
|
ax3 = None
|
|
1258
1372
|
else:
|
|
1259
1373
|
raise ValueError("Please select one from the 5 modes: mqq/qqm/m/qq/r/b")
|
|
1260
|
-
|
|
1374
|
+
ax4=None
|
|
1375
|
+
cbar=None
|
|
1376
|
+
return fig, ax1, ax2, ax3, ax4, cbar
|