gwaslab 3.4.36__py3-none-any.whl → 3.4.38__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gwaslab might be problematic. Click here for more details.

Files changed (42)
  1. gwaslab/__init__.py +1 -1
  2. gwaslab/data/formatbook.json +722 -721
  3. gwaslab/g_Log.py +8 -0
  4. gwaslab/g_Sumstats.py +80 -178
  5. gwaslab/g_SumstatsPair.py +6 -2
  6. gwaslab/g_Sumstats_summary.py +3 -3
  7. gwaslab/g_meta.py +13 -3
  8. gwaslab/g_version.py +2 -2
  9. gwaslab/hm_casting.py +29 -15
  10. gwaslab/hm_harmonize_sumstats.py +312 -159
  11. gwaslab/hm_rsid_to_chrpos.py +1 -1
  12. gwaslab/io_preformat_input.py +46 -37
  13. gwaslab/io_to_formats.py +428 -295
  14. gwaslab/qc_check_datatype.py +15 -1
  15. gwaslab/qc_fix_sumstats.py +956 -719
  16. gwaslab/util_ex_calculate_ldmatrix.py +29 -11
  17. gwaslab/util_ex_gwascatalog.py +1 -1
  18. gwaslab/util_ex_ldproxyfinder.py +1 -1
  19. gwaslab/util_ex_process_h5.py +26 -17
  20. gwaslab/util_ex_process_ref.py +3 -3
  21. gwaslab/util_ex_run_coloc.py +26 -4
  22. gwaslab/util_in_convert_h2.py +1 -1
  23. gwaslab/util_in_fill_data.py +44 -5
  24. gwaslab/util_in_filter_value.py +122 -34
  25. gwaslab/util_in_get_density.py +2 -2
  26. gwaslab/util_in_get_sig.py +41 -9
  27. gwaslab/viz_aux_quickfix.py +26 -21
  28. gwaslab/viz_aux_reposition_text.py +7 -4
  29. gwaslab/viz_aux_save_figure.py +6 -5
  30. gwaslab/viz_plot_compare_af.py +5 -5
  31. gwaslab/viz_plot_compare_effect.py +22 -5
  32. gwaslab/viz_plot_miamiplot2.py +28 -20
  33. gwaslab/viz_plot_mqqplot.py +214 -98
  34. gwaslab/viz_plot_qqplot.py +11 -8
  35. gwaslab/viz_plot_regionalplot.py +16 -9
  36. gwaslab/viz_plot_trumpetplot.py +15 -6
  37. {gwaslab-3.4.36.dist-info → gwaslab-3.4.38.dist-info}/METADATA +3 -3
  38. gwaslab-3.4.38.dist-info/RECORD +72 -0
  39. gwaslab-3.4.36.dist-info/RECORD +0 -72
  40. {gwaslab-3.4.36.dist-info → gwaslab-3.4.38.dist-info}/LICENSE +0 -0
  41. {gwaslab-3.4.36.dist-info → gwaslab-3.4.38.dist-info}/WHEEL +0 -0
  42. {gwaslab-3.4.36.dist-info → gwaslab-3.4.38.dist-info}/top_level.txt +0 -0
@@ -104,6 +104,15 @@ def mqqplot(insumstats,
104
104
  region_protein_coding = True,
105
105
  region_flank_factor = 0.05,
106
106
  region_anno_bbox_args = None,
107
+ cbar_title='LD $r^{2}$',
108
+ cbar_fontsize = None,
109
+ cbar_font_family = None,
110
+ track_n=4,
111
+ track_n_offset=0,
112
+ track_fontsize_ratio=0.95,
113
+ track_exon_ratio=1,
114
+ track_text_offset=1,
115
+ track_font_family = None,
107
116
  taf = None,
108
117
  # track_n, track_n_offset,font_ratio,exon_ratio,text_offset
109
118
  tabix=None,
@@ -204,7 +213,6 @@ def mqqplot(insumstats,
204
213
  chr_dict = get_chr_to_number()
205
214
  if xtick_chr_dict is None:
206
215
  xtick_chr_dict = get_number_to_chr()
207
-
208
216
  if gtf_chr_dict is None:
209
217
  gtf_chr_dict = get_number_to_chr()
210
218
  if rr_chr_dict is None:
@@ -243,8 +251,14 @@ def mqqplot(insumstats,
243
251
  region_ld_colors2 = ["#E4E4E4","#D8E2F2","#AFCBE3","#86B3D4","#5D98C4","#367EB7","#367EB7"]
244
252
  if region_title_args is None:
245
253
  region_title_args = {"size":10}
254
+ if cbar_fontsize is None:
255
+ cbar_fontsize = fontsize
256
+ if cbar_font_family is None:
257
+ cbar_font_family = font_family
258
+ if track_font_family is None:
259
+ track_font_family = font_family
246
260
  if taf is None:
247
- taf = [4,0,0.95,1,1]
261
+ taf = [track_n,track_n_offset,track_fontsize_ratio,track_exon_ratio,track_text_offset]
248
262
  if maf_bins is None:
249
263
  maf_bins=[(0, 0.01), (0.01, 0.05), (0.05, 0.25),(0.25,0.5)]
250
264
  if maf_bin_colors is None:
@@ -289,40 +303,42 @@ def mqqplot(insumstats,
289
303
  scatter_args["rasterized"]=True
290
304
  qq_scatter_args["rasterized"]=True
291
305
 
292
- if verbose: log.write("Start to plot manhattan/qq plot with the following basic settings {}:".format(_get_version()))
293
- if verbose: log.write(" -Genomic coordinates version: {}...".format(build))
306
+ log.write("Start to create MQQ plot...{}:".format(_get_version()),verbose=verbose)
307
+ log.write(" -Genomic coordinates version: {}...".format(build),verbose=verbose)
294
308
  if build is None or build=="99":
295
- if verbose: log.write(" -WARNING: Genomic coordinates version is unknown...")
296
- if verbose: log.write(" -Genome-wide significance level to plot is set to "+str(sig_level_plot)+" ...")
297
- if verbose: log.write(" -Raw input contains "+str(len(insumstats))+" variants...")
298
- if verbose: log.write(" -Plot layout mode is : "+mode)
309
+ log.warning("Genomic coordinates version is unknown.")
310
+ log.write(" -Genome-wide significance level to plot is set to "+str(sig_level_plot)+" ...",verbose=verbose)
311
+ log.write(" -Raw input contains "+str(len(insumstats))+" variants...",verbose=verbose)
312
+ log.write(" -MQQ plot layout mode is : "+mode,verbose=verbose)
313
+
299
314
  if len(anno_set)>0 and ("m" in mode):
300
- if verbose: log.write(" -Variants to annotate : "+",".join(anno_set))
315
+ log.write(" -Variants to annotate : "+",".join(anno_set),verbose=verbose)
316
+
301
317
  if len(highlight)>0 and ("m" in mode):
302
318
  if pd.api.types.is_list_like(highlight[0]):
303
319
  if highlight_chrpos==False:
304
- if len(highlight[0]) == len(highlight_color):
305
- log.write(" -WARNING: number of locus list does not match number of colors !!!")
320
+ if len(highlight) != len(highlight_color):
321
+ log.warning("Number of locus groups in the list does not match number of provided colors.")
306
322
  for i, highlight_set in enumerate(highlight):
307
- if verbose: log.write(" -Set {} loci to highlight ({}) : ".format(i+1, highlight_color[i%len(highlight_color)])+",".join(highlight_set))
323
+ log.write(" -Set {} loci to highlight ({}) : ".format(i+1, highlight_color[i%len(highlight_color)])+",".join(highlight_set),verbose=verbose)
308
324
  else:
309
- if verbose: log.write(" -Loci to highlight ({}): {}".format(highlight_color,highlight))
310
- if verbose: log.write(" -Highlight_window is set to: ", highlight_windowkb, " kb")
325
+ log.write(" -Loci to highlight ({}): {}".format(highlight_color,highlight),verbose=verbose)
326
+ log.write(" -highlight_windowkb is set to: ", highlight_windowkb, " kb",verbose=verbose)
311
327
  else:
312
- if verbose: log.write(" -Loci to highlight ({}): ".format(highlight_color)+",".join(highlight))
313
- if verbose: log.write(" -Highlight_window is set to: ", highlight_windowkb, " kb")
328
+ log.write(" -Loci to highlight ({}): ".format(highlight_color)+",".join(highlight),verbose=verbose)
329
+ log.write(" -highlight_windowkb is set to: ", highlight_windowkb, " kb",verbose=verbose)
314
330
 
315
331
  if len(pinpoint)>0 :
316
332
  if pd.api.types.is_list_like(pinpoint[0]):
317
- if len(pinpoint[0]) == len(pinpoint_color):
318
- log.write(" -WARNING: number of variant list does not match number of colors !!!")
333
+ if len(pinpoint) != len(pinpoint_color):
334
+ log.warning("Number of variant groups in the list does not match number of provided colors.")
319
335
  for i, pinpoint_set in enumerate(pinpoint):
320
- if verbose: log.write(" -Set {} variants to pinpoint ({}) : ".format(i+1,pinpoint_color[i%len(pinpoint_color)])+",".join(pinpoint_set))
336
+ log.write(" -Set {} variants to pinpoint ({}) : ".format(i+1,pinpoint_color[i%len(pinpoint_color)])+",".join(pinpoint_set),verbose=verbose)
321
337
  else:
322
- if verbose: log.write(" -Variants to pinpoint ({}) : ".format(pinpoint_color)+",".join(pinpoint))
338
+ log.write(" -Variants to pinpoint ({}) : ".format(pinpoint_color)+",".join(pinpoint),verbose=verbose)
323
339
 
324
340
  if region is not None:
325
- if verbose: log.write(" -Region to plot : chr"+str(region[0])+":"+str(region[1])+"-"+str(region[2])+".")
341
+ log.write(" -Region to plot : chr"+str(region[0])+":"+str(region[1])+"-"+str(region[2])+".",verbose=verbose)
326
342
 
327
343
  # construct line series for coversion
328
344
  if additional_line is None:
@@ -340,13 +356,14 @@ def mqqplot(insumstats,
340
356
  # ax2 : qq plot
341
357
  # ax3 : gene track
342
358
  # ax4 : recombination rate
359
+ # cbar : color bar
343
360
  # ax5 : miami plot lower panel
344
361
 
345
362
  # "m" : Manhattan plot
346
363
  # "qq": QQ plot
347
364
  # "r" : regional plot
348
365
 
349
- fig, ax1, ax2, ax3 = _process_layout(mode=mode,
366
+ fig, ax1, ax2, ax3, ax4, cbar = _process_layout(mode=mode,
350
367
  figax=figax,
351
368
  fig_args=fig_args,
352
369
  mqqratio=mqqratio,
@@ -383,7 +400,7 @@ def mqqplot(insumstats,
383
400
  pinpoint=pinpoint,
384
401
  density_color=density_color)
385
402
 
386
- sumstats = insumstats.loc[:,usecols].copy()
403
+ sumstats = insumstats[usecols].copy()
387
404
 
388
405
  #################################################################################################
389
406
 
@@ -392,7 +409,7 @@ def mqqplot(insumstats,
392
409
  if (anno == "GENENAME"):
393
410
  anno_sig=True
394
411
  elif (anno is not None) and (anno is not True):
395
- sumstats["Annotation"]=sumstats.loc[:,anno].astype("string")
412
+ sumstats["Annotation"]=sumstats[anno].astype("string")
396
413
 
397
414
  ## P value
398
415
  ## m, qq, r
@@ -416,15 +433,15 @@ def mqqplot(insumstats,
416
433
  region_start = region[1]
417
434
  region_end = region[2]
418
435
  marker_size=(25,45)
419
- if verbose:log.write(" -Extract SNPs in region : chr{}:{}-{}...".format(region_chr, region[1], region[2]))
436
+ log.write(" -Extract SNPs in region : chr{}:{}-{}...".format(region_chr, region[1], region[2]),verbose=verbose)
420
437
 
421
- in_region_snp = (sumstats[chrom]==region_chr) &(sumstats[pos]<region_end) &(sumstats[pos]>region_start)
438
+ in_region_snp = (sumstats[chrom]==region_chr) & (sumstats[pos]<region_end) & (sumstats[pos]>region_start)
422
439
 
423
- if verbose:log.write(" -Extract SNPs in specified regions: "+str(sum(in_region_snp)))
440
+ log.write(" -Extract SNPs in specified regions: "+str(sum(in_region_snp)),verbose=verbose)
424
441
  sumstats = sumstats.loc[in_region_snp,:]
425
442
 
426
443
  if len(sumstats)==0:
427
- log.write(" -Warning : No valid data! Please check the input.")
444
+ log.warning("No valid data! Please check the input.")
428
445
  return None
429
446
 
430
447
  ## EAF
@@ -438,11 +455,11 @@ def mqqplot(insumstats,
438
455
  sumstats["HUE"] = pd.NA
439
456
  sumstats["HUE"] = sumstats["HUE"].astype("Int64")
440
457
 
441
- if verbose: log.write("Finished loading specified columns from the sumstats.")
458
+ log.write("Finished loading specified columns from the sumstats.",verbose=verbose)
442
459
 
443
460
 
444
461
  #sanity check############################################################################################################
445
- log.write("Start conversion and sanity check:",verbose=verbose)
462
+ log.write("Start data conversion and sanity check:",verbose=verbose)
446
463
 
447
464
  if _if_quick_qc == False:
448
465
  log.write(" -Sanity check will be skipped.", verbose=verbose)
@@ -511,15 +528,19 @@ def mqqplot(insumstats,
511
528
  lines_to_plot=lines_to_plot,
512
529
  log = log)
513
530
  except:
514
- log.write(" -Warning : No valid data! Please check the input.")
531
+ log.warning("No valid data! Please check the input.")
515
532
  return None
516
533
 
534
+ log.write("Finished data conversion and sanity check.",verbose=verbose)
535
+
517
536
  # Manhattan plot ##########################################################################################################
537
+ log.write("Start to create MQQ plot with "+str(len(sumstats))+" variants...",verbose=verbose)
518
538
  ## regional plot ->rsq
519
539
  #calculate rsq]
520
540
  if vcf_path is not None:
521
541
  if tabix is None:
522
542
  tabix = which("tabix")
543
+ log.write(" -tabix will be used: {}".format(tabix),verbose=verbose)
523
544
  sumstats = process_vcf(sumstats=sumstats,
524
545
  vcf_path=vcf_path,
525
546
  region=region,
@@ -552,8 +573,6 @@ def mqqplot(insumstats,
552
573
 
553
574
  if vcf_path is not None:
554
575
  sumstats["chr_hue"]=sumstats["LD"]
555
-
556
- if verbose:log.write("Start to create manhattan plot with "+str(len(sumstats))+" variants:")
557
576
  ## default seetings
558
577
 
559
578
  palette = sns.color_palette(colors,n_colors=sumstats[chrom].nunique())
@@ -585,6 +604,7 @@ def mqqplot(insumstats,
585
604
  ## if highlight
586
605
  highlight_i = pd.DataFrame()
587
606
  if len(highlight) >0:
607
+ log.write(" -Creating background plot...",verbose=verbose)
588
608
  plot = sns.scatterplot(data=sumstats, x='i', y='scaled_P',
589
609
  hue='chr_hue',
590
610
  palette=palette,
@@ -596,8 +616,7 @@ def mqqplot(insumstats,
596
616
  zorder=2,ax=ax1,edgecolor=edgecolor, **scatter_args)
597
617
  if pd.api.types.is_list_like(highlight[0]) and highlight_chrpos==False:
598
618
  for i, highlight_set in enumerate(highlight):
599
- if verbose: log.write(" -Highlighting set {} target loci...".format(i+1))
600
- print(sumstats["HUE"].dtype)
619
+ log.write(" -Highlighting set {} target loci...".format(i+1),verbose=verbose)
601
620
  sns.scatterplot(data=sumstats.loc[sumstats["HUE"]==i], x='i', y='scaled_P',
602
621
  hue="HUE",
603
622
  palette={i:highlight_color[i%len(highlight_color)]},
@@ -609,7 +628,7 @@ def mqqplot(insumstats,
609
628
  zorder=3+i,ax=ax1,edgecolor=edgecolor,**scatter_args)
610
629
  highlight_i = sumstats.loc[~sumstats["HUE"].isna(),"i"].values
611
630
  else:
612
- if verbose: log.write(" -Highlighting target loci...")
631
+ log.write(" -Highlighting target loci...",verbose=verbose)
613
632
  sns.scatterplot(data=sumstats.loc[sumstats["HUE"]==0], x='i', y='scaled_P',
614
633
  hue="HUE",
615
634
  palette={0:highlight_color},
@@ -658,6 +677,7 @@ def mqqplot(insumstats,
658
677
  hue = 'chr_hue'
659
678
  hue_norm=None
660
679
  to_plot = sumstats
680
+ log.write(" -Creating background plot...",verbose=verbose)
661
681
  plot = sns.scatterplot(data=to_plot, x='i', y='scaled_P',
662
682
  hue=hue,
663
683
  palette= palette,
@@ -677,17 +697,17 @@ def mqqplot(insumstats,
677
697
  for i, pinpoint_set in enumerate(pinpoint):
678
698
  if sum(sumstats[snpid].isin(pinpoint_set))>0:
679
699
  to_pinpoint = sumstats.loc[sumstats[snpid].isin(pinpoint_set),:]
680
- if verbose: log.write(" -Pinpointing set {} target vairants...".format(i+1))
700
+ log.write(" -Pinpointing set {} target vairants...".format(i+1),verbose=verbose)
681
701
  ax1.scatter(to_pinpoint["i"],to_pinpoint["scaled_P"],color=pinpoint_color[i%len(pinpoint_color)],zorder=100,s=marker_size[1]+1)
682
702
  else:
683
- if verbose: log.write(" -Target vairants to pinpoint were not found. Skip pinpointing process...")
703
+ log.write(" -Target vairants to pinpoint were not found. Skip pinpointing process...",verbose=verbose)
684
704
  else:
685
705
  if sum(sumstats[snpid].isin(pinpoint))>0:
686
706
  to_pinpoint = sumstats.loc[sumstats[snpid].isin(pinpoint),:]
687
- if verbose: log.write(" -Pinpointing target vairants...")
707
+ log.write(" -Pinpointing target vairants...",verbose=verbose)
688
708
  ax1.scatter(to_pinpoint["i"],to_pinpoint["scaled_P"],color=pinpoint_color,zorder=100,s=marker_size[1]+1)
689
709
  else:
690
- if verbose: log.write(" -Target vairants to pinpoint were not found. Skip pinpointing process...")
710
+ log.write(" -Target vairants to pinpoint were not found. Skip pinpointing process...",verbose=verbose)
691
711
 
692
712
 
693
713
 
@@ -697,7 +717,7 @@ def mqqplot(insumstats,
697
717
  # if regional plot : pinpoint lead , add color bar ##################################################
698
718
  if (region is not None) and ("r" in mode):
699
719
 
700
- ax1, ax3, lead_snp_i, lead_snp_i2 =_plot_regional(
720
+ ax1, ax3, ax4, cbar, lead_snp_i, lead_snp_i2 =_plot_regional(
701
721
  sumstats=sumstats,
702
722
  fig=fig,
703
723
  ax1=ax1,
@@ -738,6 +758,7 @@ def mqqplot(insumstats,
738
758
  region_recombination = region_recombination,
739
759
  region_protein_coding=region_protein_coding,
740
760
  region_flank_factor =region_flank_factor,
761
+ track_font_family=track_font_family,
741
762
  taf=taf,
742
763
  tabix=tabix,
743
764
  chrom=chrom,
@@ -745,50 +766,20 @@ def mqqplot(insumstats,
745
766
  verbose=verbose,
746
767
  log=log
747
768
  )
769
+
748
770
  else:
749
771
  lead_snp_i= None
750
772
  lead_snp_i2=None
751
-
752
- if region is None:
753
- ax1 = _process_xtick(ax1, chrom_df, xtick_chr_dict, fontsize, font_family)
754
-
755
- # genomewide significant line
756
- ax1 = _process_line(ax1,
757
- sig_line,
758
- suggestive_sig_line,
759
- additional_line,
760
- lines_to_plot ,
761
- sc_linewidth,
762
- sig_line_color,
763
- suggestive_sig_line_color,
764
- additional_line_color,
765
- mode,
766
- bmean,
767
- bmedian )
768
773
 
769
- ax1 = _set_yticklabels(cut=cut,
770
- cutfactor=cutfactor,
771
- cut_log=cut_log,
772
- ax1=ax1,
773
- skip=skip,
774
- maxy=maxy,
775
- maxticker=maxticker,
776
- ystep=ystep,
777
- sc_linewidth=sc_linewidth,
778
- cut_line_color=cut_line_color,
779
- fontsize=fontsize,
780
- font_family=font_family,
781
- ytick3=ytick3,
782
- ylabels=ylabels,
783
- ylabels_converted=ylabels_converted
784
- )
774
+ log.write("Finished creating MQQ plot successfully!",verbose=verbose)
785
775
 
786
776
  # Get top variants for annotation #######################################################
777
+ log.write("Start to extract variants for annotation...",verbose=verbose)
787
778
  if (anno and anno!=True) or (len(anno_set)>0):
788
779
  if len(anno_set)>0:
789
780
  to_annotate=sumstats.loc[sumstats[snpid].isin(anno_set),:]
790
781
  if to_annotate.empty is not True:
791
- if verbose: log.write(" -Found "+str(len(to_annotate))+" specified variants to annotate...")
782
+ log.write(" -Found "+str(len(to_annotate))+" specified variants to annotate...",verbose=verbose)
792
783
  else:
793
784
  to_annotate=getsig(sumstats.loc[sumstats["scaled_P"]> float(-np.log10(sig_level_lead)),:],
794
785
  snpid,
@@ -801,7 +792,7 @@ def mqqplot(insumstats,
801
792
  mlog10p="scaled_P",
802
793
  verbose=False)
803
794
  if (to_annotate.empty is not True) and ("b" not in mode):
804
- if verbose: log.write(" -Found "+str(len(to_annotate))+" significant variants with a sliding window size of "+str(windowsizekb)+" kb...")
795
+ log.write(" -Found "+str(len(to_annotate))+" significant variants with a sliding window size of "+str(windowsizekb)+" kb...",verbose=verbose)
805
796
  else:
806
797
  to_annotate=getsig(sumstats.loc[sumstats["scaled_P"]> float(-np.log10(sig_level_lead)),:],
807
798
  "i",
@@ -814,7 +805,7 @@ def mqqplot(insumstats,
814
805
  mlog10p="scaled_P",
815
806
  sig_level=sig_level_lead)
816
807
  if (to_annotate.empty is not True) and ("b" not in mode):
817
- if verbose: log.write(" -Found "+str(len(to_annotate))+" significant variants with a sliding window size of "+str(windowsizekb)+" kb...")
808
+ log.write(" -Found "+str(len(to_annotate))+" significant variants with a sliding window size of "+str(windowsizekb)+" kb...",verbose=verbose)
818
809
  if (to_annotate.empty is not True) and anno=="GENENAME":
819
810
  to_annotate = annogene(to_annotate,
820
811
  id=snpid,
@@ -824,21 +815,105 @@ def mqqplot(insumstats,
824
815
  build=build,
825
816
  source=anno_source,
826
817
  verbose=verbose).rename(columns={"GENE":"Annotation"})
818
+ log.write("Finished extracting variants for annotation...",verbose=verbose)
827
819
 
828
820
  # Configure X, Y axes #######################################################
829
- ax1 = _process_ylabel(ylabel, ax1, mode, bwindowsizekb, fontsize, font_family)
830
- ax1, ax3 = _process_xlabel(region, xlabel, ax1, gtf_path, mode, fontsize, font_family, ax3=ax3 )
821
+ log.write("Start to process figure arts.",verbose=verbose)
822
+ if region is None:
823
+ # if Manhattan plot
824
+
825
+ ax1 = _process_xtick(ax1=ax1,
826
+ chrom_df=chrom_df,
827
+ xtick_chr_dict=xtick_chr_dict,
828
+ fontsize = fontsize,
829
+ font_family=font_family,
830
+ log=log,
831
+ verbose=verbose)
832
+
833
+ ax1, ax3 = _process_xlabel(region=region,
834
+ xlabel=xlabel,
835
+ ax1=ax1,
836
+ gtf_path=gtf_path,
837
+ mode=mode,
838
+ fontsize=fontsize,
839
+ font_family=font_family,
840
+ ax3=ax3,
841
+ log=log,
842
+ verbose=verbose)
843
+
844
+ ax1, ax4 = _process_ylabel(ylabel=ylabel,
845
+ ax1=ax1,
846
+ mode=mode,
847
+ bwindowsizekb=bwindowsizekb,
848
+ fontsize=fontsize,
849
+ font_family=font_family,
850
+ ax4=ax4,
851
+ log=log,
852
+ verbose=verbose)
853
+
854
+
855
+ ax1 = _set_yticklabels(cut=cut,
856
+ cutfactor=cutfactor,
857
+ cut_log=cut_log,
858
+ ax1=ax1,
859
+ skip=skip,
860
+ maxy=maxy,
861
+ maxticker=maxticker,
862
+ ystep=ystep,
863
+ sc_linewidth=sc_linewidth,
864
+ cut_line_color=cut_line_color,
865
+ fontsize=fontsize,
866
+ font_family=font_family,
867
+ ytick3=ytick3,
868
+ ylabels=ylabels,
869
+ ylabels_converted=ylabels_converted,
870
+ log=log,
871
+ verbose=verbose)
872
+
873
+ ax1, ax4 = _process_ytick(ax1=ax1,
874
+ fontsize=fontsize,
875
+ font_family=font_family,
876
+ ax4=ax4,
877
+ log=log,
878
+ verbose=verbose)
879
+
880
+ # regional plot cbar
881
+ if cbar is not None:
882
+ cbar = _process_cbar(cbar,
883
+ cbar_fontsize=fontsize,
884
+ cbar_font_family=font_family,
885
+ cbar_title=cbar_title,
886
+ log=log,
887
+ verbose=verbose)
888
+
831
889
  ax1 = _process_spine(ax1, mode)
832
890
 
833
- if verbose: log.write("Finished creating Manhattan plot successfully!")
891
+ # genomewide significant line
892
+ ax1 = _process_line(ax1,
893
+ sig_line,
894
+ suggestive_sig_line,
895
+ additional_line,
896
+ lines_to_plot ,
897
+ sc_linewidth,
898
+ sig_line_color,
899
+ suggestive_sig_line_color,
900
+ additional_line_color,
901
+ mode,
902
+ bmean,
903
+ bmedian,
904
+ log=log,
905
+ verbose=verbose )
906
+
834
907
 
835
908
  if mtitle and anno and len(to_annotate)>0:
836
909
  pad=(ax1.transData.transform((skip, title_pad*maxy))[1]-ax1.transData.transform((skip, maxy)))[1]
837
910
  ax1.set_title(mtitle,pad=pad,fontsize=title_fontsize,family=font_family)
838
911
  elif mtitle:
839
912
  ax1.set_title(mtitle,fontsize=title_fontsize,family=font_family)
840
-
913
+ log.write("Finished processing figure arts.",verbose=verbose)
914
+
841
915
  # Add annotation arrows and texts
916
+ log.write("Start to annotate variants...",verbose=verbose)
842
917
  ax1 = annotate_single(
843
918
  sumstats=sumstats,
844
919
  anno=anno,
@@ -872,7 +947,8 @@ def mqqplot(insumstats,
872
947
  log=log,
873
948
  _invert=_invert
874
949
  )
875
- # Manhatann plot Finished #####################################################################
950
+ log.write("Finished annotating variants.",verbose=verbose)
951
+ # Manhatann-like plot Finished #####################################################################
876
952
 
877
953
  # QQ plot #########################################################################################################
878
954
  if "qq" in mode:
@@ -916,9 +992,9 @@ def mqqplot(insumstats,
916
992
 
917
993
  # Y axis jagged
918
994
  if jagged==True:
919
- ax1 = _jagged_y(cut=cut,skip=skip,ax1=ax1,mode=1,mqqratio=mqqratio,jagged_len=jagged_len,jagged_wid=jagged_wid)
995
+ ax1 = _jagged_y(cut=cut,skip=skip,ax1=ax1,mode=1,mqqratio=mqqratio,jagged_len=jagged_len,jagged_wid=jagged_wid,log=log, verbose=verbose)
920
996
  if "qq" in mode:
921
- ax2 = _jagged_y(cut=cut,skip=skip,ax1=ax2,mode=2,mqqratio=mqqratio,jagged_len=jagged_len,jagged_wid=jagged_wid)
997
+ ax2 = _jagged_y(cut=cut,skip=skip,ax1=ax2,mode=2,mqqratio=mqqratio,jagged_len=jagged_len,jagged_wid=jagged_wid,log=log, verbose=verbose)
922
998
 
923
999
  # XY lim
924
1000
  if ylim is not None:
@@ -943,6 +1019,8 @@ def mqqplot(insumstats,
943
1019
  # Return matplotlib figure object #######################################################################################
944
1020
  if _get_region_lead==True:
945
1021
  return fig, log, lead_snp_i, lead_snp_i2
1022
+
1023
+ log.write("Finished creating plot successfully!",verbose=verbose)
946
1024
  return fig, log
947
1025
 
948
1026
  ##############################################################################################################################################################################
@@ -1037,22 +1115,22 @@ def _sanity_check(sumstats, mode, chrom, pos, stratified, _if_quick_qc, log, ver
1037
1115
  #sanity check : drop variants with na values in chr and pos df
1038
1116
  sumstats = sumstats.dropna(subset=[chrom,pos])
1039
1117
  after_number=len(sumstats)
1040
- if verbose:log.write(" -Removed "+ str(pre_number-after_number) +" variants with nan in CHR or POS column ...")
1118
+ log.write(" -Removed "+ str(pre_number-after_number) +" variants with nan in CHR or POS column ...",verbose=verbose)
1041
1119
  out_of_range_chr = sumstats[chrom]<=0
1042
- if verbose:log.write(" -Removed {} variants with CHR <=0...".format(sum(out_of_range_chr)))
1120
+ log.write(" -Removed {} variants with CHR <=0...".format(sum(out_of_range_chr)),verbose=verbose)
1043
1121
  sumstats = sumstats.loc[~out_of_range_chr,:]
1044
1122
 
1045
1123
  if stratified is True and _if_quick_qc:
1046
1124
  pre_number=len(sumstats)
1047
1125
  sumstats = sumstats.dropna(subset=["MAF"])
1048
1126
  after_number=len(sumstats)
1049
- if verbose:log.write(" -Removed "+ str(pre_number-after_number) +" variants with nan in EAF column ...")
1127
+ log.write(" -Removed "+ str(pre_number-after_number) +" variants with nan in EAF column ...",verbose=verbose)
1050
1128
 
1051
1129
  if "b" not in mode and _if_quick_qc:
1052
1130
  pre_number=len(sumstats)
1053
1131
  sumstats = sumstats.dropna(subset=["raw_P"])
1054
1132
  after_number=len(sumstats)
1055
- if verbose:log.write(" -Removed "+ str(pre_number-after_number) +" variants with nan in P column ...")
1133
+ log.write(" -Removed "+ str(pre_number-after_number) +" variants with nan in P column ...",verbose=verbose)
1056
1134
  return sumstats
1057
1135
 
1058
1136
  def _process_p_value(sumstats, mode,p, mlog10p, scaled, log, verbose ):
@@ -1061,7 +1139,7 @@ def _process_p_value(sumstats, mode,p, mlog10p, scaled, log, verbose ):
1061
1139
  sumstats["scaled_P"] = sumstats["DENSITY"].copy()
1062
1140
  sumstats["raw_P"] = -np.log10(sumstats["DENSITY"].copy()+2)
1063
1141
  elif scaled is True:
1064
- if verbose:log.write(" -P values are already converted to -log10(P)!")
1142
+ log.write(" -P values are already converted to -log10(P)!",verbose=verbose)
1065
1143
  sumstats["scaled_P"] = sumstats["raw_P"].copy()
1066
1144
  sumstats["raw_P"] = np.power(10,-sumstats["scaled_P"].astype("float64"))
1067
1145
  else:
@@ -1109,7 +1187,7 @@ def _process_highlight(sumstats, highlight, highlight_chrpos, highlight_windowkb
1109
1187
  sumstats.loc[right_chr&up_pos&low_pos,"HUE"]=0
1110
1188
  else:
1111
1189
  # highlight for one set
1112
- # to_highlight = sumstats.loc[sumstats[snpid].isin(highlight),:]
1190
+ to_highlight = sumstats.loc[sumstats[snpid].isin(highlight),:]
1113
1191
  #assign colors: 0 is hightlight color
1114
1192
  for index,row in to_highlight.iterrows():
1115
1193
  target_chr = int(row[chrom])
@@ -1122,7 +1200,7 @@ def _process_highlight(sumstats, highlight, highlight_chrpos, highlight_windowkb
1122
1200
 
1123
1201
  def _process_density(sumstats, mode, bwindowsizekb, chrom, pos, verbose, log):
1124
1202
  if "b" in mode and "DENSITY" not in sumstats.columns:
1125
- if verbose:log.write(" -Calculating DENSITY with windowsize of ",bwindowsizekb ," kb")
1203
+ log.write(" -Calculating DENSITY with windowsize of ",bwindowsizekb ," kb",verbose=verbose)
1126
1204
  large_number = _get_largenumber(sumstats[pos].max(),log=log)
1127
1205
 
1128
1206
  stack=[]
@@ -1143,11 +1221,12 @@ def _process_density(sumstats, mode, bwindowsizekb, chrom, pos, verbose, log):
1143
1221
  elif "b" in mode and "DENSITY" in sumstats.columns:
1144
1222
  bmean=sumstats["DENSITY"].mean()
1145
1223
  bmedian=sumstats["DENSITY"].median()
1146
- if verbose:log.write(" -DENSITY column exists. Skipping calculation...")
1224
+ log.write(" -DENSITY column exists. Skipping calculation...",verbose=verbose)
1147
1225
  return sumstats, bmean, bmedian
1148
1226
 
1149
- def _process_line(ax1, sig_line, suggestive_sig_line, additional_line, lines_to_plot , sc_linewidth, sig_line_color, suggestive_sig_line_color, additional_line_color, mode, bmean, bmedian ):
1227
+ def _process_line(ax1, sig_line, suggestive_sig_line, additional_line, lines_to_plot , sc_linewidth, sig_line_color, suggestive_sig_line_color, additional_line_color, mode, bmean, bmedian , log=Log(),verbose=True):
1150
1228
  # genomewide significant line
1229
+ log.write(" -Processing lines...",verbose=verbose)
1151
1230
  if sig_line is True:
1152
1231
  sigline = ax1.axhline(y=lines_to_plot[0],
1153
1232
  linewidth = sc_linewidth,
@@ -1173,12 +1252,39 @@ def _process_line(ax1, sig_line, suggestive_sig_line, additional_line, lines_to_
1173
1252
  medianline = ax1.axhline(y=bmedian, linewidth = sc_linewidth,linestyle="--",color=sig_line_color,zorder=1000)
1174
1253
  return ax1
1175
1254
 
1176
- def _process_xtick(ax1, chrom_df, xtick_chr_dict, fontsize, font_family):
1255
+ def _process_cbar(cbar, cbar_fontsize, cbar_font_family, cbar_title, log=Log(),verbose=True):
1256
+ log.write(" -Processing color bar...",verbose=verbose)
1257
+ if type(cbar) == list:
1258
+ for cbar_single in cbar:
1259
+ cbar_yticklabels = cbar_single.ax.get_yticklabels()
1260
+ cbar_single.ax.set_yticklabels(cbar_yticklabels, fontsize=cbar_fontsize, family=cbar_font_family )
1261
+ cbar_single.ax.set_title(cbar_title, fontsize=cbar_fontsize, family=cbar_font_family, loc="center",y=-0.2 )
1262
+ else:
1263
+ cbar_yticklabels = cbar.ax.get_yticklabels()
1264
+ cbar.ax.set_yticklabels(cbar_yticklabels, fontsize=cbar_fontsize, family=cbar_font_family )
1265
+ cbar.ax.set_title(cbar_title, fontsize=cbar_fontsize, family=cbar_font_family, loc="center",y=-0.2 )
1266
+ return cbar
1267
+
1268
+ def _process_xtick(ax1, chrom_df, xtick_chr_dict, fontsize, font_family, log=Log(),verbose=True):
1269
+ log.write(" -Processing X ticks...",verbose=verbose)
1177
1270
  ax1.set_xticks(chrom_df.astype("float64"))
1178
1271
  ax1.set_xticklabels(chrom_df.index.astype("Int64").map(xtick_chr_dict),fontsize=fontsize,family=font_family)
1179
1272
  return ax1
1180
1273
 
1181
- def _process_xlabel(region, xlabel, ax1, gtf_path, mode, fontsize, font_family, ax3=None ):
1274
+ def _process_ytick(ax1, fontsize, font_family, ax4, log=Log(),verbose=True):
1275
+ log.write(" -Processing Y labels...",verbose=verbose)
1276
+ ax1_yticklabels = ax1.get_yticklabels()
1277
+ #ax1.set_yticklabels(ax1_yticklabels,fontsize=fontsize,family=font_family)
1278
+ ax1_yticks = ax1.get_yticks()
1279
+ ax1.set_yticks(ax1_yticks,ax1_yticklabels,fontsize=fontsize,family=font_family)
1280
+ if ax4 is not None:
1281
+ ax4_yticklabels = ax4.get_yticklabels()
1282
+ ax4_yticks = ax4.get_yticks()
1283
+ ax4.set_yticks(ax4_yticks,ax4_yticklabels, fontsize=fontsize,family=font_family)
1284
+ return ax1, ax4
1285
+
1286
+ def _process_xlabel(region, xlabel, ax1, gtf_path, mode, fontsize, font_family, ax3=None , log=Log(),verbose=True):
1287
+ log.write(" -Processing X labels...",verbose=verbose)
1182
1288
  if region is not None:
1183
1289
  if xlabel is None:
1184
1290
  xlabel = "Chromosome "+str(region[0])+" (MB)"
@@ -1192,7 +1298,8 @@ def _process_xlabel(region, xlabel, ax1, gtf_path, mode, fontsize, font_family,
1192
1298
  ax1.set_xlabel(xlabel,fontsize=fontsize,family=font_family)
1193
1299
  return ax1, ax3
1194
1300
 
1195
- def _process_ylabel(ylabel, ax1, mode, bwindowsizekb, fontsize, font_family):
1301
+ def _process_ylabel(ylabel, ax1, mode, bwindowsizekb, fontsize, font_family, ax4=None, log=Log(),verbose=True):
1302
+ log.write(" -Processing Y labels...",verbose=verbose)
1196
1303
  if "b" in mode:
1197
1304
  if ylabel is None:
1198
1305
  ylabel ="Density of GWAS \n SNPs within "+str(bwindowsizekb)+" kb"
@@ -1201,7 +1308,10 @@ def _process_ylabel(ylabel, ax1, mode, bwindowsizekb, fontsize, font_family):
1201
1308
  if ylabel is None:
1202
1309
  ylabel ="$-log_{10}(P)$"
1203
1310
  ax1.set_ylabel(ylabel,fontsize=fontsize,family=font_family)
1204
- return ax1
1311
+ if ax4 is not None:
1312
+ ax4_ylabel = ax4.get_ylabel()
1313
+ ax4.set_ylabel(ax4_ylabel, fontsize=fontsize, family=font_family )
1314
+ return ax1, ax4
1205
1315
 
1206
1316
  def _process_spine(ax1, mode):
1207
1317
  ax1.spines["top"].set_visible(False)
@@ -1218,6 +1328,7 @@ def _process_layout(mode, figax, fig_args, mqqratio, region_hspace):
1218
1328
  if mode=="qqm":
1219
1329
  fig, (ax2, ax1) = plt.subplots(1, 2,gridspec_kw={'width_ratios': [1, mqqratio]},**fig_args)
1220
1330
  ax3 = None
1331
+
1221
1332
  elif mode=="mqq":
1222
1333
  if figax is not None:
1223
1334
  fig = figax[0]
@@ -1226,6 +1337,7 @@ def _process_layout(mode, figax, fig_args, mqqratio, region_hspace):
1226
1337
  else:
1227
1338
  fig, (ax1, ax2) = plt.subplots(1, 2,gridspec_kw={'width_ratios': [mqqratio, 1]},**fig_args)
1228
1339
  ax3 = None
1340
+
1229
1341
  elif mode=="m":
1230
1342
  if figax is not None:
1231
1343
  fig = figax[0]
@@ -1234,10 +1346,12 @@ def _process_layout(mode, figax, fig_args, mqqratio, region_hspace):
1234
1346
  fig, ax1 = plt.subplots(1, 1,**fig_args)
1235
1347
  ax2 = None
1236
1348
  ax3 = None
1349
+
1237
1350
  elif mode=="qq":
1238
1351
  fig, ax2 = plt.subplots(1, 1,**fig_args)
1239
1352
  ax1=None
1240
1353
  ax3=None
1354
+
1241
1355
  elif mode=="r":
1242
1356
  if figax is not None:
1243
1357
  fig = figax[0]
@@ -1257,4 +1371,6 @@ def _process_layout(mode, figax, fig_args, mqqratio, region_hspace):
1257
1371
  ax3 = None
1258
1372
  else:
1259
1373
  raise ValueError("Please select one from the 5 modes: mqq/qqm/m/qq/r/b")
1260
- return fig, ax1, ax2, ax3
1374
+ ax4=None
1375
+ cbar=None
1376
+ return fig, ax1, ax2, ax3, ax4, cbar