gwaslab-3.4.37-py3-none-any.whl → gwaslab-3.4.38-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gwaslab might be problematic. Click here for more details.

Files changed (37):
  1. gwaslab/data/formatbook.json +722 -721
  2. gwaslab/g_Log.py +8 -0
  3. gwaslab/g_Sumstats.py +26 -147
  4. gwaslab/g_SumstatsPair.py +6 -2
  5. gwaslab/g_Sumstats_summary.py +3 -3
  6. gwaslab/g_version.py +2 -2
  7. gwaslab/hm_casting.py +29 -15
  8. gwaslab/hm_harmonize_sumstats.py +291 -163
  9. gwaslab/hm_rsid_to_chrpos.py +1 -1
  10. gwaslab/io_preformat_input.py +43 -37
  11. gwaslab/io_to_formats.py +428 -295
  12. gwaslab/qc_check_datatype.py +3 -3
  13. gwaslab/qc_fix_sumstats.py +793 -682
  14. gwaslab/util_ex_calculate_ldmatrix.py +29 -11
  15. gwaslab/util_ex_gwascatalog.py +1 -1
  16. gwaslab/util_ex_ldproxyfinder.py +1 -1
  17. gwaslab/util_ex_process_ref.py +3 -3
  18. gwaslab/util_ex_run_coloc.py +26 -4
  19. gwaslab/util_in_convert_h2.py +1 -1
  20. gwaslab/util_in_fill_data.py +2 -2
  21. gwaslab/util_in_filter_value.py +122 -34
  22. gwaslab/util_in_get_density.py +2 -2
  23. gwaslab/util_in_get_sig.py +41 -9
  24. gwaslab/viz_aux_quickfix.py +24 -19
  25. gwaslab/viz_aux_reposition_text.py +7 -4
  26. gwaslab/viz_aux_save_figure.py +6 -5
  27. gwaslab/viz_plot_compare_af.py +5 -5
  28. gwaslab/viz_plot_miamiplot2.py +28 -20
  29. gwaslab/viz_plot_mqqplot.py +109 -72
  30. gwaslab/viz_plot_qqplot.py +11 -8
  31. gwaslab/viz_plot_regionalplot.py +3 -1
  32. gwaslab/viz_plot_trumpetplot.py +15 -6
  33. {gwaslab-3.4.37.dist-info → gwaslab-3.4.38.dist-info}/METADATA +2 -2
  34. {gwaslab-3.4.37.dist-info → gwaslab-3.4.38.dist-info}/RECORD +37 -37
  35. {gwaslab-3.4.37.dist-info → gwaslab-3.4.38.dist-info}/LICENSE +0 -0
  36. {gwaslab-3.4.37.dist-info → gwaslab-3.4.38.dist-info}/WHEEL +0 -0
  37. {gwaslab-3.4.37.dist-info → gwaslab-3.4.38.dist-info}/top_level.txt +0 -0
@@ -213,7 +213,6 @@ def mqqplot(insumstats,
213
213
  chr_dict = get_chr_to_number()
214
214
  if xtick_chr_dict is None:
215
215
  xtick_chr_dict = get_number_to_chr()
216
-
217
216
  if gtf_chr_dict is None:
218
217
  gtf_chr_dict = get_number_to_chr()
219
218
  if rr_chr_dict is None:
@@ -304,40 +303,42 @@ def mqqplot(insumstats,
304
303
  scatter_args["rasterized"]=True
305
304
  qq_scatter_args["rasterized"]=True
306
305
 
307
- if verbose: log.write("Start to create MQQ plot with the following basic settings {}:".format(_get_version()))
308
- if verbose: log.write(" -Genomic coordinates version: {}...".format(build))
306
+ log.write("Start to create MQQ plot...{}:".format(_get_version()),verbose=verbose)
307
+ log.write(" -Genomic coordinates version: {}...".format(build),verbose=verbose)
309
308
  if build is None or build=="99":
310
- if verbose: log.write(" -WARNING: Genomic coordinates version is unknown...")
311
- if verbose: log.write(" -Genome-wide significance level to plot is set to "+str(sig_level_plot)+" ...")
312
- if verbose: log.write(" -Raw input contains "+str(len(insumstats))+" variants...")
313
- if verbose: log.write(" -MQQ plot layout mode is : "+mode)
309
+ log.warning("Genomic coordinates version is unknown.")
310
+ log.write(" -Genome-wide significance level to plot is set to "+str(sig_level_plot)+" ...",verbose=verbose)
311
+ log.write(" -Raw input contains "+str(len(insumstats))+" variants...",verbose=verbose)
312
+ log.write(" -MQQ plot layout mode is : "+mode,verbose=verbose)
313
+
314
314
  if len(anno_set)>0 and ("m" in mode):
315
- if verbose: log.write(" -Variants to annotate : "+",".join(anno_set))
315
+ log.write(" -Variants to annotate : "+",".join(anno_set),verbose=verbose)
316
+
316
317
  if len(highlight)>0 and ("m" in mode):
317
318
  if pd.api.types.is_list_like(highlight[0]):
318
319
  if highlight_chrpos==False:
319
- if len(highlight[0]) == len(highlight_color):
320
- log.write(" -WARNING: number of locus list does not match number of colors !!!")
320
+ if len(highlight) != len(highlight_color):
321
+ log.warning("Number of locus groups in the list does not match number of provided colors.")
321
322
  for i, highlight_set in enumerate(highlight):
322
- if verbose: log.write(" -Set {} loci to highlight ({}) : ".format(i+1, highlight_color[i%len(highlight_color)])+",".join(highlight_set))
323
+ log.write(" -Set {} loci to highlight ({}) : ".format(i+1, highlight_color[i%len(highlight_color)])+",".join(highlight_set),verbose=verbose)
323
324
  else:
324
- if verbose: log.write(" -Loci to highlight ({}): {}".format(highlight_color,highlight))
325
- if verbose: log.write(" -Highlight_window is set to: ", highlight_windowkb, " kb")
325
+ log.write(" -Loci to highlight ({}): {}".format(highlight_color,highlight),verbose=verbose)
326
+ log.write(" -highlight_windowkb is set to: ", highlight_windowkb, " kb",verbose=verbose)
326
327
  else:
327
- if verbose: log.write(" -Loci to highlight ({}): ".format(highlight_color)+",".join(highlight))
328
- if verbose: log.write(" -Highlight_window is set to: ", highlight_windowkb, " kb")
328
+ log.write(" -Loci to highlight ({}): ".format(highlight_color)+",".join(highlight),verbose=verbose)
329
+ log.write(" -highlight_windowkb is set to: ", highlight_windowkb, " kb",verbose=verbose)
329
330
 
330
331
  if len(pinpoint)>0 :
331
332
  if pd.api.types.is_list_like(pinpoint[0]):
332
- if len(pinpoint[0]) == len(pinpoint_color):
333
- log.write(" -WARNING: number of variant list does not match number of colors !!!")
333
+ if len(pinpoint) != len(pinpoint_color):
334
+ log.warning("Number of variant groups in the list does not match number of provided colors.")
334
335
  for i, pinpoint_set in enumerate(pinpoint):
335
- if verbose: log.write(" -Set {} variants to pinpoint ({}) : ".format(i+1,pinpoint_color[i%len(pinpoint_color)])+",".join(pinpoint_set))
336
+ log.write(" -Set {} variants to pinpoint ({}) : ".format(i+1,pinpoint_color[i%len(pinpoint_color)])+",".join(pinpoint_set),verbose=verbose)
336
337
  else:
337
- if verbose: log.write(" -Variants to pinpoint ({}) : ".format(pinpoint_color)+",".join(pinpoint))
338
+ log.write(" -Variants to pinpoint ({}) : ".format(pinpoint_color)+",".join(pinpoint),verbose=verbose)
338
339
 
339
340
  if region is not None:
340
- if verbose: log.write(" -Region to plot : chr"+str(region[0])+":"+str(region[1])+"-"+str(region[2])+".")
341
+ log.write(" -Region to plot : chr"+str(region[0])+":"+str(region[1])+"-"+str(region[2])+".",verbose=verbose)
341
342
 
342
343
  # construct line series for coversion
343
344
  if additional_line is None:
@@ -399,7 +400,7 @@ def mqqplot(insumstats,
399
400
  pinpoint=pinpoint,
400
401
  density_color=density_color)
401
402
 
402
- sumstats = insumstats.loc[:,usecols].copy()
403
+ sumstats = insumstats[usecols].copy()
403
404
 
404
405
  #################################################################################################
405
406
 
@@ -408,7 +409,7 @@ def mqqplot(insumstats,
408
409
  if (anno == "GENENAME"):
409
410
  anno_sig=True
410
411
  elif (anno is not None) and (anno is not True):
411
- sumstats["Annotation"]=sumstats.loc[:,anno].astype("string")
412
+ sumstats["Annotation"]=sumstats[anno].astype("string")
412
413
 
413
414
  ## P value
414
415
  ## m, qq, r
@@ -432,15 +433,15 @@ def mqqplot(insumstats,
432
433
  region_start = region[1]
433
434
  region_end = region[2]
434
435
  marker_size=(25,45)
435
- if verbose:log.write(" -Extract SNPs in region : chr{}:{}-{}...".format(region_chr, region[1], region[2]))
436
+ log.write(" -Extract SNPs in region : chr{}:{}-{}...".format(region_chr, region[1], region[2]),verbose=verbose)
436
437
 
437
- in_region_snp = (sumstats[chrom]==region_chr) &(sumstats[pos]<region_end) &(sumstats[pos]>region_start)
438
+ in_region_snp = (sumstats[chrom]==region_chr) & (sumstats[pos]<region_end) & (sumstats[pos]>region_start)
438
439
 
439
- if verbose:log.write(" -Extract SNPs in specified regions: "+str(sum(in_region_snp)))
440
+ log.write(" -Extract SNPs in specified regions: "+str(sum(in_region_snp)),verbose=verbose)
440
441
  sumstats = sumstats.loc[in_region_snp,:]
441
442
 
442
443
  if len(sumstats)==0:
443
- log.write(" -Warning : No valid data! Please check the input.")
444
+ log.warning("No valid data! Please check the input.")
444
445
  return None
445
446
 
446
447
  ## EAF
@@ -454,11 +455,11 @@ def mqqplot(insumstats,
454
455
  sumstats["HUE"] = pd.NA
455
456
  sumstats["HUE"] = sumstats["HUE"].astype("Int64")
456
457
 
457
- if verbose: log.write("Finished loading specified columns from the sumstats.")
458
+ log.write("Finished loading specified columns from the sumstats.",verbose=verbose)
458
459
 
459
460
 
460
461
  #sanity check############################################################################################################
461
- log.write("Start conversion and sanity check:",verbose=verbose)
462
+ log.write("Start data conversion and sanity check:",verbose=verbose)
462
463
 
463
464
  if _if_quick_qc == False:
464
465
  log.write(" -Sanity check will be skipped.", verbose=verbose)
@@ -527,15 +528,19 @@ def mqqplot(insumstats,
527
528
  lines_to_plot=lines_to_plot,
528
529
  log = log)
529
530
  except:
530
- log.write(" -Warning : No valid data! Please check the input.")
531
+ log.warning("No valid data! Please check the input.")
531
532
  return None
532
533
 
534
+ log.write("Finished data conversion and sanity check.",verbose=verbose)
535
+
533
536
  # Manhattan plot ##########################################################################################################
537
+ log.write("Start to create MQQ plot with "+str(len(sumstats))+" variants...",verbose=verbose)
534
538
  ## regional plot ->rsq
535
539
  #calculate rsq]
536
540
  if vcf_path is not None:
537
541
  if tabix is None:
538
542
  tabix = which("tabix")
543
+ log.write(" -tabix will be used: {}".format(tabix),verbose=verbose)
539
544
  sumstats = process_vcf(sumstats=sumstats,
540
545
  vcf_path=vcf_path,
541
546
  region=region,
@@ -568,8 +573,6 @@ def mqqplot(insumstats,
568
573
 
569
574
  if vcf_path is not None:
570
575
  sumstats["chr_hue"]=sumstats["LD"]
571
-
572
- if verbose:log.write("Start to create MQQ plot with "+str(len(sumstats))+" variants:")
573
576
  ## default seetings
574
577
 
575
578
  palette = sns.color_palette(colors,n_colors=sumstats[chrom].nunique())
@@ -601,6 +604,7 @@ def mqqplot(insumstats,
601
604
  ## if highlight
602
605
  highlight_i = pd.DataFrame()
603
606
  if len(highlight) >0:
607
+ log.write(" -Creating background plot...",verbose=verbose)
604
608
  plot = sns.scatterplot(data=sumstats, x='i', y='scaled_P',
605
609
  hue='chr_hue',
606
610
  palette=palette,
@@ -612,8 +616,7 @@ def mqqplot(insumstats,
612
616
  zorder=2,ax=ax1,edgecolor=edgecolor, **scatter_args)
613
617
  if pd.api.types.is_list_like(highlight[0]) and highlight_chrpos==False:
614
618
  for i, highlight_set in enumerate(highlight):
615
- if verbose: log.write(" -Highlighting set {} target loci...".format(i+1))
616
- print(sumstats["HUE"].dtype)
619
+ log.write(" -Highlighting set {} target loci...".format(i+1),verbose=verbose)
617
620
  sns.scatterplot(data=sumstats.loc[sumstats["HUE"]==i], x='i', y='scaled_P',
618
621
  hue="HUE",
619
622
  palette={i:highlight_color[i%len(highlight_color)]},
@@ -625,7 +628,7 @@ def mqqplot(insumstats,
625
628
  zorder=3+i,ax=ax1,edgecolor=edgecolor,**scatter_args)
626
629
  highlight_i = sumstats.loc[~sumstats["HUE"].isna(),"i"].values
627
630
  else:
628
- if verbose: log.write(" -Highlighting target loci...")
631
+ log.write(" -Highlighting target loci...",verbose=verbose)
629
632
  sns.scatterplot(data=sumstats.loc[sumstats["HUE"]==0], x='i', y='scaled_P',
630
633
  hue="HUE",
631
634
  palette={0:highlight_color},
@@ -674,6 +677,7 @@ def mqqplot(insumstats,
674
677
  hue = 'chr_hue'
675
678
  hue_norm=None
676
679
  to_plot = sumstats
680
+ log.write(" -Creating background plot...",verbose=verbose)
677
681
  plot = sns.scatterplot(data=to_plot, x='i', y='scaled_P',
678
682
  hue=hue,
679
683
  palette= palette,
@@ -693,17 +697,17 @@ def mqqplot(insumstats,
693
697
  for i, pinpoint_set in enumerate(pinpoint):
694
698
  if sum(sumstats[snpid].isin(pinpoint_set))>0:
695
699
  to_pinpoint = sumstats.loc[sumstats[snpid].isin(pinpoint_set),:]
696
- if verbose: log.write(" -Pinpointing set {} target vairants...".format(i+1))
700
+ log.write(" -Pinpointing set {} target vairants...".format(i+1),verbose=verbose)
697
701
  ax1.scatter(to_pinpoint["i"],to_pinpoint["scaled_P"],color=pinpoint_color[i%len(pinpoint_color)],zorder=100,s=marker_size[1]+1)
698
702
  else:
699
- if verbose: log.write(" -Target vairants to pinpoint were not found. Skip pinpointing process...")
703
+ log.write(" -Target vairants to pinpoint were not found. Skip pinpointing process...",verbose=verbose)
700
704
  else:
701
705
  if sum(sumstats[snpid].isin(pinpoint))>0:
702
706
  to_pinpoint = sumstats.loc[sumstats[snpid].isin(pinpoint),:]
703
- if verbose: log.write(" -Pinpointing target vairants...")
707
+ log.write(" -Pinpointing target vairants...",verbose=verbose)
704
708
  ax1.scatter(to_pinpoint["i"],to_pinpoint["scaled_P"],color=pinpoint_color,zorder=100,s=marker_size[1]+1)
705
709
  else:
706
- if verbose: log.write(" -Target vairants to pinpoint were not found. Skip pinpointing process...")
710
+ log.write(" -Target vairants to pinpoint were not found. Skip pinpointing process...",verbose=verbose)
707
711
 
708
712
 
709
713
 
@@ -767,12 +771,15 @@ def mqqplot(insumstats,
767
771
  lead_snp_i= None
768
772
  lead_snp_i2=None
769
773
 
774
+ log.write("Finished creating MQQ plot successfully!",verbose=verbose)
775
+
770
776
  # Get top variants for annotation #######################################################
777
+ log.write("Start to extract variants for annotation...",verbose=verbose)
771
778
  if (anno and anno!=True) or (len(anno_set)>0):
772
779
  if len(anno_set)>0:
773
780
  to_annotate=sumstats.loc[sumstats[snpid].isin(anno_set),:]
774
781
  if to_annotate.empty is not True:
775
- if verbose: log.write(" -Found "+str(len(to_annotate))+" specified variants to annotate...")
782
+ log.write(" -Found "+str(len(to_annotate))+" specified variants to annotate...",verbose=verbose)
776
783
  else:
777
784
  to_annotate=getsig(sumstats.loc[sumstats["scaled_P"]> float(-np.log10(sig_level_lead)),:],
778
785
  snpid,
@@ -785,7 +792,7 @@ def mqqplot(insumstats,
785
792
  mlog10p="scaled_P",
786
793
  verbose=False)
787
794
  if (to_annotate.empty is not True) and ("b" not in mode):
788
- if verbose: log.write(" -Found "+str(len(to_annotate))+" significant variants with a sliding window size of "+str(windowsizekb)+" kb...")
795
+ log.write(" -Found "+str(len(to_annotate))+" significant variants with a sliding window size of "+str(windowsizekb)+" kb...",verbose=verbose)
789
796
  else:
790
797
  to_annotate=getsig(sumstats.loc[sumstats["scaled_P"]> float(-np.log10(sig_level_lead)),:],
791
798
  "i",
@@ -798,7 +805,7 @@ def mqqplot(insumstats,
798
805
  mlog10p="scaled_P",
799
806
  sig_level=sig_level_lead)
800
807
  if (to_annotate.empty is not True) and ("b" not in mode):
801
- if verbose: log.write(" -Found "+str(len(to_annotate))+" significant variants with a sliding window size of "+str(windowsizekb)+" kb...")
808
+ log.write(" -Found "+str(len(to_annotate))+" significant variants with a sliding window size of "+str(windowsizekb)+" kb...",verbose=verbose)
802
809
  if (to_annotate.empty is not True) and anno=="GENENAME":
803
810
  to_annotate = annogene(to_annotate,
804
811
  id=snpid,
@@ -808,16 +815,21 @@ def mqqplot(insumstats,
808
815
  build=build,
809
816
  source=anno_source,
810
817
  verbose=verbose).rename(columns={"GENE":"Annotation"})
818
+ log.write("Finished extracting variants for annotation...",verbose=verbose)
811
819
 
812
820
  # Configure X, Y axes #######################################################
821
+ log.write("Start to process figure arts.",verbose=verbose)
813
822
  if region is None:
814
823
  # if Manhattan plot
824
+
815
825
  ax1 = _process_xtick(ax1=ax1,
816
826
  chrom_df=chrom_df,
817
827
  xtick_chr_dict=xtick_chr_dict,
818
828
  fontsize = fontsize,
819
- font_family=font_family)
820
-
829
+ font_family=font_family,
830
+ log=log,
831
+ verbose=verbose)
832
+
821
833
  ax1, ax3 = _process_xlabel(region=region,
822
834
  xlabel=xlabel,
823
835
  ax1=ax1,
@@ -825,7 +837,9 @@ def mqqplot(insumstats,
825
837
  mode=mode,
826
838
  fontsize=fontsize,
827
839
  font_family=font_family,
828
- ax3=ax3 )
840
+ ax3=ax3,
841
+ log=log,
842
+ verbose=verbose)
829
843
 
830
844
  ax1, ax4 = _process_ylabel(ylabel=ylabel,
831
845
  ax1=ax1,
@@ -833,8 +847,11 @@ def mqqplot(insumstats,
833
847
  bwindowsizekb=bwindowsizekb,
834
848
  fontsize=fontsize,
835
849
  font_family=font_family,
836
- ax4=ax4)
850
+ ax4=ax4,
851
+ log=log,
852
+ verbose=verbose)
837
853
 
854
+
838
855
  ax1 = _set_yticklabels(cut=cut,
839
856
  cutfactor=cutfactor,
840
857
  cut_log=cut_log,
@@ -849,19 +866,28 @@ def mqqplot(insumstats,
849
866
  font_family=font_family,
850
867
  ytick3=ytick3,
851
868
  ylabels=ylabels,
852
- ylabels_converted=ylabels_converted
853
- )
869
+ ylabels_converted=ylabels_converted,
870
+ log=log,
871
+ verbose=verbose)
854
872
 
855
873
  ax1, ax4 = _process_ytick(ax1=ax1,
856
874
  fontsize=fontsize,
857
875
  font_family=font_family,
858
- ax4=ax4)
876
+ ax4=ax4,
877
+ log=log,
878
+ verbose=verbose)
859
879
 
860
- if cbar is not None:
861
- # regional plot cbar
862
- cbar = _process_cbar(cbar, cbar_fontsize=fontsize, cbar_font_family=font_family, cbar_title=cbar_title)
880
+ # regional plot cbar
881
+ if cbar is not None:
882
+ cbar = _process_cbar(cbar,
883
+ cbar_fontsize=fontsize,
884
+ cbar_font_family=font_family,
885
+ cbar_title=cbar_title,
886
+ log=log,
887
+ verbose=verbose)
863
888
 
864
889
  ax1 = _process_spine(ax1, mode)
890
+
865
891
  # genomewide significant line
866
892
  ax1 = _process_line(ax1,
867
893
  sig_line,
@@ -874,7 +900,9 @@ def mqqplot(insumstats,
874
900
  additional_line_color,
875
901
  mode,
876
902
  bmean,
877
- bmedian )
903
+ bmedian,
904
+ log=log,
905
+ verbose=verbose )
878
906
 
879
907
 
880
908
  if mtitle and anno and len(to_annotate)>0:
@@ -882,8 +910,10 @@ def mqqplot(insumstats,
882
910
  ax1.set_title(mtitle,pad=pad,fontsize=title_fontsize,family=font_family)
883
911
  elif mtitle:
884
912
  ax1.set_title(mtitle,fontsize=title_fontsize,family=font_family)
885
-
913
+ log.write("Finished processing figure arts.",verbose=verbose)
914
+
886
915
  # Add annotation arrows and texts
916
+ log.write("Start to annotate variants...",verbose=verbose)
887
917
  ax1 = annotate_single(
888
918
  sumstats=sumstats,
889
919
  anno=anno,
@@ -917,6 +947,7 @@ def mqqplot(insumstats,
917
947
  log=log,
918
948
  _invert=_invert
919
949
  )
950
+ log.write("Finished annotating variants.",verbose=verbose)
920
951
  # Manhatann-like plot Finished #####################################################################
921
952
 
922
953
  # QQ plot #########################################################################################################
@@ -961,9 +992,9 @@ def mqqplot(insumstats,
961
992
 
962
993
  # Y axis jagged
963
994
  if jagged==True:
964
- ax1 = _jagged_y(cut=cut,skip=skip,ax1=ax1,mode=1,mqqratio=mqqratio,jagged_len=jagged_len,jagged_wid=jagged_wid)
995
+ ax1 = _jagged_y(cut=cut,skip=skip,ax1=ax1,mode=1,mqqratio=mqqratio,jagged_len=jagged_len,jagged_wid=jagged_wid,log=log, verbose=verbose)
965
996
  if "qq" in mode:
966
- ax2 = _jagged_y(cut=cut,skip=skip,ax1=ax2,mode=2,mqqratio=mqqratio,jagged_len=jagged_len,jagged_wid=jagged_wid)
997
+ ax2 = _jagged_y(cut=cut,skip=skip,ax1=ax2,mode=2,mqqratio=mqqratio,jagged_len=jagged_len,jagged_wid=jagged_wid,log=log, verbose=verbose)
967
998
 
968
999
  # XY lim
969
1000
  if ylim is not None:
@@ -989,7 +1020,7 @@ def mqqplot(insumstats,
989
1020
  if _get_region_lead==True:
990
1021
  return fig, log, lead_snp_i, lead_snp_i2
991
1022
 
992
- if verbose: log.write("Finished creating MQQ plot successfully!")
1023
+ log.write("Finished creating plot successfully!",verbose=verbose)
993
1024
  return fig, log
994
1025
 
995
1026
  ##############################################################################################################################################################################
@@ -1084,22 +1115,22 @@ def _sanity_check(sumstats, mode, chrom, pos, stratified, _if_quick_qc, log, ver
1084
1115
  #sanity check : drop variants with na values in chr and pos df
1085
1116
  sumstats = sumstats.dropna(subset=[chrom,pos])
1086
1117
  after_number=len(sumstats)
1087
- if verbose:log.write(" -Removed "+ str(pre_number-after_number) +" variants with nan in CHR or POS column ...")
1118
+ log.write(" -Removed "+ str(pre_number-after_number) +" variants with nan in CHR or POS column ...",verbose=verbose)
1088
1119
  out_of_range_chr = sumstats[chrom]<=0
1089
- if verbose:log.write(" -Removed {} variants with CHR <=0...".format(sum(out_of_range_chr)))
1120
+ log.write(" -Removed {} variants with CHR <=0...".format(sum(out_of_range_chr)),verbose=verbose)
1090
1121
  sumstats = sumstats.loc[~out_of_range_chr,:]
1091
1122
 
1092
1123
  if stratified is True and _if_quick_qc:
1093
1124
  pre_number=len(sumstats)
1094
1125
  sumstats = sumstats.dropna(subset=["MAF"])
1095
1126
  after_number=len(sumstats)
1096
- if verbose:log.write(" -Removed "+ str(pre_number-after_number) +" variants with nan in EAF column ...")
1127
+ log.write(" -Removed "+ str(pre_number-after_number) +" variants with nan in EAF column ...",verbose=verbose)
1097
1128
 
1098
1129
  if "b" not in mode and _if_quick_qc:
1099
1130
  pre_number=len(sumstats)
1100
1131
  sumstats = sumstats.dropna(subset=["raw_P"])
1101
1132
  after_number=len(sumstats)
1102
- if verbose:log.write(" -Removed "+ str(pre_number-after_number) +" variants with nan in P column ...")
1133
+ log.write(" -Removed "+ str(pre_number-after_number) +" variants with nan in P column ...",verbose=verbose)
1103
1134
  return sumstats
1104
1135
 
1105
1136
  def _process_p_value(sumstats, mode,p, mlog10p, scaled, log, verbose ):
@@ -1108,7 +1139,7 @@ def _process_p_value(sumstats, mode,p, mlog10p, scaled, log, verbose ):
1108
1139
  sumstats["scaled_P"] = sumstats["DENSITY"].copy()
1109
1140
  sumstats["raw_P"] = -np.log10(sumstats["DENSITY"].copy()+2)
1110
1141
  elif scaled is True:
1111
- if verbose:log.write(" -P values are already converted to -log10(P)!")
1142
+ log.write(" -P values are already converted to -log10(P)!",verbose=verbose)
1112
1143
  sumstats["scaled_P"] = sumstats["raw_P"].copy()
1113
1144
  sumstats["raw_P"] = np.power(10,-sumstats["scaled_P"].astype("float64"))
1114
1145
  else:
@@ -1156,7 +1187,7 @@ def _process_highlight(sumstats, highlight, highlight_chrpos, highlight_windowkb
1156
1187
  sumstats.loc[right_chr&up_pos&low_pos,"HUE"]=0
1157
1188
  else:
1158
1189
  # highlight for one set
1159
- # to_highlight = sumstats.loc[sumstats[snpid].isin(highlight),:]
1190
+ to_highlight = sumstats.loc[sumstats[snpid].isin(highlight),:]
1160
1191
  #assign colors: 0 is hightlight color
1161
1192
  for index,row in to_highlight.iterrows():
1162
1193
  target_chr = int(row[chrom])
@@ -1169,7 +1200,7 @@ def _process_highlight(sumstats, highlight, highlight_chrpos, highlight_windowkb
1169
1200
 
1170
1201
  def _process_density(sumstats, mode, bwindowsizekb, chrom, pos, verbose, log):
1171
1202
  if "b" in mode and "DENSITY" not in sumstats.columns:
1172
- if verbose:log.write(" -Calculating DENSITY with windowsize of ",bwindowsizekb ," kb")
1203
+ log.write(" -Calculating DENSITY with windowsize of ",bwindowsizekb ," kb",verbose=verbose)
1173
1204
  large_number = _get_largenumber(sumstats[pos].max(),log=log)
1174
1205
 
1175
1206
  stack=[]
@@ -1190,11 +1221,12 @@ def _process_density(sumstats, mode, bwindowsizekb, chrom, pos, verbose, log):
1190
1221
  elif "b" in mode and "DENSITY" in sumstats.columns:
1191
1222
  bmean=sumstats["DENSITY"].mean()
1192
1223
  bmedian=sumstats["DENSITY"].median()
1193
- if verbose:log.write(" -DENSITY column exists. Skipping calculation...")
1224
+ log.write(" -DENSITY column exists. Skipping calculation...",verbose=verbose)
1194
1225
  return sumstats, bmean, bmedian
1195
1226
 
1196
- def _process_line(ax1, sig_line, suggestive_sig_line, additional_line, lines_to_plot , sc_linewidth, sig_line_color, suggestive_sig_line_color, additional_line_color, mode, bmean, bmedian ):
1227
+ def _process_line(ax1, sig_line, suggestive_sig_line, additional_line, lines_to_plot , sc_linewidth, sig_line_color, suggestive_sig_line_color, additional_line_color, mode, bmean, bmedian , log=Log(),verbose=True):
1197
1228
  # genomewide significant line
1229
+ log.write(" -Processing lines...",verbose=verbose)
1198
1230
  if sig_line is True:
1199
1231
  sigline = ax1.axhline(y=lines_to_plot[0],
1200
1232
  linewidth = sc_linewidth,
@@ -1220,8 +1252,9 @@ def _process_line(ax1, sig_line, suggestive_sig_line, additional_line, lines_to_
1220
1252
  medianline = ax1.axhline(y=bmedian, linewidth = sc_linewidth,linestyle="--",color=sig_line_color,zorder=1000)
1221
1253
  return ax1
1222
1254
 
1223
- def _process_cbar(cbar, cbar_fontsize, cbar_font_family, cbar_title):
1224
- if str(type(cbar))=="list":
1255
+ def _process_cbar(cbar, cbar_fontsize, cbar_font_family, cbar_title, log=Log(),verbose=True):
1256
+ log.write(" -Processing color bar...",verbose=verbose)
1257
+ if type(cbar) == list:
1225
1258
  for cbar_single in cbar:
1226
1259
  cbar_yticklabels = cbar_single.ax.get_yticklabels()
1227
1260
  cbar_single.ax.set_yticklabels(cbar_yticklabels, fontsize=cbar_fontsize, family=cbar_font_family )
@@ -1232,12 +1265,14 @@ def _process_cbar(cbar, cbar_fontsize, cbar_font_family, cbar_title):
1232
1265
  cbar.ax.set_title(cbar_title, fontsize=cbar_fontsize, family=cbar_font_family, loc="center",y=-0.2 )
1233
1266
  return cbar
1234
1267
 
1235
- def _process_xtick(ax1, chrom_df, xtick_chr_dict, fontsize, font_family):
1268
+ def _process_xtick(ax1, chrom_df, xtick_chr_dict, fontsize, font_family, log=Log(),verbose=True):
1269
+ log.write(" -Processing X ticks...",verbose=verbose)
1236
1270
  ax1.set_xticks(chrom_df.astype("float64"))
1237
1271
  ax1.set_xticklabels(chrom_df.index.astype("Int64").map(xtick_chr_dict),fontsize=fontsize,family=font_family)
1238
1272
  return ax1
1239
1273
 
1240
- def _process_ytick(ax1, fontsize, font_family, ax4):
1274
+ def _process_ytick(ax1, fontsize, font_family, ax4, log=Log(),verbose=True):
1275
+ log.write(" -Processing Y labels...",verbose=verbose)
1241
1276
  ax1_yticklabels = ax1.get_yticklabels()
1242
1277
  #ax1.set_yticklabels(ax1_yticklabels,fontsize=fontsize,family=font_family)
1243
1278
  ax1_yticks = ax1.get_yticks()
@@ -1248,7 +1283,8 @@ def _process_ytick(ax1, fontsize, font_family, ax4):
1248
1283
  ax4.set_yticks(ax4_yticks,ax4_yticklabels, fontsize=fontsize,family=font_family)
1249
1284
  return ax1, ax4
1250
1285
 
1251
- def _process_xlabel(region, xlabel, ax1, gtf_path, mode, fontsize, font_family, ax3=None ):
1286
+ def _process_xlabel(region, xlabel, ax1, gtf_path, mode, fontsize, font_family, ax3=None , log=Log(),verbose=True):
1287
+ log.write(" -Processing X labels...",verbose=verbose)
1252
1288
  if region is not None:
1253
1289
  if xlabel is None:
1254
1290
  xlabel = "Chromosome "+str(region[0])+" (MB)"
@@ -1262,7 +1298,8 @@ def _process_xlabel(region, xlabel, ax1, gtf_path, mode, fontsize, font_family,
1262
1298
  ax1.set_xlabel(xlabel,fontsize=fontsize,family=font_family)
1263
1299
  return ax1, ax3
1264
1300
 
1265
- def _process_ylabel(ylabel, ax1, mode, bwindowsizekb, fontsize, font_family, ax4=None):
1301
+ def _process_ylabel(ylabel, ax1, mode, bwindowsizekb, fontsize, font_family, ax4=None, log=Log(),verbose=True):
1302
+ log.write(" -Processing Y labels...",verbose=verbose)
1266
1303
  if "b" in mode:
1267
1304
  if ylabel is None:
1268
1305
  ylabel ="Density of GWAS \n SNPs within "+str(bwindowsizekb)+" kb"
@@ -1336,4 +1373,4 @@ def _process_layout(mode, figax, fig_args, mqqratio, region_hspace):
1336
1373
  raise ValueError("Please select one from the 5 modes: mqq/qqm/m/qq/r/b")
1337
1374
  ax4=None
1338
1375
  cbar=None
1339
- return fig, ax1, ax2, ax3, ax4, cbar
1376
+ return fig, ax1, ax2, ax3, ax4, cbar
@@ -45,7 +45,7 @@ def _plot_qq(
45
45
 
46
46
  # QQ plot #########################################################################################################
47
47
  # ax2 qqplot
48
- if verbose:log.write("Start to create QQ plot with "+str(len(sumstats))+" variants:")
48
+ log.write("Start to create QQ plot with "+str(len(sumstats))+" variants:",verbose=verbose )
49
49
 
50
50
  # plotting qq plots using processed data after cut and skip
51
51
 
@@ -59,6 +59,7 @@ def _plot_qq(
59
59
  upper_bound_p = np.power(10.0, -expected_min_mlog10p)
60
60
 
61
61
  if stratified is False:
62
+ log.write(" -Plotting all variants...",verbose=verbose)
62
63
  # sort x,y for qq plot
63
64
  # high to low
64
65
  observed = p_toplot.sort_values(ascending=False)
@@ -68,13 +69,13 @@ def _plot_qq(
68
69
 
69
70
  expected_all = -np.log10(np.linspace(minit,upper_bound_p,len(p_toplot_raw)))[:len(observed)]
70
71
 
71
- if verbose:log.write("Expected range of P: (0,{})".format(upper_bound_p))
72
+ log.write(" -Expected range of P: (0,{})".format(upper_bound_p),verbose=verbose)
72
73
  #p_toplot = sumstats["scaled_P"]
73
74
  ax2.scatter(expected_all,observed,s=marker_size[1],color=colors[0],**qq_scatter_args)
74
75
 
75
76
  else:
76
77
  # stratified qq plot
77
-
78
+ log.write(" -Plotting variants stratified by MAF...",verbose=verbose)
78
79
  observed = p_toplot.sort_values(ascending=False)
79
80
  expected_all = -np.log10(np.linspace(minit,upper_bound_p,len(p_toplot_raw)))[:len(observed)]
80
81
 
@@ -115,15 +116,15 @@ def _plot_qq(
115
116
 
116
117
  if expected_min_mlog10p!=0:
117
118
  level = 1 - np.power(10.0,-np.nanmedian(expected_all))
118
- if verbose: log.write(" -Level for calculating lambda GC : {}".format(1 - level))
119
+ log.write(" -Level for calculating lambda GC : {}".format(1 - level),verbose=verbose)
119
120
 
120
- if verbose and not include_chrXYMT : log.write(" -Excluding chrX,Y, MT from calculation of lambda GC.")
121
+ if verbose and not include_chrXYMT : log.write(" -Excluding chrX,Y, MT from calculation of lambda GC.",verbose=verbose)
121
122
  lambdagc = lambdaGC(p_toplot_raw,
122
123
  mode="MLOG10P",
123
124
  level=level,
124
125
  include_chrXYMT=include_chrXYMT,
125
126
  log=log,
126
- verbose=True)
127
+ verbose=verbose)
127
128
 
128
129
  # annotate lambda gc to qq plot
129
130
  ax2.text(0.10, 1.03,"$\\lambda_{GC}$ = "+"{:.4f}".format(lambdagc),
@@ -147,7 +148,9 @@ def _plot_qq(
147
148
  font_family=font_family,
148
149
  ylabels=ylabels,
149
150
  ytick3=ytick3,
150
- ylabels_converted=ylabels_converted
151
+ ylabels_converted=ylabels_converted,
152
+ log=log,
153
+ verbose=verbose
151
154
  )
152
155
 
153
156
  #if cut == 0:
@@ -181,7 +184,7 @@ def _plot_qq(
181
184
  if qtitle:
182
185
  ax2.set_title(qtitle,fontsize=title_fontsize,pad=10,family=font_family)
183
186
 
184
- if verbose: log.write("Finished creating QQ plot successfully!")
187
+ log.write("Finished creating QQ plot successfully!",verbose=verbose)
185
188
 
186
189
  # Creating QQ plot Finished #############################################################################################
187
190
  return ax2
@@ -122,6 +122,8 @@ def _plot_regional(
122
122
  region_ld_colors=region_ld_colors2,
123
123
  position=2)
124
124
  cbar = [cbar1, cbar2]
125
+ else:
126
+ cbar=None
125
127
  if region_title is not None:
126
128
  ax1 = _add_region_title(region_title, ax1=ax1,region_title_args=region_title_args )
127
129
  ## recombinnation rate ##################################################
@@ -540,7 +542,7 @@ def process_vcf(sumstats, vcf_path, region,region_ref, region_ref_second, log, v
540
542
  # no position match
541
543
  return None
542
544
  if verbose: log.write(" -Matching variants using POS, NEA, EA ...")
543
- sumstats["REFINDEX"] = sumstats.loc[:,[pos,nea,ea]].apply(lambda x: match_varaint(x),axis=1)
545
+ sumstats["REFINDEX"] = sumstats[[pos,nea,ea]].apply(lambda x: match_varaint(x),axis=1)
544
546
  #############################################################################################
545
547
  #sumstats["REFINDEX"] = sumstats[pos].apply(lambda x: np.where(ref_genotype["variants/POS"] == x )[0][0] if np.any(ref_genotype["variants/POS"] == x) else None)
546
548