gwaslab 3.4.37__py3-none-any.whl → 3.4.39__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gwaslab might be problematic. Click here for more details.

Files changed (57) hide show
  1. gwaslab/bd_common_data.py +6 -3
  2. gwaslab/bd_download.py +9 -9
  3. gwaslab/bd_get_hapmap3.py +43 -9
  4. gwaslab/data/formatbook.json +722 -721
  5. gwaslab/g_Log.py +22 -5
  6. gwaslab/g_Sumstats.py +110 -163
  7. gwaslab/g_SumstatsPair.py +76 -25
  8. gwaslab/g_SumstatsT.py +2 -2
  9. gwaslab/g_Sumstats_summary.py +3 -3
  10. gwaslab/g_version.py +10 -10
  11. gwaslab/hm_casting.py +36 -17
  12. gwaslab/hm_harmonize_sumstats.py +354 -221
  13. gwaslab/hm_rsid_to_chrpos.py +1 -1
  14. gwaslab/io_preformat_input.py +49 -43
  15. gwaslab/io_read_ldsc.py +49 -1
  16. gwaslab/io_to_formats.py +428 -295
  17. gwaslab/ldsc_irwls.py +198 -0
  18. gwaslab/ldsc_jackknife.py +514 -0
  19. gwaslab/ldsc_ldscore.py +417 -0
  20. gwaslab/ldsc_parse.py +294 -0
  21. gwaslab/ldsc_regressions.py +747 -0
  22. gwaslab/ldsc_sumstats.py +629 -0
  23. gwaslab/qc_check_datatype.py +3 -3
  24. gwaslab/qc_fix_sumstats.py +891 -778
  25. gwaslab/util_ex_calculate_ldmatrix.py +31 -13
  26. gwaslab/util_ex_gwascatalog.py +25 -25
  27. gwaslab/util_ex_ldproxyfinder.py +10 -10
  28. gwaslab/util_ex_ldsc.py +189 -0
  29. gwaslab/util_ex_process_ref.py +3 -3
  30. gwaslab/util_ex_run_coloc.py +26 -4
  31. gwaslab/util_in_calculate_gc.py +6 -6
  32. gwaslab/util_in_calculate_power.py +42 -43
  33. gwaslab/util_in_convert_h2.py +8 -8
  34. gwaslab/util_in_fill_data.py +30 -30
  35. gwaslab/util_in_filter_value.py +201 -74
  36. gwaslab/util_in_get_density.py +10 -10
  37. gwaslab/util_in_get_sig.py +445 -71
  38. gwaslab/viz_aux_annotate_plot.py +12 -12
  39. gwaslab/viz_aux_quickfix.py +42 -37
  40. gwaslab/viz_aux_reposition_text.py +10 -7
  41. gwaslab/viz_aux_save_figure.py +18 -8
  42. gwaslab/viz_plot_compare_af.py +32 -33
  43. gwaslab/viz_plot_compare_effect.py +63 -71
  44. gwaslab/viz_plot_miamiplot2.py +34 -26
  45. gwaslab/viz_plot_mqqplot.py +126 -75
  46. gwaslab/viz_plot_qqplot.py +11 -8
  47. gwaslab/viz_plot_regionalplot.py +36 -33
  48. gwaslab/viz_plot_rg_heatmap.py +28 -26
  49. gwaslab/viz_plot_stackedregional.py +40 -21
  50. gwaslab/viz_plot_trumpetplot.py +65 -61
  51. gwaslab-3.4.39.dist-info/LICENSE +674 -0
  52. {gwaslab-3.4.37.dist-info → gwaslab-3.4.39.dist-info}/METADATA +5 -4
  53. gwaslab-3.4.39.dist-info/RECORD +80 -0
  54. gwaslab-3.4.37.dist-info/RECORD +0 -72
  55. /gwaslab-3.4.37.dist-info/LICENSE → /gwaslab-3.4.39.dist-info/LICENSE_before_v3.4.39 +0 -0
  56. {gwaslab-3.4.37.dist-info → gwaslab-3.4.39.dist-info}/WHEEL +0 -0
  57. {gwaslab-3.4.37.dist-info → gwaslab-3.4.39.dist-info}/top_level.txt +0 -0
@@ -174,6 +174,8 @@ def mqqplot(insumstats,
174
174
  include_chrXYMT = True,
175
175
  ylim=None,
176
176
  xpad=None,
177
+ xpadl=None,
178
+ xpadr=None,
177
179
  chrpad=0.03,
178
180
  drop_chr_start=False,
179
181
  title =None,
@@ -213,7 +215,6 @@ def mqqplot(insumstats,
213
215
  chr_dict = get_chr_to_number()
214
216
  if xtick_chr_dict is None:
215
217
  xtick_chr_dict = get_number_to_chr()
216
-
217
218
  if gtf_chr_dict is None:
218
219
  gtf_chr_dict = get_number_to_chr()
219
220
  if rr_chr_dict is None:
@@ -304,40 +305,42 @@ def mqqplot(insumstats,
304
305
  scatter_args["rasterized"]=True
305
306
  qq_scatter_args["rasterized"]=True
306
307
 
307
- if verbose: log.write("Start to create MQQ plot with the following basic settings {}:".format(_get_version()))
308
- if verbose: log.write(" -Genomic coordinates version: {}...".format(build))
308
+ log.write("Start to create MQQ plot...{}:".format(_get_version()),verbose=verbose)
309
+ log.write(" -Genomic coordinates version: {}...".format(build),verbose=verbose)
309
310
  if build is None or build=="99":
310
- if verbose: log.write(" -WARNING: Genomic coordinates version is unknown...")
311
- if verbose: log.write(" -Genome-wide significance level to plot is set to "+str(sig_level_plot)+" ...")
312
- if verbose: log.write(" -Raw input contains "+str(len(insumstats))+" variants...")
313
- if verbose: log.write(" -MQQ plot layout mode is : "+mode)
311
+ log.warning("Genomic coordinates version is unknown.")
312
+ log.write(" -Genome-wide significance level to plot is set to "+str(sig_level_plot)+" ...",verbose=verbose)
313
+ log.write(" -Raw input contains "+str(len(insumstats))+" variants...",verbose=verbose)
314
+ log.write(" -MQQ plot layout mode is : "+mode,verbose=verbose)
315
+
314
316
  if len(anno_set)>0 and ("m" in mode):
315
- if verbose: log.write(" -Variants to annotate : "+",".join(anno_set))
317
+ log.write(" -Variants to annotate : "+",".join(anno_set),verbose=verbose)
318
+
316
319
  if len(highlight)>0 and ("m" in mode):
317
320
  if pd.api.types.is_list_like(highlight[0]):
318
321
  if highlight_chrpos==False:
319
- if len(highlight[0]) == len(highlight_color):
320
- log.write(" -WARNING: number of locus list does not match number of colors !!!")
322
+ if len(highlight) != len(highlight_color):
323
+ log.warning("Number of locus groups in the list does not match number of provided colors.")
321
324
  for i, highlight_set in enumerate(highlight):
322
- if verbose: log.write(" -Set {} loci to highlight ({}) : ".format(i+1, highlight_color[i%len(highlight_color)])+",".join(highlight_set))
325
+ log.write(" -Set {} loci to highlight ({}) : ".format(i+1, highlight_color[i%len(highlight_color)])+",".join(highlight_set),verbose=verbose)
323
326
  else:
324
- if verbose: log.write(" -Loci to highlight ({}): {}".format(highlight_color,highlight))
325
- if verbose: log.write(" -Highlight_window is set to: ", highlight_windowkb, " kb")
327
+ log.write(" -Loci to highlight ({}): {}".format(highlight_color,highlight),verbose=verbose)
328
+ log.write(" -highlight_windowkb is set to: ", highlight_windowkb, " kb",verbose=verbose)
326
329
  else:
327
- if verbose: log.write(" -Loci to highlight ({}): ".format(highlight_color)+",".join(highlight))
328
- if verbose: log.write(" -Highlight_window is set to: ", highlight_windowkb, " kb")
330
+ log.write(" -Loci to highlight ({}): ".format(highlight_color)+",".join(highlight),verbose=verbose)
331
+ log.write(" -highlight_windowkb is set to: ", highlight_windowkb, " kb",verbose=verbose)
329
332
 
330
333
  if len(pinpoint)>0 :
331
334
  if pd.api.types.is_list_like(pinpoint[0]):
332
- if len(pinpoint[0]) == len(pinpoint_color):
333
- log.write(" -WARNING: number of variant list does not match number of colors !!!")
335
+ if len(pinpoint) != len(pinpoint_color):
336
+ log.warning("Number of variant groups in the list does not match number of provided colors.")
334
337
  for i, pinpoint_set in enumerate(pinpoint):
335
- if verbose: log.write(" -Set {} variants to pinpoint ({}) : ".format(i+1,pinpoint_color[i%len(pinpoint_color)])+",".join(pinpoint_set))
338
+ log.write(" -Set {} variants to pinpoint ({}) : ".format(i+1,pinpoint_color[i%len(pinpoint_color)])+",".join(pinpoint_set),verbose=verbose)
336
339
  else:
337
- if verbose: log.write(" -Variants to pinpoint ({}) : ".format(pinpoint_color)+",".join(pinpoint))
340
+ log.write(" -Variants to pinpoint ({}) : ".format(pinpoint_color)+",".join(pinpoint),verbose=verbose)
338
341
 
339
342
  if region is not None:
340
- if verbose: log.write(" -Region to plot : chr"+str(region[0])+":"+str(region[1])+"-"+str(region[2])+".")
343
+ log.write(" -Region to plot : chr"+str(region[0])+":"+str(region[1])+"-"+str(region[2])+".",verbose=verbose)
341
344
 
342
345
  # construct line series for coversion
343
346
  if additional_line is None:
@@ -399,7 +402,7 @@ def mqqplot(insumstats,
399
402
  pinpoint=pinpoint,
400
403
  density_color=density_color)
401
404
 
402
- sumstats = insumstats.loc[:,usecols].copy()
405
+ sumstats = insumstats[usecols].copy()
403
406
 
404
407
  #################################################################################################
405
408
 
@@ -408,7 +411,7 @@ def mqqplot(insumstats,
408
411
  if (anno == "GENENAME"):
409
412
  anno_sig=True
410
413
  elif (anno is not None) and (anno is not True):
411
- sumstats["Annotation"]=sumstats.loc[:,anno].astype("string")
414
+ sumstats["Annotation"]=sumstats[anno].astype("string")
412
415
 
413
416
  ## P value
414
417
  ## m, qq, r
@@ -432,15 +435,15 @@ def mqqplot(insumstats,
432
435
  region_start = region[1]
433
436
  region_end = region[2]
434
437
  marker_size=(25,45)
435
- if verbose:log.write(" -Extract SNPs in region : chr{}:{}-{}...".format(region_chr, region[1], region[2]))
438
+ log.write(" -Extract SNPs in region : chr{}:{}-{}...".format(region_chr, region[1], region[2]),verbose=verbose)
436
439
 
437
- in_region_snp = (sumstats[chrom]==region_chr) &(sumstats[pos]<region_end) &(sumstats[pos]>region_start)
440
+ in_region_snp = (sumstats[chrom]==region_chr) & (sumstats[pos]<region_end) & (sumstats[pos]>region_start)
438
441
 
439
- if verbose:log.write(" -Extract SNPs in specified regions: "+str(sum(in_region_snp)))
442
+ log.write(" -Extract SNPs in specified regions: "+str(sum(in_region_snp)),verbose=verbose)
440
443
  sumstats = sumstats.loc[in_region_snp,:]
441
444
 
442
445
  if len(sumstats)==0:
443
- log.write(" -Warning : No valid data! Please check the input.")
446
+ log.warning("No valid data! Please check the input.")
444
447
  return None
445
448
 
446
449
  ## EAF
@@ -454,11 +457,11 @@ def mqqplot(insumstats,
454
457
  sumstats["HUE"] = pd.NA
455
458
  sumstats["HUE"] = sumstats["HUE"].astype("Int64")
456
459
 
457
- if verbose: log.write("Finished loading specified columns from the sumstats.")
460
+ log.write("Finished loading specified columns from the sumstats.",verbose=verbose)
458
461
 
459
462
 
460
463
  #sanity check############################################################################################################
461
- log.write("Start conversion and sanity check:",verbose=verbose)
464
+ log.write("Start data conversion and sanity check:",verbose=verbose)
462
465
 
463
466
  if _if_quick_qc == False:
464
467
  log.write(" -Sanity check will be skipped.", verbose=verbose)
@@ -527,15 +530,19 @@ def mqqplot(insumstats,
527
530
  lines_to_plot=lines_to_plot,
528
531
  log = log)
529
532
  except:
530
- log.write(" -Warning : No valid data! Please check the input.")
533
+ log.warning("No valid data! Please check the input.")
531
534
  return None
532
535
 
536
+ log.write("Finished data conversion and sanity check.",verbose=verbose)
537
+
533
538
  # Manhattan plot ##########################################################################################################
539
+ log.write("Start to create MQQ plot with "+str(len(sumstats))+" variants...",verbose=verbose)
534
540
  ## regional plot ->rsq
535
541
  #calculate rsq]
536
542
  if vcf_path is not None:
537
543
  if tabix is None:
538
544
  tabix = which("tabix")
545
+ log.write(" -tabix will be used: {}".format(tabix),verbose=verbose)
539
546
  sumstats = process_vcf(sumstats=sumstats,
540
547
  vcf_path=vcf_path,
541
548
  region=region,
@@ -568,8 +575,6 @@ def mqqplot(insumstats,
568
575
 
569
576
  if vcf_path is not None:
570
577
  sumstats["chr_hue"]=sumstats["LD"]
571
-
572
- if verbose:log.write("Start to create MQQ plot with "+str(len(sumstats))+" variants:")
573
578
  ## default seetings
574
579
 
575
580
  palette = sns.color_palette(colors,n_colors=sumstats[chrom].nunique())
@@ -601,6 +606,7 @@ def mqqplot(insumstats,
601
606
  ## if highlight
602
607
  highlight_i = pd.DataFrame()
603
608
  if len(highlight) >0:
609
+ log.write(" -Creating background plot...",verbose=verbose)
604
610
  plot = sns.scatterplot(data=sumstats, x='i', y='scaled_P',
605
611
  hue='chr_hue',
606
612
  palette=palette,
@@ -612,8 +618,7 @@ def mqqplot(insumstats,
612
618
  zorder=2,ax=ax1,edgecolor=edgecolor, **scatter_args)
613
619
  if pd.api.types.is_list_like(highlight[0]) and highlight_chrpos==False:
614
620
  for i, highlight_set in enumerate(highlight):
615
- if verbose: log.write(" -Highlighting set {} target loci...".format(i+1))
616
- print(sumstats["HUE"].dtype)
621
+ log.write(" -Highlighting set {} target loci...".format(i+1),verbose=verbose)
617
622
  sns.scatterplot(data=sumstats.loc[sumstats["HUE"]==i], x='i', y='scaled_P',
618
623
  hue="HUE",
619
624
  palette={i:highlight_color[i%len(highlight_color)]},
@@ -625,7 +630,7 @@ def mqqplot(insumstats,
625
630
  zorder=3+i,ax=ax1,edgecolor=edgecolor,**scatter_args)
626
631
  highlight_i = sumstats.loc[~sumstats["HUE"].isna(),"i"].values
627
632
  else:
628
- if verbose: log.write(" -Highlighting target loci...")
633
+ log.write(" -Highlighting target loci...",verbose=verbose)
629
634
  sns.scatterplot(data=sumstats.loc[sumstats["HUE"]==0], x='i', y='scaled_P',
630
635
  hue="HUE",
631
636
  palette={0:highlight_color},
@@ -674,6 +679,7 @@ def mqqplot(insumstats,
674
679
  hue = 'chr_hue'
675
680
  hue_norm=None
676
681
  to_plot = sumstats
682
+ log.write(" -Creating background plot...",verbose=verbose)
677
683
  plot = sns.scatterplot(data=to_plot, x='i', y='scaled_P',
678
684
  hue=hue,
679
685
  palette= palette,
@@ -693,17 +699,17 @@ def mqqplot(insumstats,
693
699
  for i, pinpoint_set in enumerate(pinpoint):
694
700
  if sum(sumstats[snpid].isin(pinpoint_set))>0:
695
701
  to_pinpoint = sumstats.loc[sumstats[snpid].isin(pinpoint_set),:]
696
- if verbose: log.write(" -Pinpointing set {} target vairants...".format(i+1))
702
+ log.write(" -Pinpointing set {} target vairants...".format(i+1),verbose=verbose)
697
703
  ax1.scatter(to_pinpoint["i"],to_pinpoint["scaled_P"],color=pinpoint_color[i%len(pinpoint_color)],zorder=100,s=marker_size[1]+1)
698
704
  else:
699
- if verbose: log.write(" -Target vairants to pinpoint were not found. Skip pinpointing process...")
705
+ log.write(" -Target vairants to pinpoint were not found. Skip pinpointing process...",verbose=verbose)
700
706
  else:
701
707
  if sum(sumstats[snpid].isin(pinpoint))>0:
702
708
  to_pinpoint = sumstats.loc[sumstats[snpid].isin(pinpoint),:]
703
- if verbose: log.write(" -Pinpointing target vairants...")
709
+ log.write(" -Pinpointing target vairants...",verbose=verbose)
704
710
  ax1.scatter(to_pinpoint["i"],to_pinpoint["scaled_P"],color=pinpoint_color,zorder=100,s=marker_size[1]+1)
705
711
  else:
706
- if verbose: log.write(" -Target vairants to pinpoint were not found. Skip pinpointing process...")
712
+ log.write(" -Target vairants to pinpoint were not found. Skip pinpointing process...",verbose=verbose)
707
713
 
708
714
 
709
715
 
@@ -767,12 +773,15 @@ def mqqplot(insumstats,
767
773
  lead_snp_i= None
768
774
  lead_snp_i2=None
769
775
 
776
+ log.write("Finished creating MQQ plot successfully!",verbose=verbose)
777
+
770
778
  # Get top variants for annotation #######################################################
779
+ log.write("Start to extract variants for annotation...",verbose=verbose)
771
780
  if (anno and anno!=True) or (len(anno_set)>0):
772
781
  if len(anno_set)>0:
773
782
  to_annotate=sumstats.loc[sumstats[snpid].isin(anno_set),:]
774
783
  if to_annotate.empty is not True:
775
- if verbose: log.write(" -Found "+str(len(to_annotate))+" specified variants to annotate...")
784
+ log.write(" -Found "+str(len(to_annotate))+" specified variants to annotate...",verbose=verbose)
776
785
  else:
777
786
  to_annotate=getsig(sumstats.loc[sumstats["scaled_P"]> float(-np.log10(sig_level_lead)),:],
778
787
  snpid,
@@ -785,7 +794,7 @@ def mqqplot(insumstats,
785
794
  mlog10p="scaled_P",
786
795
  verbose=False)
787
796
  if (to_annotate.empty is not True) and ("b" not in mode):
788
- if verbose: log.write(" -Found "+str(len(to_annotate))+" significant variants with a sliding window size of "+str(windowsizekb)+" kb...")
797
+ log.write(" -Found "+str(len(to_annotate))+" significant variants with a sliding window size of "+str(windowsizekb)+" kb...",verbose=verbose)
789
798
  else:
790
799
  to_annotate=getsig(sumstats.loc[sumstats["scaled_P"]> float(-np.log10(sig_level_lead)),:],
791
800
  "i",
@@ -798,7 +807,7 @@ def mqqplot(insumstats,
798
807
  mlog10p="scaled_P",
799
808
  sig_level=sig_level_lead)
800
809
  if (to_annotate.empty is not True) and ("b" not in mode):
801
- if verbose: log.write(" -Found "+str(len(to_annotate))+" significant variants with a sliding window size of "+str(windowsizekb)+" kb...")
810
+ log.write(" -Found "+str(len(to_annotate))+" significant variants with a sliding window size of "+str(windowsizekb)+" kb...",verbose=verbose)
802
811
  if (to_annotate.empty is not True) and anno=="GENENAME":
803
812
  to_annotate = annogene(to_annotate,
804
813
  id=snpid,
@@ -808,16 +817,21 @@ def mqqplot(insumstats,
808
817
  build=build,
809
818
  source=anno_source,
810
819
  verbose=verbose).rename(columns={"GENE":"Annotation"})
820
+ log.write("Finished extracting variants for annotation...",verbose=verbose)
811
821
 
812
822
  # Configure X, Y axes #######################################################
823
+ log.write("Start to process figure arts.",verbose=verbose)
813
824
  if region is None:
814
825
  # if Manhattan plot
826
+
815
827
  ax1 = _process_xtick(ax1=ax1,
816
828
  chrom_df=chrom_df,
817
829
  xtick_chr_dict=xtick_chr_dict,
818
830
  fontsize = fontsize,
819
- font_family=font_family)
820
-
831
+ font_family=font_family,
832
+ log=log,
833
+ verbose=verbose)
834
+
821
835
  ax1, ax3 = _process_xlabel(region=region,
822
836
  xlabel=xlabel,
823
837
  ax1=ax1,
@@ -825,7 +839,9 @@ def mqqplot(insumstats,
825
839
  mode=mode,
826
840
  fontsize=fontsize,
827
841
  font_family=font_family,
828
- ax3=ax3 )
842
+ ax3=ax3,
843
+ log=log,
844
+ verbose=verbose)
829
845
 
830
846
  ax1, ax4 = _process_ylabel(ylabel=ylabel,
831
847
  ax1=ax1,
@@ -833,8 +849,11 @@ def mqqplot(insumstats,
833
849
  bwindowsizekb=bwindowsizekb,
834
850
  fontsize=fontsize,
835
851
  font_family=font_family,
836
- ax4=ax4)
852
+ ax4=ax4,
853
+ log=log,
854
+ verbose=verbose)
837
855
 
856
+
838
857
  ax1 = _set_yticklabels(cut=cut,
839
858
  cutfactor=cutfactor,
840
859
  cut_log=cut_log,
@@ -849,19 +868,28 @@ def mqqplot(insumstats,
849
868
  font_family=font_family,
850
869
  ytick3=ytick3,
851
870
  ylabels=ylabels,
852
- ylabels_converted=ylabels_converted
853
- )
871
+ ylabels_converted=ylabels_converted,
872
+ log=log,
873
+ verbose=verbose)
854
874
 
855
875
  ax1, ax4 = _process_ytick(ax1=ax1,
856
876
  fontsize=fontsize,
857
877
  font_family=font_family,
858
- ax4=ax4)
878
+ ax4=ax4,
879
+ log=log,
880
+ verbose=verbose)
859
881
 
860
- if cbar is not None:
861
- # regional plot cbar
862
- cbar = _process_cbar(cbar, cbar_fontsize=fontsize, cbar_font_family=font_family, cbar_title=cbar_title)
882
+ # regional plot cbar
883
+ if cbar is not None:
884
+ cbar = _process_cbar(cbar,
885
+ cbar_fontsize=fontsize,
886
+ cbar_font_family=font_family,
887
+ cbar_title=cbar_title,
888
+ log=log,
889
+ verbose=verbose)
863
890
 
864
891
  ax1 = _process_spine(ax1, mode)
892
+
865
893
  # genomewide significant line
866
894
  ax1 = _process_line(ax1,
867
895
  sig_line,
@@ -874,7 +902,9 @@ def mqqplot(insumstats,
874
902
  additional_line_color,
875
903
  mode,
876
904
  bmean,
877
- bmedian )
905
+ bmedian,
906
+ log=log,
907
+ verbose=verbose )
878
908
 
879
909
 
880
910
  if mtitle and anno and len(to_annotate)>0:
@@ -882,8 +912,10 @@ def mqqplot(insumstats,
882
912
  ax1.set_title(mtitle,pad=pad,fontsize=title_fontsize,family=font_family)
883
913
  elif mtitle:
884
914
  ax1.set_title(mtitle,fontsize=title_fontsize,family=font_family)
885
-
915
+ log.write("Finished processing figure arts.",verbose=verbose)
916
+
886
917
  # Add annotation arrows and texts
918
+ log.write("Start to annotate variants...",verbose=verbose)
887
919
  ax1 = annotate_single(
888
920
  sumstats=sumstats,
889
921
  anno=anno,
@@ -917,6 +949,7 @@ def mqqplot(insumstats,
917
949
  log=log,
918
950
  _invert=_invert
919
951
  )
952
+ log.write("Finished annotating variants.",verbose=verbose)
920
953
  # Manhatann-like plot Finished #####################################################################
921
954
 
922
955
  # QQ plot #########################################################################################################
@@ -961,9 +994,9 @@ def mqqplot(insumstats,
961
994
 
962
995
  # Y axis jagged
963
996
  if jagged==True:
964
- ax1 = _jagged_y(cut=cut,skip=skip,ax1=ax1,mode=1,mqqratio=mqqratio,jagged_len=jagged_len,jagged_wid=jagged_wid)
997
+ ax1 = _jagged_y(cut=cut,skip=skip,ax1=ax1,mode=1,mqqratio=mqqratio,jagged_len=jagged_len,jagged_wid=jagged_wid,log=log, verbose=verbose)
965
998
  if "qq" in mode:
966
- ax2 = _jagged_y(cut=cut,skip=skip,ax1=ax2,mode=2,mqqratio=mqqratio,jagged_len=jagged_len,jagged_wid=jagged_wid)
999
+ ax2 = _jagged_y(cut=cut,skip=skip,ax1=ax2,mode=2,mqqratio=mqqratio,jagged_len=jagged_len,jagged_wid=jagged_wid,log=log, verbose=verbose)
967
1000
 
968
1001
  # XY lim
969
1002
  if ylim is not None:
@@ -971,8 +1004,7 @@ def mqqplot(insumstats,
971
1004
  if "qq" in mode:
972
1005
  ax2.set_ylim(ylim)
973
1006
 
974
- if xpad!=None:
975
- ax1.set_xlim([0 - xpad* sumstats["i"].max(),(1+xpad)*sumstats["i"].max()])
1007
+ ax1 = _add_pad_to_x_axis(ax1, xpad, xpadl, xpadr, sumstats)
976
1008
 
977
1009
  # Titles
978
1010
  if title and anno and len(to_annotate)>0:
@@ -989,7 +1021,7 @@ def mqqplot(insumstats,
989
1021
  if _get_region_lead==True:
990
1022
  return fig, log, lead_snp_i, lead_snp_i2
991
1023
 
992
- if verbose: log.write("Finished creating MQQ plot successfully!")
1024
+ log.write("Finished creating plot successfully!",verbose=verbose)
993
1025
  return fig, log
994
1026
 
995
1027
  ##############################################################################################################################################################################
@@ -997,8 +1029,21 @@ def mqqplot(insumstats,
997
1029
 
998
1030
 
999
1031
 
1032
+ def _add_pad_to_x_axis(ax1, xpad, xpadl, xpadr, sumstats):
1033
+
1034
+ xmin, xmax = ax1.get_xlim()
1035
+
1036
+ if xpad is not None:
1037
+ pad = xpad* sumstats["i"].max()
1038
+ ax1.set_xlim([xmin - pad, xmin + pad])
1039
+ if xpadl is not None:
1040
+ pad = xpadl* sumstats["i"].max()
1041
+ ax1.set_xlim([xmin - pad,xmax])
1042
+ if xpadr is not None:
1043
+ pad = xpadr* sumstats["i"].max()
1044
+ ax1.set_xlim([xmin, xmax + pad])
1000
1045
 
1001
-
1046
+ return ax1
1002
1047
 
1003
1048
 
1004
1049
 
@@ -1084,22 +1129,22 @@ def _sanity_check(sumstats, mode, chrom, pos, stratified, _if_quick_qc, log, ver
1084
1129
  #sanity check : drop variants with na values in chr and pos df
1085
1130
  sumstats = sumstats.dropna(subset=[chrom,pos])
1086
1131
  after_number=len(sumstats)
1087
- if verbose:log.write(" -Removed "+ str(pre_number-after_number) +" variants with nan in CHR or POS column ...")
1132
+ log.write(" -Removed "+ str(pre_number-after_number) +" variants with nan in CHR or POS column ...",verbose=verbose)
1088
1133
  out_of_range_chr = sumstats[chrom]<=0
1089
- if verbose:log.write(" -Removed {} variants with CHR <=0...".format(sum(out_of_range_chr)))
1134
+ log.write(" -Removed {} variants with CHR <=0...".format(sum(out_of_range_chr)),verbose=verbose)
1090
1135
  sumstats = sumstats.loc[~out_of_range_chr,:]
1091
1136
 
1092
1137
  if stratified is True and _if_quick_qc:
1093
1138
  pre_number=len(sumstats)
1094
1139
  sumstats = sumstats.dropna(subset=["MAF"])
1095
1140
  after_number=len(sumstats)
1096
- if verbose:log.write(" -Removed "+ str(pre_number-after_number) +" variants with nan in EAF column ...")
1141
+ log.write(" -Removed "+ str(pre_number-after_number) +" variants with nan in EAF column ...",verbose=verbose)
1097
1142
 
1098
1143
  if "b" not in mode and _if_quick_qc:
1099
1144
  pre_number=len(sumstats)
1100
1145
  sumstats = sumstats.dropna(subset=["raw_P"])
1101
1146
  after_number=len(sumstats)
1102
- if verbose:log.write(" -Removed "+ str(pre_number-after_number) +" variants with nan in P column ...")
1147
+ log.write(" -Removed "+ str(pre_number-after_number) +" variants with nan in P column ...",verbose=verbose)
1103
1148
  return sumstats
1104
1149
 
1105
1150
  def _process_p_value(sumstats, mode,p, mlog10p, scaled, log, verbose ):
@@ -1108,7 +1153,7 @@ def _process_p_value(sumstats, mode,p, mlog10p, scaled, log, verbose ):
1108
1153
  sumstats["scaled_P"] = sumstats["DENSITY"].copy()
1109
1154
  sumstats["raw_P"] = -np.log10(sumstats["DENSITY"].copy()+2)
1110
1155
  elif scaled is True:
1111
- if verbose:log.write(" -P values are already converted to -log10(P)!")
1156
+ log.write(" -P values are already converted to -log10(P)!",verbose=verbose)
1112
1157
  sumstats["scaled_P"] = sumstats["raw_P"].copy()
1113
1158
  sumstats["raw_P"] = np.power(10,-sumstats["scaled_P"].astype("float64"))
1114
1159
  else:
@@ -1156,7 +1201,7 @@ def _process_highlight(sumstats, highlight, highlight_chrpos, highlight_windowkb
1156
1201
  sumstats.loc[right_chr&up_pos&low_pos,"HUE"]=0
1157
1202
  else:
1158
1203
  # highlight for one set
1159
- # to_highlight = sumstats.loc[sumstats[snpid].isin(highlight),:]
1204
+ to_highlight = sumstats.loc[sumstats[snpid].isin(highlight),:]
1160
1205
  #assign colors: 0 is hightlight color
1161
1206
  for index,row in to_highlight.iterrows():
1162
1207
  target_chr = int(row[chrom])
@@ -1169,7 +1214,7 @@ def _process_highlight(sumstats, highlight, highlight_chrpos, highlight_windowkb
1169
1214
 
1170
1215
  def _process_density(sumstats, mode, bwindowsizekb, chrom, pos, verbose, log):
1171
1216
  if "b" in mode and "DENSITY" not in sumstats.columns:
1172
- if verbose:log.write(" -Calculating DENSITY with windowsize of ",bwindowsizekb ," kb")
1217
+ log.write(" -Calculating DENSITY with windowsize of ",bwindowsizekb ," kb",verbose=verbose)
1173
1218
  large_number = _get_largenumber(sumstats[pos].max(),log=log)
1174
1219
 
1175
1220
  stack=[]
@@ -1190,11 +1235,12 @@ def _process_density(sumstats, mode, bwindowsizekb, chrom, pos, verbose, log):
1190
1235
  elif "b" in mode and "DENSITY" in sumstats.columns:
1191
1236
  bmean=sumstats["DENSITY"].mean()
1192
1237
  bmedian=sumstats["DENSITY"].median()
1193
- if verbose:log.write(" -DENSITY column exists. Skipping calculation...")
1238
+ log.write(" -DENSITY column exists. Skipping calculation...",verbose=verbose)
1194
1239
  return sumstats, bmean, bmedian
1195
1240
 
1196
- def _process_line(ax1, sig_line, suggestive_sig_line, additional_line, lines_to_plot , sc_linewidth, sig_line_color, suggestive_sig_line_color, additional_line_color, mode, bmean, bmedian ):
1241
+ def _process_line(ax1, sig_line, suggestive_sig_line, additional_line, lines_to_plot , sc_linewidth, sig_line_color, suggestive_sig_line_color, additional_line_color, mode, bmean, bmedian , log=Log(),verbose=True):
1197
1242
  # genomewide significant line
1243
+ log.write(" -Processing lines...",verbose=verbose)
1198
1244
  if sig_line is True:
1199
1245
  sigline = ax1.axhline(y=lines_to_plot[0],
1200
1246
  linewidth = sc_linewidth,
@@ -1220,8 +1266,9 @@ def _process_line(ax1, sig_line, suggestive_sig_line, additional_line, lines_to_
1220
1266
  medianline = ax1.axhline(y=bmedian, linewidth = sc_linewidth,linestyle="--",color=sig_line_color,zorder=1000)
1221
1267
  return ax1
1222
1268
 
1223
- def _process_cbar(cbar, cbar_fontsize, cbar_font_family, cbar_title):
1224
- if str(type(cbar))=="list":
1269
+ def _process_cbar(cbar, cbar_fontsize, cbar_font_family, cbar_title, log=Log(),verbose=True):
1270
+ log.write(" -Processing color bar...",verbose=verbose)
1271
+ if type(cbar) == list:
1225
1272
  for cbar_single in cbar:
1226
1273
  cbar_yticklabels = cbar_single.ax.get_yticklabels()
1227
1274
  cbar_single.ax.set_yticklabels(cbar_yticklabels, fontsize=cbar_fontsize, family=cbar_font_family )
@@ -1232,12 +1279,14 @@ def _process_cbar(cbar, cbar_fontsize, cbar_font_family, cbar_title):
1232
1279
  cbar.ax.set_title(cbar_title, fontsize=cbar_fontsize, family=cbar_font_family, loc="center",y=-0.2 )
1233
1280
  return cbar
1234
1281
 
1235
- def _process_xtick(ax1, chrom_df, xtick_chr_dict, fontsize, font_family):
1282
+ def _process_xtick(ax1, chrom_df, xtick_chr_dict, fontsize, font_family, log=Log(),verbose=True):
1283
+ log.write(" -Processing X ticks...",verbose=verbose)
1236
1284
  ax1.set_xticks(chrom_df.astype("float64"))
1237
1285
  ax1.set_xticklabels(chrom_df.index.astype("Int64").map(xtick_chr_dict),fontsize=fontsize,family=font_family)
1238
1286
  return ax1
1239
1287
 
1240
- def _process_ytick(ax1, fontsize, font_family, ax4):
1288
+ def _process_ytick(ax1, fontsize, font_family, ax4, log=Log(),verbose=True):
1289
+ log.write(" -Processing Y labels...",verbose=verbose)
1241
1290
  ax1_yticklabels = ax1.get_yticklabels()
1242
1291
  #ax1.set_yticklabels(ax1_yticklabels,fontsize=fontsize,family=font_family)
1243
1292
  ax1_yticks = ax1.get_yticks()
@@ -1248,7 +1297,8 @@ def _process_ytick(ax1, fontsize, font_family, ax4):
1248
1297
  ax4.set_yticks(ax4_yticks,ax4_yticklabels, fontsize=fontsize,family=font_family)
1249
1298
  return ax1, ax4
1250
1299
 
1251
- def _process_xlabel(region, xlabel, ax1, gtf_path, mode, fontsize, font_family, ax3=None ):
1300
+ def _process_xlabel(region, xlabel, ax1, gtf_path, mode, fontsize, font_family, ax3=None , log=Log(),verbose=True):
1301
+ log.write(" -Processing X labels...",verbose=verbose)
1252
1302
  if region is not None:
1253
1303
  if xlabel is None:
1254
1304
  xlabel = "Chromosome "+str(region[0])+" (MB)"
@@ -1262,7 +1312,8 @@ def _process_xlabel(region, xlabel, ax1, gtf_path, mode, fontsize, font_family,
1262
1312
  ax1.set_xlabel(xlabel,fontsize=fontsize,family=font_family)
1263
1313
  return ax1, ax3
1264
1314
 
1265
- def _process_ylabel(ylabel, ax1, mode, bwindowsizekb, fontsize, font_family, ax4=None):
1315
+ def _process_ylabel(ylabel, ax1, mode, bwindowsizekb, fontsize, font_family, ax4=None, log=Log(),verbose=True):
1316
+ log.write(" -Processing Y labels...",verbose=verbose)
1266
1317
  if "b" in mode:
1267
1318
  if ylabel is None:
1268
1319
  ylabel ="Density of GWAS \n SNPs within "+str(bwindowsizekb)+" kb"
@@ -1336,4 +1387,4 @@ def _process_layout(mode, figax, fig_args, mqqratio, region_hspace):
1336
1387
  raise ValueError("Please select one from the 5 modes: mqq/qqm/m/qq/r/b")
1337
1388
  ax4=None
1338
1389
  cbar=None
1339
- return fig, ax1, ax2, ax3, ax4, cbar
1390
+ return fig, ax1, ax2, ax3, ax4, cbar
@@ -45,7 +45,7 @@ def _plot_qq(
45
45
 
46
46
  # QQ plot #########################################################################################################
47
47
  # ax2 qqplot
48
- if verbose:log.write("Start to create QQ plot with "+str(len(sumstats))+" variants:")
48
+ log.write("Start to create QQ plot with "+str(len(sumstats))+" variants:",verbose=verbose )
49
49
 
50
50
  # plotting qq plots using processed data after cut and skip
51
51
 
@@ -59,6 +59,7 @@ def _plot_qq(
59
59
  upper_bound_p = np.power(10.0, -expected_min_mlog10p)
60
60
 
61
61
  if stratified is False:
62
+ log.write(" -Plotting all variants...",verbose=verbose)
62
63
  # sort x,y for qq plot
63
64
  # high to low
64
65
  observed = p_toplot.sort_values(ascending=False)
@@ -68,13 +69,13 @@ def _plot_qq(
68
69
 
69
70
  expected_all = -np.log10(np.linspace(minit,upper_bound_p,len(p_toplot_raw)))[:len(observed)]
70
71
 
71
- if verbose:log.write("Expected range of P: (0,{})".format(upper_bound_p))
72
+ log.write(" -Expected range of P: (0,{})".format(upper_bound_p),verbose=verbose)
72
73
  #p_toplot = sumstats["scaled_P"]
73
74
  ax2.scatter(expected_all,observed,s=marker_size[1],color=colors[0],**qq_scatter_args)
74
75
 
75
76
  else:
76
77
  # stratified qq plot
77
-
78
+ log.write(" -Plotting variants stratified by MAF...",verbose=verbose)
78
79
  observed = p_toplot.sort_values(ascending=False)
79
80
  expected_all = -np.log10(np.linspace(minit,upper_bound_p,len(p_toplot_raw)))[:len(observed)]
80
81
 
@@ -115,15 +116,15 @@ def _plot_qq(
115
116
 
116
117
  if expected_min_mlog10p!=0:
117
118
  level = 1 - np.power(10.0,-np.nanmedian(expected_all))
118
- if verbose: log.write(" -Level for calculating lambda GC : {}".format(1 - level))
119
+ log.write(" -Level for calculating lambda GC : {}".format(1 - level),verbose=verbose)
119
120
 
120
- if verbose and not include_chrXYMT : log.write(" -Excluding chrX,Y, MT from calculation of lambda GC.")
121
+ if not include_chrXYMT : log.write(" -Excluding chrX,Y, MT from calculation of lambda GC.",verbose=verbose)
121
122
  lambdagc = lambdaGC(p_toplot_raw,
122
123
  mode="MLOG10P",
123
124
  level=level,
124
125
  include_chrXYMT=include_chrXYMT,
125
126
  log=log,
126
- verbose=True)
127
+ verbose=verbose)
127
128
 
128
129
  # annotate lambda gc to qq plot
129
130
  ax2.text(0.10, 1.03,"$\\lambda_{GC}$ = "+"{:.4f}".format(lambdagc),
@@ -147,7 +148,9 @@ def _plot_qq(
147
148
  font_family=font_family,
148
149
  ylabels=ylabels,
149
150
  ytick3=ytick3,
150
- ylabels_converted=ylabels_converted
151
+ ylabels_converted=ylabels_converted,
152
+ log=log,
153
+ verbose=verbose
151
154
  )
152
155
 
153
156
  #if cut == 0:
@@ -181,7 +184,7 @@ def _plot_qq(
181
184
  if qtitle:
182
185
  ax2.set_title(qtitle,fontsize=title_fontsize,pad=10,family=font_family)
183
186
 
184
- if verbose: log.write("Finished creating QQ plot successfully!")
187
+ log.write("Finished creating QQ plot successfully!",verbose=verbose)
185
188
 
186
189
  # Creating QQ plot Finished #############################################################################################
187
190
  return ax2