PyPI - gwaslab - Versions diffs - 3.4.36__py3-none-any.whl → 3.4.38__py3-none-any.whl - Mend

gwaslab 3.4.36-py3-none-any.whl → 3.4.38-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of gwaslab might be problematic. Click here for more details.

Files changed (42) hide show

gwaslab/__init__.py +1 -1
gwaslab/data/formatbook.json +722 -721
gwaslab/g_Log.py +8 -0
gwaslab/g_Sumstats.py +80 -178
gwaslab/g_SumstatsPair.py +6 -2
gwaslab/g_Sumstats_summary.py +3 -3
gwaslab/g_meta.py +13 -3
gwaslab/g_version.py +2 -2
gwaslab/hm_casting.py +29 -15
gwaslab/hm_harmonize_sumstats.py +312 -159
gwaslab/hm_rsid_to_chrpos.py +1 -1
gwaslab/io_preformat_input.py +46 -37
gwaslab/io_to_formats.py +428 -295
gwaslab/qc_check_datatype.py +15 -1
gwaslab/qc_fix_sumstats.py +956 -719
gwaslab/util_ex_calculate_ldmatrix.py +29 -11
gwaslab/util_ex_gwascatalog.py +1 -1
gwaslab/util_ex_ldproxyfinder.py +1 -1
gwaslab/util_ex_process_h5.py +26 -17
gwaslab/util_ex_process_ref.py +3 -3
gwaslab/util_ex_run_coloc.py +26 -4
gwaslab/util_in_convert_h2.py +1 -1
gwaslab/util_in_fill_data.py +44 -5
gwaslab/util_in_filter_value.py +122 -34
gwaslab/util_in_get_density.py +2 -2
gwaslab/util_in_get_sig.py +41 -9
gwaslab/viz_aux_quickfix.py +26 -21
gwaslab/viz_aux_reposition_text.py +7 -4
gwaslab/viz_aux_save_figure.py +6 -5
gwaslab/viz_plot_compare_af.py +5 -5
gwaslab/viz_plot_compare_effect.py +22 -5
gwaslab/viz_plot_miamiplot2.py +28 -20
gwaslab/viz_plot_mqqplot.py +214 -98
gwaslab/viz_plot_qqplot.py +11 -8
gwaslab/viz_plot_regionalplot.py +16 -9
gwaslab/viz_plot_trumpetplot.py +15 -6
{gwaslab-3.4.36.dist-info → gwaslab-3.4.38.dist-info}/METADATA +3 -3
gwaslab-3.4.38.dist-info/RECORD +72 -0
gwaslab-3.4.36.dist-info/RECORD +0 -72
{gwaslab-3.4.36.dist-info → gwaslab-3.4.38.dist-info}/LICENSE +0 -0
{gwaslab-3.4.36.dist-info → gwaslab-3.4.38.dist-info}/WHEEL +0 -0
{gwaslab-3.4.36.dist-info → gwaslab-3.4.38.dist-info}/top_level.txt +0 -0

gwaslab/viz_plot_mqqplot.py CHANGED Viewed

@@ -104,6 +104,15 @@ def mqqplot(insumstats,
           region_protein_coding = True,
           region_flank_factor = 0.05,
           region_anno_bbox_args = None,
+          cbar_title='LD $r^{2}$',
+          cbar_fontsize = None,
+          cbar_font_family = None,
+          track_n=4,
+          track_n_offset=0,
+          track_fontsize_ratio=0.95,
+          track_exon_ratio=1,
+          track_text_offset=1,
+          track_font_family = None,
           taf = None,
           # track_n, track_n_offset,font_ratio,exon_ratio,text_offset
           tabix=None,
@@ -204,7 +213,6 @@ def mqqplot(insumstats,
         chr_dict = get_chr_to_number()
     if xtick_chr_dict is None:
         xtick_chr_dict = get_number_to_chr()
     if gtf_chr_dict is None:
         gtf_chr_dict = get_number_to_chr()
     if rr_chr_dict is None:
@@ -243,8 +251,14 @@ def mqqplot(insumstats,
         region_ld_colors2 = ["#E4E4E4","#D8E2F2","#AFCBE3","#86B3D4","#5D98C4","#367EB7","#367EB7"]
     if region_title_args is None:
         region_title_args = {"size":10}
+    if cbar_fontsize is None:
+        cbar_fontsize = fontsize
+    if cbar_font_family is None:
+        cbar_font_family = font_family
+    if track_font_family is None:
+        track_font_family = font_family
     if taf is None:
-        taf = [4,0,0.95,1,1]
+        taf = [track_n,track_n_offset,track_fontsize_ratio,track_exon_ratio,track_text_offset]
     if maf_bins is None:
         maf_bins=[(0, 0.01), (0.01, 0.05), (0.05, 0.25),(0.25,0.5)]
     if maf_bin_colors is None:
@@ -289,40 +303,42 @@ def mqqplot(insumstats,
                     scatter_args["rasterized"]=True
                     qq_scatter_args["rasterized"]=True
-    if verbose: log.write("Start to plot manhattan/qq plot with the following basic settings {}:".format(_get_version()))
-    if verbose: log.write(" -Genomic coordinates version: {}...".format(build))
+    log.write("Start to create MQQ plot...{}:".format(_get_version()),verbose=verbose)
+    log.write(" -Genomic coordinates version: {}...".format(build),verbose=verbose)
     if build is None or build=="99":
-        if verbose: log.write("   -WARNING: Genomic coordinates version is unknown...")
-    if verbose: log.write(" -Genome-wide significance level to plot is set to "+str(sig_level_plot)+" ...")
-    if verbose: log.write(" -Raw input contains "+str(len(insumstats))+" variants...")
-    if verbose: log.write(" -Plot layout mode is : "+mode)
+        log.warning("Genomic coordinates version is unknown.")
+    log.write(" -Genome-wide significance level to plot is set to "+str(sig_level_plot)+" ...",verbose=verbose)
+    log.write(" -Raw input contains "+str(len(insumstats))+" variants...",verbose=verbose)
+    log.write(" -MQQ plot layout mode is : "+mode,verbose=verbose)
     if len(anno_set)>0 and ("m" in mode):
-        if verbose: log.write(" -Variants to annotate : "+",".join(anno_set))
+        log.write(" -Variants to annotate : "+",".join(anno_set),verbose=verbose)
     if len(highlight)>0 and ("m" in mode):
         if pd.api.types.is_list_like(highlight[0]):
             if highlight_chrpos==False:
-                if len(highlight[0]) == len(highlight_color):
-                    log.write(" -WARNING: number of locus list does not match number of colors !!!")
+                if len(highlight) != len(highlight_color):
+                    log.warning("Number of locus groups in the list does not match number of provided colors.")
                 for i, highlight_set in enumerate(highlight):
-                    if verbose: log.write(" -Set {} loci to highlight ({}) : ".format(i+1, highlight_color[i%len(highlight_color)])+",".join(highlight_set))
+                    log.write(" -Set {} loci to highlight ({}) : ".format(i+1, highlight_color[i%len(highlight_color)])+",".join(highlight_set),verbose=verbose)
             else:
-                if verbose: log.write(" -Loci to highlight ({}): {}".format(highlight_color,highlight))
-            if verbose: log.write("  -Highlight_window is set to: ", highlight_windowkb, " kb")
+                log.write(" -Loci to highlight ({}): {}".format(highlight_color,highlight),verbose=verbose)
+            log.write("  -highlight_windowkb is set to: ", highlight_windowkb, " kb",verbose=verbose)
         else:
-            if verbose: log.write(" -Loci to highlight ({}): ".format(highlight_color)+",".join(highlight))
-            if verbose: log.write("  -Highlight_window is set to: ", highlight_windowkb, " kb")
+            log.write(" -Loci to highlight ({}): ".format(highlight_color)+",".join(highlight),verbose=verbose)
+            log.write("  -highlight_windowkb is set to: ", highlight_windowkb, " kb",verbose=verbose)
     if len(pinpoint)>0 :
         if pd.api.types.is_list_like(pinpoint[0]):
-            if len(pinpoint[0]) == len(pinpoint_color):
-                log.write(" -WARNING: number of variant list does not match number of colors !!!")
+            if len(pinpoint) != len(pinpoint_color):
+                log.warning("Number of variant groups in the list does not match number of provided colors.")
             for i, pinpoint_set in enumerate(pinpoint):
-                  if verbose: log.write(" -Set {} variants to pinpoint ({}) : ".format(i+1,pinpoint_color[i%len(pinpoint_color)])+",".join(pinpoint_set))
+                  log.write(" -Set {} variants to pinpoint ({}) : ".format(i+1,pinpoint_color[i%len(pinpoint_color)])+",".join(pinpoint_set),verbose=verbose)
         else:
-            if verbose: log.write(" -Variants to pinpoint ({}) : ".format(pinpoint_color)+",".join(pinpoint))
+            log.write(" -Variants to pinpoint ({}) : ".format(pinpoint_color)+",".join(pinpoint),verbose=verbose)
     if region is not None:
-        if verbose: log.write(" -Region to plot : chr"+str(region[0])+":"+str(region[1])+"-"+str(region[2])+".")
+        log.write(" -Region to plot : chr"+str(region[0])+":"+str(region[1])+"-"+str(region[2])+".",verbose=verbose)
     # construct line series for coversion
     if additional_line is None:
@@ -340,13 +356,14 @@ def mqqplot(insumstats,
     # ax2 : qq plot
     # ax3 : gene track
     # ax4 : recombination rate
+    # cbar : color bar
     # ax5 : miami plot lower panel
     # "m" : Manhattan plot
     # "qq": QQ plot
     # "r" : regional plot
-    fig, ax1, ax2, ax3 = _process_layout(mode=mode,
+    fig, ax1, ax2, ax3, ax4, cbar = _process_layout(mode=mode,
                                          figax=figax,
                                          fig_args=fig_args,
                                          mqqratio=mqqratio,
@@ -383,7 +400,7 @@ def mqqplot(insumstats,
                                      pinpoint=pinpoint,
                                      density_color=density_color)
-    sumstats = insumstats.loc[:,usecols].copy()
+    sumstats = insumstats[usecols].copy()
     #################################################################################################
@@ -392,7 +409,7 @@ def mqqplot(insumstats,
     if (anno == "GENENAME"):
         anno_sig=True
     elif (anno is not None) and (anno is not True):
-        sumstats["Annotation"]=sumstats.loc[:,anno].astype("string")
+        sumstats["Annotation"]=sumstats[anno].astype("string")
     ## P value
     ## m, qq, r
@@ -416,15 +433,15 @@ def mqqplot(insumstats,
         region_start = region[1]
         region_end = region[2]
         marker_size=(25,45)
-        if verbose:log.write(" -Extract SNPs in region : chr{}:{}-{}...".format(region_chr, region[1], region[2]))
+        log.write(" -Extract SNPs in region : chr{}:{}-{}...".format(region_chr, region[1], region[2]),verbose=verbose)
-        in_region_snp = (sumstats[chrom]==region_chr) &(sumstats[pos]<region_end) &(sumstats[pos]>region_start)
+        in_region_snp = (sumstats[chrom]==region_chr) & (sumstats[pos]<region_end) & (sumstats[pos]>region_start)
-        if verbose:log.write(" -Extract SNPs in specified regions: "+str(sum(in_region_snp)))
+        log.write(" -Extract SNPs in specified regions: "+str(sum(in_region_snp)),verbose=verbose)
         sumstats = sumstats.loc[in_region_snp,:]
         if len(sumstats)==0:
-            log.write(" -Warning : No valid data! Please check the input.")
+            log.warning("No valid data! Please check the input.")
             return None
     ## EAF
@@ -438,11 +455,11 @@ def mqqplot(insumstats,
         sumstats["HUE"] = pd.NA
         sumstats["HUE"] = sumstats["HUE"].astype("Int64")
-    if verbose: log.write("Finished loading specified columns from the sumstats.")
+    log.write("Finished loading specified columns from the sumstats.",verbose=verbose)
 #sanity check############################################################################################################
-    log.write("Start conversion and sanity check:",verbose=verbose)
+    log.write("Start data conversion and sanity check:",verbose=verbose)
     if _if_quick_qc == False:
         log.write(" -Sanity check will be skipped.", verbose=verbose)
@@ -511,15 +528,19 @@ def mqqplot(insumstats,
                                                                         lines_to_plot=lines_to_plot,
                                                                         log = log)
     except:
-        log.write(" -Warning : No valid data! Please check the input.")
+        log.warning("No valid data! Please check the input.")
         return None
+    log.write("Finished data conversion and sanity check.",verbose=verbose)
     # Manhattan plot ##########################################################################################################
+    log.write("Start to create MQQ plot with "+str(len(sumstats))+" variants...",verbose=verbose)
     ## regional plot ->rsq
         #calculate rsq]
     if vcf_path is not None:
         if tabix is None:
             tabix = which("tabix")
+            log.write(" -tabix will be used: {}".format(tabix),verbose=verbose)
         sumstats = process_vcf(sumstats=sumstats,
                                vcf_path=vcf_path,
                                region=region,
@@ -552,8 +573,6 @@ def mqqplot(insumstats,
         if vcf_path is not None:
             sumstats["chr_hue"]=sumstats["LD"]
-        if verbose:log.write("Start to create manhattan plot with "+str(len(sumstats))+" variants:")
         ## default seetings
         palette = sns.color_palette(colors,n_colors=sumstats[chrom].nunique())
@@ -585,6 +604,7 @@ def mqqplot(insumstats,
         ## if highlight
         highlight_i = pd.DataFrame()
         if len(highlight) >0:
+            log.write(" -Creating background plot...",verbose=verbose)
             plot = sns.scatterplot(data=sumstats, x='i', y='scaled_P',
                                hue='chr_hue',
                                palette=palette,
@@ -596,8 +616,7 @@ def mqqplot(insumstats,
                                zorder=2,ax=ax1,edgecolor=edgecolor, **scatter_args)
             if pd.api.types.is_list_like(highlight[0]) and highlight_chrpos==False:
                 for i, highlight_set in enumerate(highlight):
-                    if verbose: log.write(" -Highlighting set {} target loci...".format(i+1))
-                    print(sumstats["HUE"].dtype)
+                    log.write(" -Highlighting set {} target loci...".format(i+1),verbose=verbose)
                     sns.scatterplot(data=sumstats.loc[sumstats["HUE"]==i], x='i', y='scaled_P',
                         hue="HUE",
                         palette={i:highlight_color[i%len(highlight_color)]},
@@ -609,7 +628,7 @@ def mqqplot(insumstats,
                         zorder=3+i,ax=ax1,edgecolor=edgecolor,**scatter_args)
                 highlight_i = sumstats.loc[~sumstats["HUE"].isna(),"i"].values
             else:
-                if verbose: log.write(" -Highlighting target loci...")
+                log.write(" -Highlighting target loci...",verbose=verbose)
                 sns.scatterplot(data=sumstats.loc[sumstats["HUE"]==0], x='i', y='scaled_P',
                     hue="HUE",
                     palette={0:highlight_color},
@@ -658,6 +677,7 @@ def mqqplot(insumstats,
                 hue = 'chr_hue'
                 hue_norm=None
                 to_plot = sumstats
+                log.write(" -Creating background plot...",verbose=verbose)
                 plot = sns.scatterplot(data=to_plot, x='i', y='scaled_P',
                        hue=hue,
                        palette= palette,
@@ -677,17 +697,17 @@ def mqqplot(insumstats,
                 for i, pinpoint_set in enumerate(pinpoint):
                     if sum(sumstats[snpid].isin(pinpoint_set))>0:
                         to_pinpoint = sumstats.loc[sumstats[snpid].isin(pinpoint_set),:]
-                        if verbose: log.write(" -Pinpointing set {} target vairants...".format(i+1))
+                        log.write(" -Pinpointing set {} target vairants...".format(i+1),verbose=verbose)
                         ax1.scatter(to_pinpoint["i"],to_pinpoint["scaled_P"],color=pinpoint_color[i%len(pinpoint_color)],zorder=100,s=marker_size[1]+1)
                     else:
-                        if verbose: log.write(" -Target vairants to pinpoint were not found. Skip pinpointing process...")
+                        log.write(" -Target vairants to pinpoint were not found. Skip pinpointing process...",verbose=verbose)
             else:
                 if sum(sumstats[snpid].isin(pinpoint))>0:
                     to_pinpoint = sumstats.loc[sumstats[snpid].isin(pinpoint),:]
-                    if verbose: log.write(" -Pinpointing target vairants...")
+                    log.write(" -Pinpointing target vairants...",verbose=verbose)
                     ax1.scatter(to_pinpoint["i"],to_pinpoint["scaled_P"],color=pinpoint_color,zorder=100,s=marker_size[1]+1)
                 else:
-                    if verbose: log.write(" -Target vairants to pinpoint were not found. Skip pinpointing process...")
+                    log.write(" -Target vairants to pinpoint were not found. Skip pinpointing process...",verbose=verbose)
@@ -697,7 +717,7 @@ def mqqplot(insumstats,
         # if regional plot : pinpoint lead , add color bar ##################################################
         if (region is not None) and ("r" in mode):
-            ax1, ax3, lead_snp_i, lead_snp_i2 =_plot_regional(
+            ax1, ax3, ax4, cbar, lead_snp_i, lead_snp_i2 =_plot_regional(
                                 sumstats=sumstats,
                                 fig=fig,
                                 ax1=ax1,
@@ -738,6 +758,7 @@ def mqqplot(insumstats,
                                 region_recombination = region_recombination,
                                 region_protein_coding=region_protein_coding,
                                 region_flank_factor =region_flank_factor,
+                                track_font_family=track_font_family,
                                 taf=taf,
                                 tabix=tabix,
                                 chrom=chrom,
@@ -745,50 +766,20 @@ def mqqplot(insumstats,
                                 verbose=verbose,
                                 log=log
                             )
         else:
             lead_snp_i= None
             lead_snp_i2=None
-        if region is None:
-            ax1 = _process_xtick(ax1, chrom_df, xtick_chr_dict, fontsize, font_family)
-        # genomewide significant line
-        ax1 = _process_line(ax1,
-                            sig_line,
-                            suggestive_sig_line,
-                            additional_line,
-                            lines_to_plot ,
-                            sc_linewidth,
-                            sig_line_color,
-                            suggestive_sig_line_color,
-                            additional_line_color,
-                            mode,
-                            bmean,
-                            bmedian )
-        ax1 = _set_yticklabels(cut=cut,
-                     cutfactor=cutfactor,
-                     cut_log=cut_log,
-                     ax1=ax1,
-                     skip=skip,
-                     maxy=maxy,
-                     maxticker=maxticker,
-                     ystep=ystep,
-                     sc_linewidth=sc_linewidth,
-                     cut_line_color=cut_line_color,
-                     fontsize=fontsize,
-                     font_family=font_family,
-                     ytick3=ytick3,
-                     ylabels=ylabels,
-                     ylabels_converted=ylabels_converted
-                     )
+        log.write("Finished creating MQQ plot successfully!",verbose=verbose)
         # Get top variants for annotation #######################################################
+        log.write("Start to extract variants for annotation...",verbose=verbose)
         if (anno and anno!=True) or (len(anno_set)>0):
             if len(anno_set)>0:
                 to_annotate=sumstats.loc[sumstats[snpid].isin(anno_set),:]
                 if to_annotate.empty is not True:
-                    if verbose: log.write(" -Found "+str(len(to_annotate))+" specified variants to annotate...")
+                    log.write(" -Found "+str(len(to_annotate))+" specified variants to annotate...",verbose=verbose)
             else:
                 to_annotate=getsig(sumstats.loc[sumstats["scaled_P"]> float(-np.log10(sig_level_lead)),:],
                                snpid,
@@ -801,7 +792,7 @@ def mqqplot(insumstats,
                                mlog10p="scaled_P",
                                verbose=False)
                 if (to_annotate.empty is not True) and ("b" not in mode):
-                    if verbose: log.write(" -Found "+str(len(to_annotate))+" significant variants with a sliding window size of "+str(windowsizekb)+" kb...")
+                    log.write(" -Found "+str(len(to_annotate))+" significant variants with a sliding window size of "+str(windowsizekb)+" kb...",verbose=verbose)
         else:
             to_annotate=getsig(sumstats.loc[sumstats["scaled_P"]> float(-np.log10(sig_level_lead)),:],
                                "i",
@@ -814,7 +805,7 @@ def mqqplot(insumstats,
                                mlog10p="scaled_P",
                                sig_level=sig_level_lead)
             if (to_annotate.empty is not True) and ("b" not in mode):
-                if verbose: log.write(" -Found "+str(len(to_annotate))+" significant variants with a sliding window size of "+str(windowsizekb)+" kb...")
+                log.write(" -Found "+str(len(to_annotate))+" significant variants with a sliding window size of "+str(windowsizekb)+" kb...",verbose=verbose)
         if (to_annotate.empty is not True) and anno=="GENENAME":
             to_annotate = annogene(to_annotate,
                                    id=snpid,
@@ -824,21 +815,105 @@ def mqqplot(insumstats,
                                    build=build,
                                    source=anno_source,
                                    verbose=verbose).rename(columns={"GENE":"Annotation"})
+        log.write("Finished extracting variants for annotation...",verbose=verbose)
         # Configure X, Y axes #######################################################
-        ax1 = _process_ylabel(ylabel, ax1,  mode, bwindowsizekb, fontsize, font_family)
-        ax1, ax3 = _process_xlabel(region, xlabel, ax1, gtf_path, mode, fontsize, font_family,  ax3=ax3 )
+        log.write("Start to process figure arts.",verbose=verbose)
+        if region is None:
+            # if Manhattan plot
+            ax1 = _process_xtick(ax1=ax1,
+                                 chrom_df=chrom_df,
+                                 xtick_chr_dict=xtick_chr_dict,
+                                 fontsize = fontsize,
+                                 font_family=font_family,
+                                 log=log,
+                                 verbose=verbose)
+        ax1, ax3 = _process_xlabel(region=region,
+                                   xlabel=xlabel,
+                                   ax1=ax1,
+                                   gtf_path=gtf_path,
+                                   mode=mode,
+                                   fontsize=fontsize,
+                                   font_family=font_family,
+                                   ax3=ax3,
+                                   log=log,
+                                   verbose=verbose)
+        ax1, ax4 = _process_ylabel(ylabel=ylabel,
+                                   ax1=ax1,
+                                   mode=mode,
+                                   bwindowsizekb=bwindowsizekb,
+                                   fontsize=fontsize,
+                                   font_family=font_family,
+                                   ax4=ax4,
+                                   log=log,
+                                   verbose=verbose)
+        ax1 = _set_yticklabels(cut=cut,
+                     cutfactor=cutfactor,
+                     cut_log=cut_log,
+                     ax1=ax1,
+                     skip=skip,
+                     maxy=maxy,
+                     maxticker=maxticker,
+                     ystep=ystep,
+                     sc_linewidth=sc_linewidth,
+                     cut_line_color=cut_line_color,
+                     fontsize=fontsize,
+                     font_family=font_family,
+                     ytick3=ytick3,
+                     ylabels=ylabels,
+                     ylabels_converted=ylabels_converted,
+                     log=log,
+                     verbose=verbose)
+        ax1, ax4 = _process_ytick(ax1=ax1,
+                                   fontsize=fontsize,
+                                   font_family=font_family,
+                                   ax4=ax4,
+                                   log=log,
+                                   verbose=verbose)
+        # regional plot cbar
+        if cbar is not None:
+            cbar = _process_cbar(cbar,
+                                 cbar_fontsize=fontsize,
+                                 cbar_font_family=font_family,
+                                 cbar_title=cbar_title,
+                                 log=log,
+                                 verbose=verbose)
         ax1 = _process_spine(ax1, mode)
-        if verbose: log.write("Finished creating Manhattan plot successfully!")
+        # genomewide significant line
+        ax1 = _process_line(ax1,
+                            sig_line,
+                            suggestive_sig_line,
+                            additional_line,
+                            lines_to_plot ,
+                            sc_linewidth,
+                            sig_line_color,
+                            suggestive_sig_line_color,
+                            additional_line_color,
+                            mode,
+                            bmean,
+                            bmedian,
+                            log=log,
+                            verbose=verbose )
         if mtitle and anno and len(to_annotate)>0:
             pad=(ax1.transData.transform((skip, title_pad*maxy))[1]-ax1.transData.transform((skip, maxy)))[1]
             ax1.set_title(mtitle,pad=pad,fontsize=title_fontsize,family=font_family)
         elif mtitle:
             ax1.set_title(mtitle,fontsize=title_fontsize,family=font_family)
+        log.write("Finished processing figure arts.",verbose=verbose)
         # Add annotation arrows and texts
+        log.write("Start to annotate variants...",verbose=verbose)
         ax1 = annotate_single(
                                 sumstats=sumstats,
                                 anno=anno,
@@ -872,7 +947,8 @@ def mqqplot(insumstats,
                                 log=log,
                                _invert=_invert
                             )
-    # Manhatann plot Finished #####################################################################
+        log.write("Finished annotating variants.",verbose=verbose)
+    # Manhatann-like plot Finished #####################################################################
     # QQ plot #########################################################################################################
     if "qq" in mode:
@@ -916,9 +992,9 @@ def mqqplot(insumstats,
     # Y axis jagged
     if jagged==True:
-        ax1 = _jagged_y(cut=cut,skip=skip,ax1=ax1,mode=1,mqqratio=mqqratio,jagged_len=jagged_len,jagged_wid=jagged_wid)
+        ax1 = _jagged_y(cut=cut,skip=skip,ax1=ax1,mode=1,mqqratio=mqqratio,jagged_len=jagged_len,jagged_wid=jagged_wid,log=log, verbose=verbose)
         if "qq" in mode:
-            ax2 = _jagged_y(cut=cut,skip=skip,ax1=ax2,mode=2,mqqratio=mqqratio,jagged_len=jagged_len,jagged_wid=jagged_wid)
+            ax2 = _jagged_y(cut=cut,skip=skip,ax1=ax2,mode=2,mqqratio=mqqratio,jagged_len=jagged_len,jagged_wid=jagged_wid,log=log, verbose=verbose)
     # XY lim
     if ylim is not None:
@@ -943,6 +1019,8 @@ def mqqplot(insumstats,
     # Return matplotlib figure object #######################################################################################
     if _get_region_lead==True:
         return fig, log, lead_snp_i, lead_snp_i2
+    log.write("Finished creating plot successfully!",verbose=verbose)
     return fig, log
 ##############################################################################################################################################################################
@@ -1037,22 +1115,22 @@ def _sanity_check(sumstats, mode, chrom, pos, stratified, _if_quick_qc, log, ver
         #sanity check : drop variants with na values in chr and pos df
         sumstats = sumstats.dropna(subset=[chrom,pos])
         after_number=len(sumstats)
-        if verbose:log.write(" -Removed "+ str(pre_number-after_number) +" variants with nan in CHR or POS column ...")
+        log.write(" -Removed "+ str(pre_number-after_number) +" variants with nan in CHR or POS column ...",verbose=verbose)
         out_of_range_chr = sumstats[chrom]<=0
-        if verbose:log.write(" -Removed {} variants with CHR <=0...".format(sum(out_of_range_chr)))
+        log.write(" -Removed {} variants with CHR <=0...".format(sum(out_of_range_chr)),verbose=verbose)
         sumstats = sumstats.loc[~out_of_range_chr,:]
     if stratified is True and _if_quick_qc:
         pre_number=len(sumstats)
         sumstats = sumstats.dropna(subset=["MAF"])
         after_number=len(sumstats)
-        if verbose:log.write(" -Removed "+ str(pre_number-after_number) +" variants with nan in EAF column ...")
+        log.write(" -Removed "+ str(pre_number-after_number) +" variants with nan in EAF column ...",verbose=verbose)
     if "b" not in mode and _if_quick_qc:
         pre_number=len(sumstats)
         sumstats = sumstats.dropna(subset=["raw_P"])
         after_number=len(sumstats)
-        if verbose:log.write(" -Removed "+ str(pre_number-after_number) +" variants with nan in P column ...")
+        log.write(" -Removed "+ str(pre_number-after_number) +" variants with nan in P column ...",verbose=verbose)
     return sumstats
 def _process_p_value(sumstats, mode,p, mlog10p, scaled, log, verbose ):
@@ -1061,7 +1139,7 @@ def _process_p_value(sumstats, mode,p, mlog10p, scaled, log, verbose ):
         sumstats["scaled_P"] = sumstats["DENSITY"].copy()
         sumstats["raw_P"] = -np.log10(sumstats["DENSITY"].copy()+2)
     elif scaled is True:
-        if verbose:log.write(" -P values are already converted to -log10(P)!")
+        log.write(" -P values are already converted to -log10(P)!",verbose=verbose)
         sumstats["scaled_P"] = sumstats["raw_P"].copy()
         sumstats["raw_P"] = np.power(10,-sumstats["scaled_P"].astype("float64"))
     else:
@@ -1109,7 +1187,7 @@ def _process_highlight(sumstats, highlight, highlight_chrpos, highlight_windowkb
                         sumstats.loc[right_chr&up_pos&low_pos,"HUE"]=0
         else:
             # highlight for one set
-            # to_highlight = sumstats.loc[sumstats[snpid].isin(highlight),:]
+            to_highlight = sumstats.loc[sumstats[snpid].isin(highlight),:]
             #assign colors: 0 is hightlight color
             for index,row in to_highlight.iterrows():
                 target_chr = int(row[chrom])
@@ -1122,7 +1200,7 @@ def _process_highlight(sumstats, highlight, highlight_chrpos, highlight_windowkb
 def _process_density(sumstats, mode, bwindowsizekb, chrom, pos, verbose, log):
     if "b" in mode and "DENSITY" not in sumstats.columns:
-        if verbose:log.write(" -Calculating DENSITY with windowsize of ",bwindowsizekb ," kb")
+        log.write(" -Calculating DENSITY with windowsize of ",bwindowsizekb ," kb",verbose=verbose)
         large_number = _get_largenumber(sumstats[pos].max(),log=log)
         stack=[]
@@ -1143,11 +1221,12 @@ def _process_density(sumstats, mode, bwindowsizekb, chrom, pos, verbose, log):
     elif "b" in mode and "DENSITY" in sumstats.columns:
         bmean=sumstats["DENSITY"].mean()
         bmedian=sumstats["DENSITY"].median()
-        if verbose:log.write(" -DENSITY column exists. Skipping calculation...")
+        log.write(" -DENSITY column exists. Skipping calculation...",verbose=verbose)
     return   sumstats, bmean, bmedian
-def _process_line(ax1, sig_line, suggestive_sig_line, additional_line, lines_to_plot , sc_linewidth, sig_line_color, suggestive_sig_line_color, additional_line_color, mode, bmean, bmedian ):
+def _process_line(ax1, sig_line, suggestive_sig_line, additional_line, lines_to_plot , sc_linewidth, sig_line_color, suggestive_sig_line_color, additional_line_color, mode, bmean, bmedian , log=Log(),verbose=True):
     # genomewide significant line
+    log.write(" -Processing lines...",verbose=verbose)
     if sig_line is True:
         sigline = ax1.axhline(y=lines_to_plot[0],
                                 linewidth = sc_linewidth,
@@ -1173,12 +1252,39 @@ def _process_line(ax1, sig_line, suggestive_sig_line, additional_line, lines_to_
         medianline = ax1.axhline(y=bmedian, linewidth = sc_linewidth,linestyle="--",color=sig_line_color,zorder=1000)
     return ax1
-def _process_xtick(ax1, chrom_df, xtick_chr_dict, fontsize, font_family):
+def _process_cbar(cbar, cbar_fontsize, cbar_font_family, cbar_title, log=Log(),verbose=True):
+    log.write(" -Processing color bar...",verbose=verbose)
+    if type(cbar) == list:
+        for cbar_single in cbar:
+            cbar_yticklabels = cbar_single.ax.get_yticklabels()
+            cbar_single.ax.set_yticklabels(cbar_yticklabels, fontsize=cbar_fontsize, family=cbar_font_family )
+            cbar_single.ax.set_title(cbar_title, fontsize=cbar_fontsize, family=cbar_font_family, loc="center",y=-0.2 )
+    else:
+        cbar_yticklabels = cbar.ax.get_yticklabels()
+        cbar.ax.set_yticklabels(cbar_yticklabels, fontsize=cbar_fontsize, family=cbar_font_family )
+        cbar.ax.set_title(cbar_title, fontsize=cbar_fontsize, family=cbar_font_family, loc="center",y=-0.2 )
+    return cbar
+def _process_xtick(ax1, chrom_df, xtick_chr_dict, fontsize, font_family, log=Log(),verbose=True):
+    log.write(" -Processing X ticks...",verbose=verbose)
     ax1.set_xticks(chrom_df.astype("float64"))
     ax1.set_xticklabels(chrom_df.index.astype("Int64").map(xtick_chr_dict),fontsize=fontsize,family=font_family)
     return ax1
-def _process_xlabel(region, xlabel, ax1, gtf_path, mode, fontsize, font_family,  ax3=None ):
+def _process_ytick(ax1, fontsize, font_family, ax4, log=Log(),verbose=True):
+    log.write(" -Processing Y labels...",verbose=verbose)
+    ax1_yticklabels = ax1.get_yticklabels()
+    #ax1.set_yticklabels(ax1_yticklabels,fontsize=fontsize,family=font_family)
+    ax1_yticks = ax1.get_yticks()
+    ax1.set_yticks(ax1_yticks,ax1_yticklabels,fontsize=fontsize,family=font_family)
+    if ax4 is not None:
+        ax4_yticklabels = ax4.get_yticklabels()
+        ax4_yticks = ax4.get_yticks()
+        ax4.set_yticks(ax4_yticks,ax4_yticklabels, fontsize=fontsize,family=font_family)
+    return ax1, ax4
+def _process_xlabel(region, xlabel, ax1, gtf_path, mode, fontsize, font_family,  ax3=None , log=Log(),verbose=True):
+    log.write(" -Processing X labels...",verbose=verbose)
     if region is not None:
         if xlabel is None:
             xlabel = "Chromosome "+str(region[0])+" (MB)"
@@ -1192,7 +1298,8 @@ def _process_xlabel(region, xlabel, ax1, gtf_path, mode, fontsize, font_family,
         ax1.set_xlabel(xlabel,fontsize=fontsize,family=font_family)
     return ax1, ax3
-def _process_ylabel(ylabel, ax1,  mode, bwindowsizekb, fontsize, font_family):
+def _process_ylabel(ylabel, ax1,  mode, bwindowsizekb, fontsize, font_family, ax4=None, log=Log(),verbose=True):
+    log.write(" -Processing Y labels...",verbose=verbose)
     if "b" in mode:
         if ylabel is None:
             ylabel ="Density of GWAS \n SNPs within "+str(bwindowsizekb)+" kb"
@@ -1201,7 +1308,10 @@ def _process_ylabel(ylabel, ax1,  mode, bwindowsizekb, fontsize, font_family):
         if ylabel is None:
             ylabel ="$-log_{10}(P)$"
         ax1.set_ylabel(ylabel,fontsize=fontsize,family=font_family)
-    return ax1
+    if ax4 is not None:
+        ax4_ylabel = ax4.get_ylabel()
+        ax4.set_ylabel(ax4_ylabel, fontsize=fontsize, family=font_family )
+    return ax1, ax4
 def _process_spine(ax1, mode):
     ax1.spines["top"].set_visible(False)
@@ -1218,6 +1328,7 @@ def _process_layout(mode, figax, fig_args, mqqratio, region_hspace):
     if  mode=="qqm":
         fig, (ax2, ax1) = plt.subplots(1, 2,gridspec_kw={'width_ratios': [1, mqqratio]},**fig_args)
         ax3 = None
     elif mode=="mqq":
         if figax is not None:
             fig = figax[0]
@@ -1226,6 +1337,7 @@ def _process_layout(mode, figax, fig_args, mqqratio, region_hspace):
         else:
             fig, (ax1, ax2) = plt.subplots(1, 2,gridspec_kw={'width_ratios': [mqqratio, 1]},**fig_args)
         ax3 = None
     elif mode=="m":
         if figax is not None:
             fig = figax[0]
@@ -1234,10 +1346,12 @@ def _process_layout(mode, figax, fig_args, mqqratio, region_hspace):
             fig, ax1 = plt.subplots(1, 1,**fig_args)
         ax2 = None
         ax3 = None
     elif mode=="qq":
         fig, ax2 = plt.subplots(1, 1,**fig_args)
         ax1=None
         ax3=None
     elif mode=="r":
         if figax is not None:
             fig = figax[0]
@@ -1257,4 +1371,6 @@ def _process_layout(mode, figax, fig_args, mqqratio, region_hspace):
         ax3 = None
     else:
         raise ValueError("Please select one from the 5 modes: mqq/qqm/m/qq/r/b")
-    return fig, ax1, ax2, ax3
+    ax4=None
+    cbar=None
+    return fig, ax1, ax2, ax3, ax4, cbar

gwaslab 3.4.36__py3-none-any.whl → 3.4.38__py3-none-any.whl

Potentially problematic release.

gwaslab 3.4.36__py3-none-any.whl → 3.4.38__py3-none-any.whl