PyPI - gwaslab - Versions diffs - 3.4.37__py3-none-any.whl → 3.4.38__py3-none-any.whl - Mend

gwaslab 3.4.37__py3-none-any.whl → 3.4.38__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of gwaslab might be problematic. Click here for more details.

Files changed (37) hide show

gwaslab/data/formatbook.json +722 -721
gwaslab/g_Log.py +8 -0
gwaslab/g_Sumstats.py +26 -147
gwaslab/g_SumstatsPair.py +6 -2
gwaslab/g_Sumstats_summary.py +3 -3
gwaslab/g_version.py +2 -2
gwaslab/hm_casting.py +29 -15
gwaslab/hm_harmonize_sumstats.py +291 -163
gwaslab/hm_rsid_to_chrpos.py +1 -1
gwaslab/io_preformat_input.py +43 -37
gwaslab/io_to_formats.py +428 -295
gwaslab/qc_check_datatype.py +3 -3
gwaslab/qc_fix_sumstats.py +793 -682
gwaslab/util_ex_calculate_ldmatrix.py +29 -11
gwaslab/util_ex_gwascatalog.py +1 -1
gwaslab/util_ex_ldproxyfinder.py +1 -1
gwaslab/util_ex_process_ref.py +3 -3
gwaslab/util_ex_run_coloc.py +26 -4
gwaslab/util_in_convert_h2.py +1 -1
gwaslab/util_in_fill_data.py +2 -2
gwaslab/util_in_filter_value.py +122 -34
gwaslab/util_in_get_density.py +2 -2
gwaslab/util_in_get_sig.py +41 -9
gwaslab/viz_aux_quickfix.py +24 -19
gwaslab/viz_aux_reposition_text.py +7 -4
gwaslab/viz_aux_save_figure.py +6 -5
gwaslab/viz_plot_compare_af.py +5 -5
gwaslab/viz_plot_miamiplot2.py +28 -20
gwaslab/viz_plot_mqqplot.py +109 -72
gwaslab/viz_plot_qqplot.py +11 -8
gwaslab/viz_plot_regionalplot.py +3 -1
gwaslab/viz_plot_trumpetplot.py +15 -6
{gwaslab-3.4.37.dist-info → gwaslab-3.4.38.dist-info}/METADATA +2 -2
{gwaslab-3.4.37.dist-info → gwaslab-3.4.38.dist-info}/RECORD +37 -37
{gwaslab-3.4.37.dist-info → gwaslab-3.4.38.dist-info}/LICENSE +0 -0
{gwaslab-3.4.37.dist-info → gwaslab-3.4.38.dist-info}/WHEEL +0 -0
{gwaslab-3.4.37.dist-info → gwaslab-3.4.38.dist-info}/top_level.txt +0 -0

gwaslab/viz_plot_mqqplot.py CHANGED Viewed

@@ -213,7 +213,6 @@ def mqqplot(insumstats,
         chr_dict = get_chr_to_number()
     if xtick_chr_dict is None:
         xtick_chr_dict = get_number_to_chr()
     if gtf_chr_dict is None:
         gtf_chr_dict = get_number_to_chr()
     if rr_chr_dict is None:
@@ -304,40 +303,42 @@ def mqqplot(insumstats,
                     scatter_args["rasterized"]=True
                     qq_scatter_args["rasterized"]=True
-    if verbose: log.write("Start to create MQQ plot with the following basic settings {}:".format(_get_version()))
-    if verbose: log.write(" -Genomic coordinates version: {}...".format(build))
+    log.write("Start to create MQQ plot...{}:".format(_get_version()),verbose=verbose)
+    log.write(" -Genomic coordinates version: {}...".format(build),verbose=verbose)
     if build is None or build=="99":
-        if verbose: log.write("   -WARNING: Genomic coordinates version is unknown...")
-    if verbose: log.write(" -Genome-wide significance level to plot is set to "+str(sig_level_plot)+" ...")
-    if verbose: log.write(" -Raw input contains "+str(len(insumstats))+" variants...")
-    if verbose: log.write(" -MQQ plot layout mode is : "+mode)
+        log.warning("Genomic coordinates version is unknown.")
+    log.write(" -Genome-wide significance level to plot is set to "+str(sig_level_plot)+" ...",verbose=verbose)
+    log.write(" -Raw input contains "+str(len(insumstats))+" variants...",verbose=verbose)
+    log.write(" -MQQ plot layout mode is : "+mode,verbose=verbose)
     if len(anno_set)>0 and ("m" in mode):
-        if verbose: log.write(" -Variants to annotate : "+",".join(anno_set))
+        log.write(" -Variants to annotate : "+",".join(anno_set),verbose=verbose)
     if len(highlight)>0 and ("m" in mode):
         if pd.api.types.is_list_like(highlight[0]):
             if highlight_chrpos==False:
-                if len(highlight[0]) == len(highlight_color):
-                    log.write(" -WARNING: number of locus list does not match number of colors !!!")
+                if len(highlight) != len(highlight_color):
+                    log.warning("Number of locus groups in the list does not match number of provided colors.")
                 for i, highlight_set in enumerate(highlight):
-                    if verbose: log.write(" -Set {} loci to highlight ({}) : ".format(i+1, highlight_color[i%len(highlight_color)])+",".join(highlight_set))
+                    log.write(" -Set {} loci to highlight ({}) : ".format(i+1, highlight_color[i%len(highlight_color)])+",".join(highlight_set),verbose=verbose)
             else:
-                if verbose: log.write(" -Loci to highlight ({}): {}".format(highlight_color,highlight))
-            if verbose: log.write("  -Highlight_window is set to: ", highlight_windowkb, " kb")
+                log.write(" -Loci to highlight ({}): {}".format(highlight_color,highlight),verbose=verbose)
+            log.write("  -highlight_windowkb is set to: ", highlight_windowkb, " kb",verbose=verbose)
         else:
-            if verbose: log.write(" -Loci to highlight ({}): ".format(highlight_color)+",".join(highlight))
-            if verbose: log.write("  -Highlight_window is set to: ", highlight_windowkb, " kb")
+            log.write(" -Loci to highlight ({}): ".format(highlight_color)+",".join(highlight),verbose=verbose)
+            log.write("  -highlight_windowkb is set to: ", highlight_windowkb, " kb",verbose=verbose)
     if len(pinpoint)>0 :
         if pd.api.types.is_list_like(pinpoint[0]):
-            if len(pinpoint[0]) == len(pinpoint_color):
-                log.write(" -WARNING: number of variant list does not match number of colors !!!")
+            if len(pinpoint) != len(pinpoint_color):
+                log.warning("Number of variant groups in the list does not match number of provided colors.")
             for i, pinpoint_set in enumerate(pinpoint):
-                  if verbose: log.write(" -Set {} variants to pinpoint ({}) : ".format(i+1,pinpoint_color[i%len(pinpoint_color)])+",".join(pinpoint_set))
+                  log.write(" -Set {} variants to pinpoint ({}) : ".format(i+1,pinpoint_color[i%len(pinpoint_color)])+",".join(pinpoint_set),verbose=verbose)
         else:
-            if verbose: log.write(" -Variants to pinpoint ({}) : ".format(pinpoint_color)+",".join(pinpoint))
+            log.write(" -Variants to pinpoint ({}) : ".format(pinpoint_color)+",".join(pinpoint),verbose=verbose)
     if region is not None:
-        if verbose: log.write(" -Region to plot : chr"+str(region[0])+":"+str(region[1])+"-"+str(region[2])+".")
+        log.write(" -Region to plot : chr"+str(region[0])+":"+str(region[1])+"-"+str(region[2])+".",verbose=verbose)
     # construct line series for coversion
     if additional_line is None:
@@ -399,7 +400,7 @@ def mqqplot(insumstats,
                                      pinpoint=pinpoint,
                                      density_color=density_color)
-    sumstats = insumstats.loc[:,usecols].copy()
+    sumstats = insumstats[usecols].copy()
     #################################################################################################
@@ -408,7 +409,7 @@ def mqqplot(insumstats,
     if (anno == "GENENAME"):
         anno_sig=True
     elif (anno is not None) and (anno is not True):
-        sumstats["Annotation"]=sumstats.loc[:,anno].astype("string")
+        sumstats["Annotation"]=sumstats[anno].astype("string")
     ## P value
     ## m, qq, r
@@ -432,15 +433,15 @@ def mqqplot(insumstats,
         region_start = region[1]
         region_end = region[2]
         marker_size=(25,45)
-        if verbose:log.write(" -Extract SNPs in region : chr{}:{}-{}...".format(region_chr, region[1], region[2]))
+        log.write(" -Extract SNPs in region : chr{}:{}-{}...".format(region_chr, region[1], region[2]),verbose=verbose)
-        in_region_snp = (sumstats[chrom]==region_chr) &(sumstats[pos]<region_end) &(sumstats[pos]>region_start)
+        in_region_snp = (sumstats[chrom]==region_chr) & (sumstats[pos]<region_end) & (sumstats[pos]>region_start)
-        if verbose:log.write(" -Extract SNPs in specified regions: "+str(sum(in_region_snp)))
+        log.write(" -Extract SNPs in specified regions: "+str(sum(in_region_snp)),verbose=verbose)
         sumstats = sumstats.loc[in_region_snp,:]
         if len(sumstats)==0:
-            log.write(" -Warning : No valid data! Please check the input.")
+            log.warning("No valid data! Please check the input.")
             return None
     ## EAF
@@ -454,11 +455,11 @@ def mqqplot(insumstats,
         sumstats["HUE"] = pd.NA
         sumstats["HUE"] = sumstats["HUE"].astype("Int64")
-    if verbose: log.write("Finished loading specified columns from the sumstats.")
+    log.write("Finished loading specified columns from the sumstats.",verbose=verbose)
 #sanity check############################################################################################################
-    log.write("Start conversion and sanity check:",verbose=verbose)
+    log.write("Start data conversion and sanity check:",verbose=verbose)
     if _if_quick_qc == False:
         log.write(" -Sanity check will be skipped.", verbose=verbose)
@@ -527,15 +528,19 @@ def mqqplot(insumstats,
                                                                         lines_to_plot=lines_to_plot,
                                                                         log = log)
     except:
-        log.write(" -Warning : No valid data! Please check the input.")
+        log.warning("No valid data! Please check the input.")
         return None
+    log.write("Finished data conversion and sanity check.",verbose=verbose)
     # Manhattan plot ##########################################################################################################
+    log.write("Start to create MQQ plot with "+str(len(sumstats))+" variants...",verbose=verbose)
     ## regional plot ->rsq
         #calculate rsq]
     if vcf_path is not None:
         if tabix is None:
             tabix = which("tabix")
+            log.write(" -tabix will be used: {}".format(tabix),verbose=verbose)
         sumstats = process_vcf(sumstats=sumstats,
                                vcf_path=vcf_path,
                                region=region,
@@ -568,8 +573,6 @@ def mqqplot(insumstats,
         if vcf_path is not None:
             sumstats["chr_hue"]=sumstats["LD"]
-        if verbose:log.write("Start to create MQQ plot with "+str(len(sumstats))+" variants:")
         ## default seetings
         palette = sns.color_palette(colors,n_colors=sumstats[chrom].nunique())
@@ -601,6 +604,7 @@ def mqqplot(insumstats,
         ## if highlight
         highlight_i = pd.DataFrame()
         if len(highlight) >0:
+            log.write(" -Creating background plot...",verbose=verbose)
             plot = sns.scatterplot(data=sumstats, x='i', y='scaled_P',
                                hue='chr_hue',
                                palette=palette,
@@ -612,8 +616,7 @@ def mqqplot(insumstats,
                                zorder=2,ax=ax1,edgecolor=edgecolor, **scatter_args)
             if pd.api.types.is_list_like(highlight[0]) and highlight_chrpos==False:
                 for i, highlight_set in enumerate(highlight):
-                    if verbose: log.write(" -Highlighting set {} target loci...".format(i+1))
-                    print(sumstats["HUE"].dtype)
+                    log.write(" -Highlighting set {} target loci...".format(i+1),verbose=verbose)
                     sns.scatterplot(data=sumstats.loc[sumstats["HUE"]==i], x='i', y='scaled_P',
                         hue="HUE",
                         palette={i:highlight_color[i%len(highlight_color)]},
@@ -625,7 +628,7 @@ def mqqplot(insumstats,
                         zorder=3+i,ax=ax1,edgecolor=edgecolor,**scatter_args)
                 highlight_i = sumstats.loc[~sumstats["HUE"].isna(),"i"].values
             else:
-                if verbose: log.write(" -Highlighting target loci...")
+                log.write(" -Highlighting target loci...",verbose=verbose)
                 sns.scatterplot(data=sumstats.loc[sumstats["HUE"]==0], x='i', y='scaled_P',
                     hue="HUE",
                     palette={0:highlight_color},
@@ -674,6 +677,7 @@ def mqqplot(insumstats,
                 hue = 'chr_hue'
                 hue_norm=None
                 to_plot = sumstats
+                log.write(" -Creating background plot...",verbose=verbose)
                 plot = sns.scatterplot(data=to_plot, x='i', y='scaled_P',
                        hue=hue,
                        palette= palette,
@@ -693,17 +697,17 @@ def mqqplot(insumstats,
                 for i, pinpoint_set in enumerate(pinpoint):
                     if sum(sumstats[snpid].isin(pinpoint_set))>0:
                         to_pinpoint = sumstats.loc[sumstats[snpid].isin(pinpoint_set),:]
-                        if verbose: log.write(" -Pinpointing set {} target vairants...".format(i+1))
+                        log.write(" -Pinpointing set {} target vairants...".format(i+1),verbose=verbose)
                         ax1.scatter(to_pinpoint["i"],to_pinpoint["scaled_P"],color=pinpoint_color[i%len(pinpoint_color)],zorder=100,s=marker_size[1]+1)
                     else:
-                        if verbose: log.write(" -Target vairants to pinpoint were not found. Skip pinpointing process...")
+                        log.write(" -Target vairants to pinpoint were not found. Skip pinpointing process...",verbose=verbose)
             else:
                 if sum(sumstats[snpid].isin(pinpoint))>0:
                     to_pinpoint = sumstats.loc[sumstats[snpid].isin(pinpoint),:]
-                    if verbose: log.write(" -Pinpointing target vairants...")
+                    log.write(" -Pinpointing target vairants...",verbose=verbose)
                     ax1.scatter(to_pinpoint["i"],to_pinpoint["scaled_P"],color=pinpoint_color,zorder=100,s=marker_size[1]+1)
                 else:
-                    if verbose: log.write(" -Target vairants to pinpoint were not found. Skip pinpointing process...")
+                    log.write(" -Target vairants to pinpoint were not found. Skip pinpointing process...",verbose=verbose)
@@ -767,12 +771,15 @@ def mqqplot(insumstats,
             lead_snp_i= None
             lead_snp_i2=None
+        log.write("Finished creating MQQ plot successfully!",verbose=verbose)
         # Get top variants for annotation #######################################################
+        log.write("Start to extract variants for annotation...",verbose=verbose)
         if (anno and anno!=True) or (len(anno_set)>0):
             if len(anno_set)>0:
                 to_annotate=sumstats.loc[sumstats[snpid].isin(anno_set),:]
                 if to_annotate.empty is not True:
-                    if verbose: log.write(" -Found "+str(len(to_annotate))+" specified variants to annotate...")
+                    log.write(" -Found "+str(len(to_annotate))+" specified variants to annotate...",verbose=verbose)
             else:
                 to_annotate=getsig(sumstats.loc[sumstats["scaled_P"]> float(-np.log10(sig_level_lead)),:],
                                snpid,
@@ -785,7 +792,7 @@ def mqqplot(insumstats,
                                mlog10p="scaled_P",
                                verbose=False)
                 if (to_annotate.empty is not True) and ("b" not in mode):
-                    if verbose: log.write(" -Found "+str(len(to_annotate))+" significant variants with a sliding window size of "+str(windowsizekb)+" kb...")
+                    log.write(" -Found "+str(len(to_annotate))+" significant variants with a sliding window size of "+str(windowsizekb)+" kb...",verbose=verbose)
         else:
             to_annotate=getsig(sumstats.loc[sumstats["scaled_P"]> float(-np.log10(sig_level_lead)),:],
                                "i",
@@ -798,7 +805,7 @@ def mqqplot(insumstats,
                                mlog10p="scaled_P",
                                sig_level=sig_level_lead)
             if (to_annotate.empty is not True) and ("b" not in mode):
-                if verbose: log.write(" -Found "+str(len(to_annotate))+" significant variants with a sliding window size of "+str(windowsizekb)+" kb...")
+                log.write(" -Found "+str(len(to_annotate))+" significant variants with a sliding window size of "+str(windowsizekb)+" kb...",verbose=verbose)
         if (to_annotate.empty is not True) and anno=="GENENAME":
             to_annotate = annogene(to_annotate,
                                    id=snpid,
@@ -808,16 +815,21 @@ def mqqplot(insumstats,
                                    build=build,
                                    source=anno_source,
                                    verbose=verbose).rename(columns={"GENE":"Annotation"})
+        log.write("Finished extracting variants for annotation...",verbose=verbose)
         # Configure X, Y axes #######################################################
+        log.write("Start to process figure arts.",verbose=verbose)
         if region is None:
             # if Manhattan plot
             ax1 = _process_xtick(ax1=ax1,
                                  chrom_df=chrom_df,
                                  xtick_chr_dict=xtick_chr_dict,
                                  fontsize = fontsize,
-                                 font_family=font_family)
+                                 font_family=font_family,
+                                 log=log,
+                                 verbose=verbose)
         ax1, ax3 = _process_xlabel(region=region,
                                    xlabel=xlabel,
                                    ax1=ax1,
@@ -825,7 +837,9 @@ def mqqplot(insumstats,
                                    mode=mode,
                                    fontsize=fontsize,
                                    font_family=font_family,
-                                   ax3=ax3 )
+                                   ax3=ax3,
+                                   log=log,
+                                   verbose=verbose)
         ax1, ax4 = _process_ylabel(ylabel=ylabel,
                                    ax1=ax1,
@@ -833,8 +847,11 @@ def mqqplot(insumstats,
                                    bwindowsizekb=bwindowsizekb,
                                    fontsize=fontsize,
                                    font_family=font_family,
-                                   ax4=ax4)
+                                   ax4=ax4,
+                                   log=log,
+                                   verbose=verbose)
         ax1 = _set_yticklabels(cut=cut,
                      cutfactor=cutfactor,
                      cut_log=cut_log,
@@ -849,19 +866,28 @@ def mqqplot(insumstats,
                      font_family=font_family,
                      ytick3=ytick3,
                      ylabels=ylabels,
-                     ylabels_converted=ylabels_converted
-                     )
+                     ylabels_converted=ylabels_converted,
+                     log=log,
+                     verbose=verbose)
         ax1, ax4 = _process_ytick(ax1=ax1,
                                    fontsize=fontsize,
                                    font_family=font_family,
-                                   ax4=ax4)
+                                   ax4=ax4,
+                                   log=log,
+                                   verbose=verbose)
-        if cbar is not None:
-            # regional plot cbar
-            cbar = _process_cbar(cbar, cbar_fontsize=fontsize, cbar_font_family=font_family, cbar_title=cbar_title)
+        # regional plot cbar
+        if cbar is not None:
+            cbar = _process_cbar(cbar,
+                                 cbar_fontsize=fontsize,
+                                 cbar_font_family=font_family,
+                                 cbar_title=cbar_title,
+                                 log=log,
+                                 verbose=verbose)
         ax1 = _process_spine(ax1, mode)
         # genomewide significant line
         ax1 = _process_line(ax1,
                             sig_line,
@@ -874,7 +900,9 @@ def mqqplot(insumstats,
                             additional_line_color,
                             mode,
                             bmean,
-                            bmedian )
+                            bmedian,
+                            log=log,
+                            verbose=verbose )
         if mtitle and anno and len(to_annotate)>0:
@@ -882,8 +910,10 @@ def mqqplot(insumstats,
             ax1.set_title(mtitle,pad=pad,fontsize=title_fontsize,family=font_family)
         elif mtitle:
             ax1.set_title(mtitle,fontsize=title_fontsize,family=font_family)
+        log.write("Finished processing figure arts.",verbose=verbose)
         # Add annotation arrows and texts
+        log.write("Start to annotate variants...",verbose=verbose)
         ax1 = annotate_single(
                                 sumstats=sumstats,
                                 anno=anno,
@@ -917,6 +947,7 @@ def mqqplot(insumstats,
                                 log=log,
                                _invert=_invert
                             )
+        log.write("Finished annotating variants.",verbose=verbose)
     # Manhatann-like plot Finished #####################################################################
     # QQ plot #########################################################################################################
@@ -961,9 +992,9 @@ def mqqplot(insumstats,
     # Y axis jagged
     if jagged==True:
-        ax1 = _jagged_y(cut=cut,skip=skip,ax1=ax1,mode=1,mqqratio=mqqratio,jagged_len=jagged_len,jagged_wid=jagged_wid)
+        ax1 = _jagged_y(cut=cut,skip=skip,ax1=ax1,mode=1,mqqratio=mqqratio,jagged_len=jagged_len,jagged_wid=jagged_wid,log=log, verbose=verbose)
         if "qq" in mode:
-            ax2 = _jagged_y(cut=cut,skip=skip,ax1=ax2,mode=2,mqqratio=mqqratio,jagged_len=jagged_len,jagged_wid=jagged_wid)
+            ax2 = _jagged_y(cut=cut,skip=skip,ax1=ax2,mode=2,mqqratio=mqqratio,jagged_len=jagged_len,jagged_wid=jagged_wid,log=log, verbose=verbose)
     # XY lim
     if ylim is not None:
@@ -989,7 +1020,7 @@ def mqqplot(insumstats,
     if _get_region_lead==True:
         return fig, log, lead_snp_i, lead_snp_i2
-    if verbose: log.write("Finished creating MQQ plot successfully!")
+    log.write("Finished creating plot successfully!",verbose=verbose)
     return fig, log
 ##############################################################################################################################################################################
@@ -1084,22 +1115,22 @@ def _sanity_check(sumstats, mode, chrom, pos, stratified, _if_quick_qc, log, ver
         #sanity check : drop variants with na values in chr and pos df
         sumstats = sumstats.dropna(subset=[chrom,pos])
         after_number=len(sumstats)
-        if verbose:log.write(" -Removed "+ str(pre_number-after_number) +" variants with nan in CHR or POS column ...")
+        log.write(" -Removed "+ str(pre_number-after_number) +" variants with nan in CHR or POS column ...",verbose=verbose)
         out_of_range_chr = sumstats[chrom]<=0
-        if verbose:log.write(" -Removed {} variants with CHR <=0...".format(sum(out_of_range_chr)))
+        log.write(" -Removed {} variants with CHR <=0...".format(sum(out_of_range_chr)),verbose=verbose)
         sumstats = sumstats.loc[~out_of_range_chr,:]
     if stratified is True and _if_quick_qc:
         pre_number=len(sumstats)
         sumstats = sumstats.dropna(subset=["MAF"])
         after_number=len(sumstats)
-        if verbose:log.write(" -Removed "+ str(pre_number-after_number) +" variants with nan in EAF column ...")
+        log.write(" -Removed "+ str(pre_number-after_number) +" variants with nan in EAF column ...",verbose=verbose)
     if "b" not in mode and _if_quick_qc:
         pre_number=len(sumstats)
         sumstats = sumstats.dropna(subset=["raw_P"])
         after_number=len(sumstats)
-        if verbose:log.write(" -Removed "+ str(pre_number-after_number) +" variants with nan in P column ...")
+        log.write(" -Removed "+ str(pre_number-after_number) +" variants with nan in P column ...",verbose=verbose)
     return sumstats
 def _process_p_value(sumstats, mode,p, mlog10p, scaled, log, verbose ):
@@ -1108,7 +1139,7 @@ def _process_p_value(sumstats, mode,p, mlog10p, scaled, log, verbose ):
         sumstats["scaled_P"] = sumstats["DENSITY"].copy()
         sumstats["raw_P"] = -np.log10(sumstats["DENSITY"].copy()+2)
     elif scaled is True:
-        if verbose:log.write(" -P values are already converted to -log10(P)!")
+        log.write(" -P values are already converted to -log10(P)!",verbose=verbose)
         sumstats["scaled_P"] = sumstats["raw_P"].copy()
         sumstats["raw_P"] = np.power(10,-sumstats["scaled_P"].astype("float64"))
     else:
@@ -1156,7 +1187,7 @@ def _process_highlight(sumstats, highlight, highlight_chrpos, highlight_windowkb
                         sumstats.loc[right_chr&up_pos&low_pos,"HUE"]=0
         else:
             # highlight for one set
-            # to_highlight = sumstats.loc[sumstats[snpid].isin(highlight),:]
+            to_highlight = sumstats.loc[sumstats[snpid].isin(highlight),:]
             #assign colors: 0 is hightlight color
             for index,row in to_highlight.iterrows():
                 target_chr = int(row[chrom])
@@ -1169,7 +1200,7 @@ def _process_highlight(sumstats, highlight, highlight_chrpos, highlight_windowkb
 def _process_density(sumstats, mode, bwindowsizekb, chrom, pos, verbose, log):
     if "b" in mode and "DENSITY" not in sumstats.columns:
-        if verbose:log.write(" -Calculating DENSITY with windowsize of ",bwindowsizekb ," kb")
+        log.write(" -Calculating DENSITY with windowsize of ",bwindowsizekb ," kb",verbose=verbose)
         large_number = _get_largenumber(sumstats[pos].max(),log=log)
         stack=[]
@@ -1190,11 +1221,12 @@ def _process_density(sumstats, mode, bwindowsizekb, chrom, pos, verbose, log):
     elif "b" in mode and "DENSITY" in sumstats.columns:
         bmean=sumstats["DENSITY"].mean()
         bmedian=sumstats["DENSITY"].median()
-        if verbose:log.write(" -DENSITY column exists. Skipping calculation...")
+        log.write(" -DENSITY column exists. Skipping calculation...",verbose=verbose)
     return   sumstats, bmean, bmedian
-def _process_line(ax1, sig_line, suggestive_sig_line, additional_line, lines_to_plot , sc_linewidth, sig_line_color, suggestive_sig_line_color, additional_line_color, mode, bmean, bmedian ):
+def _process_line(ax1, sig_line, suggestive_sig_line, additional_line, lines_to_plot , sc_linewidth, sig_line_color, suggestive_sig_line_color, additional_line_color, mode, bmean, bmedian , log=Log(),verbose=True):
     # genomewide significant line
+    log.write(" -Processing lines...",verbose=verbose)
     if sig_line is True:
         sigline = ax1.axhline(y=lines_to_plot[0],
                                 linewidth = sc_linewidth,
@@ -1220,8 +1252,9 @@ def _process_line(ax1, sig_line, suggestive_sig_line, additional_line, lines_to_
         medianline = ax1.axhline(y=bmedian, linewidth = sc_linewidth,linestyle="--",color=sig_line_color,zorder=1000)
     return ax1
-def _process_cbar(cbar, cbar_fontsize, cbar_font_family, cbar_title):
-    if str(type(cbar))=="list":
+def _process_cbar(cbar, cbar_fontsize, cbar_font_family, cbar_title, log=Log(),verbose=True):
+    log.write(" -Processing color bar...",verbose=verbose)
+    if type(cbar) == list:
         for cbar_single in cbar:
             cbar_yticklabels = cbar_single.ax.get_yticklabels()
             cbar_single.ax.set_yticklabels(cbar_yticklabels, fontsize=cbar_fontsize, family=cbar_font_family )
@@ -1232,12 +1265,14 @@ def _process_cbar(cbar, cbar_fontsize, cbar_font_family, cbar_title):
         cbar.ax.set_title(cbar_title, fontsize=cbar_fontsize, family=cbar_font_family, loc="center",y=-0.2 )
     return cbar
-def _process_xtick(ax1, chrom_df, xtick_chr_dict, fontsize, font_family):
+def _process_xtick(ax1, chrom_df, xtick_chr_dict, fontsize, font_family, log=Log(),verbose=True):
+    log.write(" -Processing X ticks...",verbose=verbose)
     ax1.set_xticks(chrom_df.astype("float64"))
     ax1.set_xticklabels(chrom_df.index.astype("Int64").map(xtick_chr_dict),fontsize=fontsize,family=font_family)
     return ax1
-def _process_ytick(ax1, fontsize, font_family, ax4):
+def _process_ytick(ax1, fontsize, font_family, ax4, log=Log(),verbose=True):
+    log.write(" -Processing Y labels...",verbose=verbose)
     ax1_yticklabels = ax1.get_yticklabels()
     #ax1.set_yticklabels(ax1_yticklabels,fontsize=fontsize,family=font_family)
     ax1_yticks = ax1.get_yticks()
@@ -1248,7 +1283,8 @@ def _process_ytick(ax1, fontsize, font_family, ax4):
         ax4.set_yticks(ax4_yticks,ax4_yticklabels, fontsize=fontsize,family=font_family)
     return ax1, ax4
-def _process_xlabel(region, xlabel, ax1, gtf_path, mode, fontsize, font_family,  ax3=None ):
+def _process_xlabel(region, xlabel, ax1, gtf_path, mode, fontsize, font_family,  ax3=None , log=Log(),verbose=True):
+    log.write(" -Processing X labels...",verbose=verbose)
     if region is not None:
         if xlabel is None:
             xlabel = "Chromosome "+str(region[0])+" (MB)"
@@ -1262,7 +1298,8 @@ def _process_xlabel(region, xlabel, ax1, gtf_path, mode, fontsize, font_family,
         ax1.set_xlabel(xlabel,fontsize=fontsize,family=font_family)
     return ax1, ax3
-def _process_ylabel(ylabel, ax1,  mode, bwindowsizekb, fontsize, font_family, ax4=None):
+def _process_ylabel(ylabel, ax1,  mode, bwindowsizekb, fontsize, font_family, ax4=None, log=Log(),verbose=True):
+    log.write(" -Processing Y labels...",verbose=verbose)
     if "b" in mode:
         if ylabel is None:
             ylabel ="Density of GWAS \n SNPs within "+str(bwindowsizekb)+" kb"
@@ -1336,4 +1373,4 @@ def _process_layout(mode, figax, fig_args, mqqratio, region_hspace):
         raise ValueError("Please select one from the 5 modes: mqq/qqm/m/qq/r/b")
     ax4=None
     cbar=None
-    return fig, ax1, ax2, ax3, ax4, cbar
+    return fig, ax1, ax2, ax3, ax4, cbar

gwaslab/viz_plot_qqplot.py CHANGED Viewed

@@ -45,7 +45,7 @@ def _plot_qq(
     # QQ plot #########################################################################################################
     # ax2 qqplot
-    if verbose:log.write("Start to create QQ plot with "+str(len(sumstats))+" variants:")
+    log.write("Start to create QQ plot with "+str(len(sumstats))+" variants:",verbose=verbose )
     # plotting qq plots using processed data after cut and skip
@@ -59,6 +59,7 @@ def _plot_qq(
     upper_bound_p = np.power(10.0, -expected_min_mlog10p)
     if stratified is False:
+        log.write(" -Plotting all variants...",verbose=verbose)
         # sort x,y for qq plot
         # high to low
         observed = p_toplot.sort_values(ascending=False)
@@ -68,13 +69,13 @@ def _plot_qq(
         expected_all = -np.log10(np.linspace(minit,upper_bound_p,len(p_toplot_raw)))[:len(observed)]
-        if verbose:log.write("Expected range of P: (0,{})".format(upper_bound_p))
+        log.write(" -Expected range of P: (0,{})".format(upper_bound_p),verbose=verbose)
         #p_toplot = sumstats["scaled_P"]
         ax2.scatter(expected_all,observed,s=marker_size[1],color=colors[0],**qq_scatter_args)
     else:
         # stratified qq plot
+        log.write(" -Plotting variants stratified by MAF...",verbose=verbose)
         observed = p_toplot.sort_values(ascending=False)
         expected_all = -np.log10(np.linspace(minit,upper_bound_p,len(p_toplot_raw)))[:len(observed)]
@@ -115,15 +116,15 @@ def _plot_qq(
         if expected_min_mlog10p!=0:
             level = 1 -  np.power(10.0,-np.nanmedian(expected_all))
-            if verbose: log.write(" -Level for calculating lambda GC : {}".format(1 - level))
+            log.write(" -Level for calculating lambda GC : {}".format(1 - level),verbose=verbose)
-        if verbose and not include_chrXYMT : log.write(" -Excluding chrX,Y, MT from calculation of lambda GC.")
+        if verbose and not include_chrXYMT : log.write(" -Excluding chrX,Y, MT from calculation of lambda GC.",verbose=verbose)
         lambdagc = lambdaGC(p_toplot_raw,
                             mode="MLOG10P",
                             level=level,
                             include_chrXYMT=include_chrXYMT,
                             log=log,
-                            verbose=True)
+                            verbose=verbose)
         # annotate lambda gc to qq plot
         ax2.text(0.10, 1.03,"$\\lambda_{GC}$ = "+"{:.4f}".format(lambdagc),
@@ -147,7 +148,9 @@ def _plot_qq(
                         font_family=font_family,
                         ylabels=ylabels,
                         ytick3=ytick3,
-                        ylabels_converted=ylabels_converted
+                        ylabels_converted=ylabels_converted,
+                        log=log,
+                        verbose=verbose
                         )
     #if cut == 0:
@@ -181,7 +184,7 @@ def _plot_qq(
     if qtitle:
         ax2.set_title(qtitle,fontsize=title_fontsize,pad=10,family=font_family)
-    if verbose: log.write("Finished creating QQ plot successfully!")
+    log.write("Finished creating QQ plot successfully!",verbose=verbose)
     # Creating QQ plot Finished #############################################################################################
     return ax2

gwaslab/viz_plot_regionalplot.py CHANGED Viewed

@@ -122,6 +122,8 @@ def _plot_regional(
                                 region_ld_colors=region_ld_colors2,
                                 position=2)
                 cbar = [cbar1, cbar2]
+        else:
+            cbar=None
         if region_title is not None:
                 ax1 = _add_region_title(region_title, ax1=ax1,region_title_args=region_title_args )
     ## recombinnation rate ##################################################
@@ -540,7 +542,7 @@ def process_vcf(sumstats, vcf_path, region,region_ref, region_ref_second, log, v
             # no position match
             return None
     if verbose: log.write(" -Matching variants using POS, NEA, EA ...")
-    sumstats["REFINDEX"] = sumstats.loc[:,[pos,nea,ea]].apply(lambda x: match_varaint(x),axis=1)
+    sumstats["REFINDEX"] = sumstats[[pos,nea,ea]].apply(lambda x: match_varaint(x),axis=1)
     #############################################################################################
     #sumstats["REFINDEX"] = sumstats[pos].apply(lambda x: np.where(ref_genotype["variants/POS"] == x )[0][0] if np.any(ref_genotype["variants/POS"] == x) else None)

gwaslab 3.4.37__py3-none-any.whl → 3.4.38__py3-none-any.whl

Potentially problematic release.

gwaslab 3.4.37__py3-none-any.whl → 3.4.38__py3-none-any.whl