PyPI - gwaslab - Versions diffs - 3.4.48__py3-none-any.whl → 3.5.0__py3-none-any.whl - Mend

gwaslab 3.4.48__py3-none-any.whl → 3.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of gwaslab might be problematic. Click here for more details.

Files changed (25) hide show

gwaslab/bd_common_data.py +3 -1
gwaslab/data/reference.json +10 -2
gwaslab/g_Sumstats.py +24 -2
gwaslab/g_vchange_status.py +1 -1
gwaslab/g_version.py +2 -2
gwaslab/hm_harmonize_sumstats.py +23 -7
gwaslab/io_preformat_input.py +73 -8
gwaslab/io_to_formats.py +5 -5
gwaslab/qc_fix_sumstats.py +106 -7
gwaslab/util_in_fill_data.py +20 -2
gwaslab/util_in_get_sig.py +18 -2
gwaslab/viz_aux_annotate_plot.py +75 -242
gwaslab/viz_aux_quickfix.py +9 -2
gwaslab/viz_aux_save_figure.py +2 -1
gwaslab/viz_plot_compare_effect.py +48 -20
gwaslab/viz_plot_miamiplot2.py +5 -1
gwaslab/viz_plot_mqqplot.py +70 -20
gwaslab/viz_plot_phe_heatmap.py +260 -0
gwaslab/viz_plot_stackedregional.py +11 -4
{gwaslab-3.4.48.dist-info → gwaslab-3.5.0.dist-info}/METADATA +1 -1
{gwaslab-3.4.48.dist-info → gwaslab-3.5.0.dist-info}/RECORD +25 -24
{gwaslab-3.4.48.dist-info → gwaslab-3.5.0.dist-info}/WHEEL +1 -1
{gwaslab-3.4.48.dist-info → gwaslab-3.5.0.dist-info}/LICENSE +0 -0
{gwaslab-3.4.48.dist-info → gwaslab-3.5.0.dist-info}/LICENSE_before_v3.4.39 +0 -0
{gwaslab-3.4.48.dist-info → gwaslab-3.5.0.dist-info}/top_level.txt +0 -0

gwaslab/viz_plot_compare_effect.py CHANGED Viewed

@@ -9,6 +9,7 @@ from matplotlib.patches import Rectangle
 from adjustText import adjust_text
 from gwaslab.viz_aux_save_figure import save_figure
 from gwaslab.util_in_get_sig import getsig
+from gwaslab.util_in_get_sig import annogene
 from gwaslab.g_Log import Log
 from gwaslab.util_in_correct_winnerscurse import wc_correct
 from gwaslab.util_in_correct_winnerscurse import wc_correct_test
@@ -59,6 +60,7 @@ def compare_effect(path1,
                    xylabel_prefix="Per-allele effect size in ",
                    helper_line_args=None,
                    fontargs=None,
+                   build="19",
                    r_or_r2="r",
                    #
                    errargs=None,
@@ -77,10 +79,9 @@ def compare_effect(path1,
         scaled2 = True
     if is_q_mc=="fdr" or is_q_mc=="bon":
         is_q = True
     if is_q == True:
         if is_q_mc not in [False,"fdr","bon","non"]:
-            raise ValueError("Please select either fdr or bon or non for is_q_mc.")
+            raise ValueError('Please select either "fdr" or "bon" or "non"/False for is_q_mc.')
     if save_args is None:
         save_args = {"dpi":300,"facecolor":"white"}
     if reg_box is None:
@@ -89,6 +90,8 @@ def compare_effect(path1,
         sep = ["\t","\t"]
     if get_lead_args is None:
         get_lead_args = {}
+    if anno=="GENENAME":
+        get_lead_args["anno"]=True
     if errargs is None:
         errargs={"ecolor":"#cccccc","elinewidth":1}
     if fontargs is None:
@@ -191,10 +194,12 @@ def compare_effect(path1,
         ######### 8.1 if a snplist is provided, use the snp list
         log.write(" -Extract variants in the given list from "+label[0]+"...")
         sig_list_1 = sumstats.loc[sumstats["SNPID"].isin(snplist),:].copy()
+        if anno=="GENENAME":
+            sig_list_1 = annogene(sumstats,"SNPID","CHR","POS", build=build, verbose=verbose,**get_lead_args)
     else:
-        ######### 8,2 otherwise use the sutomatically detected lead SNPs
+        ######### 8,2 otherwise use the automatically detected lead SNPs
         log.write(" -Extract lead variants from "+label[0]+"...")
-        sig_list_1 = getsig(sumstats,"SNPID","CHR","POS","P", verbose=verbose,sig_level=sig_level,**get_lead_args)
+        sig_list_1 = getsig(sumstats,"SNPID","CHR","POS","P", build=build, verbose=verbose,sig_level=sig_level,**get_lead_args)
     if drop==True:
         sig_list_1 = drop_duplicate_and_na(sig_list_1, sort_by="P", log=log ,verbose=verbose)
@@ -235,10 +240,12 @@ def compare_effect(path1,
         ######### 12.1 if a snplist is provided, use the snp list
         log.write(" -Extract snps in the given list from "+label[1]+"...")
         sig_list_2 = sumstats.loc[sumstats["SNPID"].isin(snplist),:].copy()
+        if anno=="GENENAME":
+            sig_list_2 = annogene(sumstats,"SNPID","CHR","POS", build=build, verbose=verbose,**get_lead_args)
     else:
         log.write(" -Extract lead snps from "+label[1]+"...")
         ######### 12.2 otherwise use the sutomatically detected lead SNPs
-        sig_list_2 = getsig(sumstats,"SNPID","CHR","POS","P",
+        sig_list_2 = getsig(sumstats,"SNPID","CHR","POS","P",build=build,
                                  verbose=verbose,sig_level=sig_level,**get_lead_args)
     if drop==True:
         sig_list_2 = drop_duplicate_and_na(sig_list_2, sort_by="P", log=log ,verbose=verbose)
@@ -248,6 +255,10 @@ def compare_effect(path1,
     log.write("Merging snps from "+label[0]+" and "+label[1]+"...")
     sig_list_merged = pd.merge(sig_list_1,sig_list_2,left_on="SNPID",right_on="SNPID",how="outer",suffixes=('_1', '_2'))
+    if anno == "GENENAME":
+        sig_list_merged.loc[sig_list_merged["SNPID"].isin((sig_list_1["SNPID"])),"GENENAME"] = sig_list_merged.loc[sig_list_merged["SNPID"].isin((sig_list_1["SNPID"])),"GENE_1"]
+        sig_list_merged.loc[~sig_list_merged["SNPID"].isin((sig_list_1["SNPID"])),"GENENAME"] = sig_list_merged.loc[~sig_list_merged["SNPID"].isin((sig_list_1["SNPID"])),"GENE_2"]
+        sig_list_merged = sig_list_merged.drop(columns=["GENE_1","GENE_2","LOCATION_1","LOCATION_2"])
     #     SNPID       P_1       P_2
     #0   rs117986209  0.142569  0.394455
     #1     rs6704312  0.652104  0.143750
@@ -533,7 +544,7 @@ def compare_effect(path1,
     ########################## Het test############################################################
     ## heterogeneity test
-    if (is_q is True):
+    if (is_q == True):
         log.write(" -Calculating Cochran's Q statistics and peform chisq test...", verbose=verbose)
         if mode=="beta" or mode=="BETA" or mode=="Beta":
             sig_list_merged = test_q(sig_list_merged,"EFFECT_1","SE_1","EFFECT_2_aligned","SE_2",q_level=q_level,is_q_mc=is_q_mc, log=log, verbose=verbose)
@@ -552,7 +563,7 @@ def compare_effect(path1,
         log.write(" -Exclude "+str(len(sig_list_merged) -sum(both_eaf_clear))+ " variants with maf <",maf_level, verbose=verbose)
         sig_list_merged = sig_list_merged.loc[both_eaf_clear,:]
     # heterogeneity summary
-    if (is_q is True):
+    if (is_q == True):
         log.write(" -Significant het:" ,len(sig_list_merged.loc[sig_list_merged["HetP"]<0.05,:]), verbose=verbose)
         log.write(" -All sig:" ,len(sig_list_merged), verbose=verbose)
         log.write(" -Het rate:" ,len(sig_list_merged.loc[sig_list_merged["HetP"]<0.05,:])/len(sig_list_merged), verbose=verbose)
@@ -633,11 +644,11 @@ def compare_effect(path1,
             ax.scatter(both["OR_1"],both["OR_2_aligned"],label=label[2],zorder=2,color="#205be6",edgecolors=both["Edge_color"],marker="s",**scatterargs)
             legend_elements.append(label[2])
     ## annotation #################################################################################################################
-    if anno==True:
+    if anno==True or anno=="GENENAME":
         sig_list_toanno = sig_list_merged.dropna(axis=0)
         if is_q==True and anno_het == True:
             sig_list_toanno = sig_list_toanno.loc[sig_list_toanno["Edge_color"]=="black",:]
         if mode=="beta":
             sig_list_toanno = sig_list_toanno.loc[sig_list_toanno["EFFECT_1"].abs() >=anno_min1 ,:]
             sig_list_toanno = sig_list_toanno.loc[sig_list_toanno["EFFECT_2_aligned"].abs() >=anno_min2 ,:]
@@ -651,22 +662,38 @@ def compare_effect(path1,
         texts_l=[]
         texts_r=[]
+        if anno==True:
+            log.write("Annotating variants using {}".format("SNPID"), verbose=verbose)
+        elif anno=="GENENAME":
+            log.write("Annotating variants using {}".format("GENENAME"), verbose=verbose)
         for index, row in sig_list_toanno.iterrows():
+            log.write("Annotating {}...".format(row), verbose=verbose)
+            if anno==True:
+                to_anno_text = index
+            elif type(anno) is str:
+                if not pd.isna(row[anno]):
+                    to_anno_text = row[anno]
+                else:
+                    to_anno_text = index
             if mode=="beta" or mode=="BETA" or mode=="Beta":
                 if row["EFFECT_1"] <  row["EFFECT_2_aligned"]:
-                    texts_l.append(plt.text(row["EFFECT_1"], row["EFFECT_2_aligned"],index,ha="right",va="bottom"))
+                    texts_l.append(plt.text(row["EFFECT_1"], row["EFFECT_2_aligned"],to_anno_text,ha="right",va="bottom"))
                 else:
-                    texts_r.append(plt.text(row["EFFECT_1"], row["EFFECT_2_aligned"],index,ha="left",va="top"))
+                    texts_r.append(plt.text(row["EFFECT_1"], row["EFFECT_2_aligned"],to_anno_text,ha="left",va="top"))
             else:
                 if row["OR_1"] <  row["OR_2_aligned"]:
-                    texts_l.append(plt.text(row["OR_1"], row["OR_2_aligned"],index, ha='right', va='bottom'))
+                    texts_l.append(plt.text(row["OR_1"], row["OR_2_aligned"],to_anno_text, ha='right', va='bottom'))
                 else:
-                    texts_r.append(plt.text(row["OR_1"], row["OR_2_aligned"],index, ha='left', va='top'))
-        adjust_text(texts_l,autoalign =False,precision =0.001,lim=1000, ha="right",va="bottom", expand_text=(1,1.8) , expand_objects=(0.1,0.1), expand_points=(1.8,1.8) ,force_objects=(0.8,0.8) ,arrowprops=dict(arrowstyle='-|>', color='grey'),ax=ax)
-        adjust_text(texts_r,autoalign =False,precision =0.001,lim=1000, ha="left",va="top", expand_text=(1,1.8) , expand_objects=(0.1,0.1), expand_points=(1.8,1.8) ,force_objects =(0.8,0.8),arrowprops=dict(arrowstyle='-|>', color='grey'),ax=ax)
+                    texts_r.append(plt.text(row["OR_1"], row["OR_2_aligned"],to_anno_text, ha='left', va='top'))
+        if len(texts_l)>0:
+            adjust_text(texts_l,autoalign =False,precision =0.001,lim=1000, ha="right",va="bottom", expand_text=(1,1.8) , expand_objects=(0.1,0.1), expand_points=(1.8,1.8) ,force_objects=(0.8,0.8) ,arrowprops=dict(arrowstyle='-|>', color='grey'),ax=ax)
+        if len(texts_r)>0:
+            adjust_text(texts_r,autoalign =False,precision =0.001,lim=1000, ha="left",va="top", expand_text=(1,1.8) , expand_objects=(0.1,0.1), expand_points=(1.8,1.8) ,force_objects =(0.8,0.8),arrowprops=dict(arrowstyle='-|>', color='grey'),ax=ax)
     elif type(anno) is dict:
+        sig_list_toanno = sig_list_merged.dropna(axis=0)
         # if input is a dict
         sig_list_toanno = sig_list_toanno.loc[sig_list_toanno.index.isin(list(anno.keys())),:]
         if is_q==True and anno_het == True:
@@ -696,9 +723,10 @@ def compare_effect(path1,
                     texts_l.append(plt.text(row["OR_1"], row["OR_2_aligned"],anno[index], ha='right', va='bottom'))
                 else:
                     texts_r.append(plt.text(row["OR_1"], row["OR_2_aligned"],anno[index], ha='left', va='top'))
-        adjust_text(texts_l,autoalign =False,precision =0.001,lim=1000, ha="right",va="bottom", expand_text=(1,1.8) , expand_objects=(0.1,0.1), expand_points=(1.8,1.8) ,force_objects=(0.8,0.8) ,arrowprops=dict(arrowstyle='-|>', color='grey'),ax=ax)
-        adjust_text(texts_r,autoalign =False,precision =0.001,lim=1000, ha="left",va="top", expand_text=(1,1.8) , expand_objects=(0.1,0.1), expand_points=(1.8,1.8) ,force_objects =(0.8,0.8),arrowprops=dict(arrowstyle='-|>', color='grey'),ax=ax)
+        if len(texts_l)>0:
+            adjust_text(texts_l,autoalign =False,precision =0.001,lim=1000, ha="right",va="bottom", expand_text=(1,1.8) , expand_objects=(0.1,0.1), expand_points=(1.8,1.8) ,force_objects=(0.8,0.8) ,arrowprops=dict(arrowstyle='-|>', color='grey'),ax=ax)
+        if len(texts_r)>0:
+            adjust_text(texts_r,autoalign =False,precision =0.001,lim=1000, ha="left",va="top", expand_text=(1,1.8) , expand_objects=(0.1,0.1), expand_points=(1.8,1.8) ,force_objects =(0.8,0.8),arrowprops=dict(arrowstyle='-|>', color='grey'),ax=ax)
     #################################################################################################################################
     # plot x=0,y=0, and a 45 degree line

gwaslab/viz_plot_miamiplot2.py CHANGED Viewed

@@ -290,7 +290,11 @@ def plot_miami2(
     #####################################################################################################################
+    ax1l, ax1r = ax5.get_xlim()
+    ax5l, ax5r = ax1.get_xlim()
+    ax1.set_xlim([min(ax1l,ax5l), max(ax1r,ax5r)])
+    ax5.set_xlim([min(ax1l,ax5l), max(ax1r,ax5r)])
+    #####################################################################################################################
     ax5.set_xlabel("")
     #ax5.set_xticks(chrom_df)
     ax5.set_xticklabels([])

gwaslab/viz_plot_mqqplot.py CHANGED Viewed

@@ -139,7 +139,7 @@ def mqqplot(insumstats,
           anno_gtf_path=None,
           anno_adjust=False,
           anno_max_iter=100,
-          arm_offset=50,
+          arm_offset=None,
           arm_scale=1,
           anno_height=1,
           arm_scale_d=None,
@@ -291,7 +291,7 @@ def mqqplot(insumstats,
     if maf_bin_colors is None:
         maf_bin_colors = ["#f0ad4e","#5cb85c", "#5bc0de","#000042"]
     if save_args is None:
-        save_args = {"dpi":300,"facecolor":"white"}
+        save_args = {"dpi":400,"facecolor":"white"}
     if highlight is None:
         highlight = list()
     if highlight_anno_args is None:
@@ -329,6 +329,20 @@ def mqqplot(insumstats,
                     fig_args["dpi"]=72
                     scatter_args["rasterized"]=True
                     qq_scatter_args["rasterized"]=True
+                else:
+                    fig_args["dpi"] = save_args["dpi"]
+    # configure dpi if saving the plot
+    fig_args, scatter_args, qq_scatter_args, save_args = _configure_fig_save_kwargs(save = save,
+                                                                                    fig_args = fig_args,
+                                                                                    scatter_args = scatter_args,
+                                                                                    qq_scatter_args = qq_scatter_args,
+                                                                                    save_args = save_args)
+    if len(anno_d) > 0 and arm_offset is None:
+        # in pixels
+        arm_offset = fig_args["dpi"] * repel_force * fig_args["figsize"][0]*0.5
     log.write("Start to create MQQ plot...{}:".format(_get_version()),verbose=verbose)
     log.write(" -Genomic coordinates version: {}...".format(build),verbose=verbose)
@@ -401,7 +415,7 @@ def mqqplot(insumstats,
     if mode=="b":
         sig_level=1,
         sig_line=False,
-        windowsizekb = 100000000
+        #windowsizekb = 100000000
         mode="mb"
         scatter_args={"marker":"s"}
         marker_size= (marker_size[1],marker_size[1])
@@ -522,8 +536,12 @@ def mqqplot(insumstats,
                                                     pos=pos,
                                                     verbose=verbose,
                                                     log=log)
+        lines_to_plot = pd.Series(lines_to_plot.to_list() + [bmean, bmedian])
     else:
         bmean, bmedian=0,0
 # P value conversion #####################################################################################################
     # add raw_P and scaled_P
@@ -956,7 +974,7 @@ def mqqplot(insumstats,
             ax1.set_title(mtitle,fontsize=title_fontsize,family=font_family)
         log.write("Finished processing figure arts.",verbose=verbose)
-        # Add annotation arrows and texts
+        ## Add annotation arrows and texts
         log.write("Start to annotate variants...",verbose=verbose)
         ax1 = annotate_single(
                                 sumstats=sumstats,
@@ -1055,7 +1073,8 @@ def mqqplot(insumstats,
         fig.suptitle(title , fontsize = title_fontsize ,x=0.5, y=1.05)
     else:
         fig.suptitle(title , fontsize = title_fontsize, x=0.5,y=1)
+        ## Add annotation arrows and texts
     # Saving figure
     save_figure(fig = fig, save = save, keyword=mode, save_args=save_args, log = log, verbose=verbose)
@@ -1069,7 +1088,31 @@ def mqqplot(insumstats,
 ##############################################################################################################################################################################
+def _configure_fig_save_kwargs(save=None,
+                               fig_args=None,
+                               scatter_args=None,
+                               qq_scatter_args=None,
+                               save_args=None):
+    """Adjust figure/scatter keyword dicts according to the save target.
+
+    Any of the four dict arguments that is None is replaced by a new empty
+    dict. Dicts that are passed in are modified in place and returned.
+
+    The adjustment only happens when ``save`` is not None, is not a bool,
+    and has a length greater than 3:
+
+    * if its last three characters are "pdf" or "svg", ``fig_args["dpi"]``
+      is set to 72 and ``"rasterized"`` is set to True in both
+      ``scatter_args`` and ``qq_scatter_args``;
+    * otherwise ``fig_args["dpi"]`` is copied from ``save_args["dpi"]``.
+
+    Returns
+    -------
+    tuple
+        ``(fig_args, scatter_args, qq_scatter_args, save_args)``
+
+    Raises
+    ------
+    KeyError
+        If the non-pdf/svg branch is reached and ``save_args`` has no
+        "dpi" key (this includes the case where ``save_args`` was None).
+    """
+    if fig_args is None:
+        fig_args = dict()
+    if scatter_args is None:
+        scatter_args = dict()
+    if qq_scatter_args is None:
+        qq_scatter_args = dict()
+    if save_args is None:
+        save_args = dict()
+    if save is not None:
+        # bool values of `save` are skipped; len() is only applied to non-bool values
+        if type(save) is not bool:
+            if len(save)>3:
+                # the decision is made on the last three characters of `save`
+                if save[-3:]=="pdf" or save[-3:]=="svg":
+                    # to save as vectorized plot
+                    fig_args["dpi"]=72
+                    scatter_args["rasterized"]=True
+                    qq_scatter_args["rasterized"]=True
+                else:
+                    # NOTE(review): unguarded lookup; fails when "dpi" is missing from save_args
+                    fig_args["dpi"] = save_args["dpi"]
+    return fig_args, scatter_args, qq_scatter_args, save_args
 def _add_pad_to_x_axis(ax1, xpad, xpadl, xpadr, sumstats, pos, chrpad, xtight, log, verbose):
@@ -1104,12 +1147,6 @@ def _add_pad_to_x_axis(ax1, xpad, xpadl, xpadr, sumstats, pos, chrpad, xtight, l
     return ax1
 ##############################################################################################################################################################################
 def _configure_cols_to_use(insumstats, snpid,  chrom, pos, ea, nea, eaf, p, mlog10p,scaled, mode,stratified,anno, anno_set, anno_alias,_chrom_df_for_i,highlight ,pinpoint,density_color):
     usecols=[]
@@ -1287,9 +1324,10 @@ def _process_density(sumstats, mode, bwindowsizekb, chrom, pos, verbose, log):
                 else:
                     break
         df = pd.DataFrame(stack,columns=["SNPID","TCHR+POS","DENSITY"])
-        sumstats["DENSITY"] = df["DENSITY"].values
-        bmean=sumstats["DENSITY"].mean()
-        bmedian=sumstats["DENSITY"].median()
+        sumstats["DENSITY"] = df["DENSITY"].astype("Float64").values
+        bmean=sumstats.drop_duplicates(subset="SNPID")["DENSITY"].mean()
+        bmedian=sumstats.drop_duplicates(subset="SNPID")["DENSITY"].median()
     elif "b" in mode and "DENSITY" in sumstats.columns:
         bmean=sumstats["DENSITY"].mean()
         bmedian=sumstats["DENSITY"].median()
@@ -1305,6 +1343,7 @@ def _process_line(ax1, sig_line, suggestive_sig_line, additional_line, lines_to_
                                 linestyle="--",
                                 color=sig_line_color,
                                 zorder=1)
     if suggestive_sig_line is True:
         suggestive_sig_line = ax1.axhline(y=lines_to_plot[1],
                                             linewidth = sc_linewidth,
@@ -1312,15 +1351,20 @@ def _process_line(ax1, sig_line, suggestive_sig_line, additional_line, lines_to_
                                             color=suggestive_sig_line_color,
                                             zorder=1)
     if additional_line is not None:
-        for index, level in enumerate(lines_to_plot[2:].values):
+        for index, level in enumerate(lines_to_plot[2:2+len(additional_line)].values):
             ax1.axhline(y=level,
                         linewidth = sc_linewidth,
                         linestyle="--",
                         color=additional_line_color[index%len(additional_line_color)],
                         zorder=1)
-    if "b" in mode:
+    if "b" in mode:
+        bmean = lines_to_plot.iat[-2]
+        bmedian = lines_to_plot.iat[-1]
         # for brisbane plot, add median and mean line
+        log.write(" -Plotting horizontal line (  mean DENISTY): y = {}".format(bmean),verbose=verbose)
         meanline = ax1.axhline(y=bmean, linewidth = sc_linewidth,linestyle="-",color=sig_line_color,zorder=1000)
+        log.write(" -Plotting horizontal line ( median DENISTY): y = {}".format(bmedian),verbose=verbose)
         medianline = ax1.axhline(y=bmedian, linewidth = sc_linewidth,linestyle="--",color=sig_line_color,zorder=1000)
     return ax1
@@ -1441,10 +1485,16 @@ def _process_layout(mode, figax, fig_args, mqqratio, region_hspace):
             ax2 = None
             plt.subplots_adjust(hspace=region_hspace)
     elif mode =="b" :
-        fig_args["figsize"] = (15,5)
-        fig, ax1 = plt.subplots(1, 1,**fig_args)
-        ax2 = None
-        ax3 = None
+        if figax is not None:
+            fig = figax[0]
+            ax1 = figax[1]
+            ax3 = None
+            ax2 = None
+        else:
+            fig_args["figsize"] = (15,5)
+            fig, ax1 = plt.subplots(1, 1,**fig_args)
+            ax2 = None
+            ax3 = None
     else:
         raise ValueError("Please select one from the 5 modes: mqq/qqm/m/qq/r/b")
     ax4=None

gwaslab/viz_plot_phe_heatmap.py ADDED Viewed

@@ -0,0 +1,260 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+import numpy as np
+import scipy as sp
+from gwaslab.viz_aux_quickfix import _quick_assign_i_with_rank
+from gwaslab.viz_aux_quickfix import _get_largenumber
+from gwaslab.viz_aux_quickfix import _quick_fix_p_value
+from gwaslab.viz_aux_quickfix import _quick_fix_pos
+from gwaslab.viz_aux_quickfix import _quick_fix_chr
+from gwaslab.viz_aux_quickfix import _quick_fix_eaf
+from gwaslab.viz_aux_quickfix import _quick_fix_mlog10p
+from gwaslab.viz_aux_quickfix import _dropna_in_cols
+from gwaslab.viz_plot_mqqplot import _process_p_value
+from gwaslab.viz_plot_mqqplot import _configure_fig_save_kwargs
+from gwaslab.viz_plot_mqqplot import mqqplot
+from gwaslab.viz_aux_save_figure import save_figure
+from gwaslab.g_Log import Log
+import copy
+from gwaslab.bd_common_data import get_chr_to_number
+from gwaslab.bd_common_data import get_number_to_chr
+from gwaslab.g_version import _get_version
+def _gwheatmap(
+    insumstats,
+    chrom="CHR",
+    pos="POS",
+    ref_chrom="REF_CHR",
+    ref_pos="REF_START",
+    p="P",
+    scaled=False,
+    sizes = (10,50),
+    alpha=0.5,
+    mlog10p="MLOG10P",
+    snpid="SNPID",
+    eaf=None,
+    group="CIS/TRANS",
+    ea="EA",
+    nea="NEA",
+    colors=None,
+    check = True,
+    chr_dict = None,
+    xchrpad = 0,
+    ychrpad=0,
+    use_rank = False,
+    xtick_chr_dict=None,
+    ytick_chr_dict=None,
+    fontsize=10,
+    add_b =False,
+    log=Log(),
+    fig_kwargs=None,
+    scatter_kwargs=None,
+    height_ratios=None,
+    hspace = 0.1,
+    font_family="Arial",
+    cis_windowsizekb=100,
+    verbose=True,
+    save=True,
+    save_kwargs=None,
+    grid_linewidth=1,
+    grid_linecolor="grey",
+    **mqq_kwargs
+):
+    """Draw a genome-wide scatter plot of variant position against reference position.
+
+    Each row of ``insumstats`` becomes one point: the x coordinate is derived
+    from the ``chrom``/``pos`` columns and the y coordinate from the
+    ``ref_chrom``/``ref_pos`` columns, both through
+    ``_quick_assign_i_with_rank``. Points are coloured by the ``group`` column
+    and sized by the "scaled_P" column. The axis labels are fixed to
+    "pQTL position" and "location of the gene encoding the target protein".
+
+    Parameters
+    ----------
+    insumstats : DataFrame
+        Input table; it is copied before any modification.
+    chrom, pos, ref_chrom, ref_pos, p, mlog10p, snpid, group : str
+        Column names read from ``insumstats``.
+    scaled : bool
+        If True, "raw_P" is taken from the ``mlog10p`` column (coerced to
+        numeric); otherwise from the ``p`` column cast to float64.
+    sizes, alpha, colors : passed to the seaborn scatter plot / palette.
+        ``colors`` defaults to ["#CB132D","#597FBD"].
+    check : bool
+        If True, position and chromosome columns are passed through the
+        quick-fix helpers and rows with missing values in them are dropped.
+    chr_dict, xtick_chr_dict, ytick_chr_dict : dict, optional
+        Default to ``get_chr_to_number()`` / ``get_number_to_chr()``.
+    xchrpad, ychrpad, use_rank : forwarded to ``_quick_assign_i_with_rank``.
+    add_b : bool
+        If True, a second axes is created above the scatter axes (shared x)
+        and ``mqqplot`` is called with ``mode="b"`` to draw on it.
+    height_ratios, hspace : layout of the two axes when ``add_b`` is True.
+    fig_kwargs, scatter_kwargs, save_kwargs : dict, optional
+        Default to ``dict(figsize=(15,15))``, ``{}`` and
+        ``{"dpi":300,"facecolor":"white"}``.
+    fontsize, font_family : used for tick labels and axis labels.
+    grid_linewidth, grid_linecolor : style of the grid lines.
+    save : forwarded to ``_configure_fig_save_kwargs`` and ``save_figure``.
+    log, verbose : logging object and verbosity flag.
+    **mqq_kwargs : forwarded to ``mqqplot`` (only used when ``add_b`` is True).
+
+    Returns
+    -------
+    tuple
+        ``(fig, log)``. When ``add_b`` is True both names are rebound to the
+        values returned by ``mqqplot``.
+
+    Notes
+    -----
+    ``eaf``, ``ea``, ``nea`` and ``cis_windowsizekb`` are accepted but not
+    referenced anywhere in this function body. The default ``log`` object is
+    created once, when the function is defined, and is therefore shared by
+    every call that does not pass its own ``log``.
+    """
+    log.write("Start to create genome-wide scatter plot...{}:".format(_get_version()),verbose=verbose)
+    # fill in defaults for the optional container arguments
+    if height_ratios is None:
+        height_ratios = [1, 2]
+    if xtick_chr_dict is None:
+        xtick_chr_dict = get_number_to_chr()
+    if ytick_chr_dict is None:
+        ytick_chr_dict = get_number_to_chr()
+    if chr_dict is None:
+        chr_dict = get_chr_to_number()
+    if colors is None:
+        colors=["#CB132D","#597FBD"]
+    if fig_kwargs is None:
+        fig_kwargs= dict(figsize=(15,15))
+    if save_kwargs is None:
+        save_kwargs = {"dpi":300,"facecolor":"white"}
+    if scatter_kwargs is None:
+        scatter_kwargs = {}
+    # adjust dpi / rasterization for the save target; qq_scatter_args is not used afterwards
+    fig_kwargs, scatter_kwargs, qq_scatter_args, save_kwargs = _configure_fig_save_kwargs(save=save,
+                                                                                    fig_args = fig_kwargs,
+                                                                                    scatter_args = scatter_kwargs,
+                                                                                    qq_scatter_args = dict(),
+                                                                                    save_args = save_kwargs)
+    # work on a copy so the caller's table is left untouched
+    sumstats = insumstats.copy()
+    # Data QC and format
+    if check ==True:
+        sumstats[pos] = _quick_fix_pos(sumstats[pos])
+        sumstats[chrom] = _quick_fix_chr(sumstats[chrom], chr_dict=chr_dict)
+        sumstats[ref_pos] = _quick_fix_pos(sumstats[ref_pos])
+        sumstats[ref_chrom] = _quick_fix_chr(sumstats[ref_chrom], chr_dict=chr_dict)
+        sumstats = _dropna_in_cols(sumstats, [pos, chrom, ref_pos, ref_chrom], log=log, verbose=verbose)
+    # dropna
+    sumstats = sumstats.sort_values(by=group)
+    if scaled is True:
+        sumstats["raw_P"] = pd.to_numeric(sumstats[mlog10p], errors='coerce')
+    else:
+        sumstats["raw_P"] = sumstats[p].astype("float64")
+    # NOTE(review): "scaled_P", used below for point size, is not created in this
+    # function; presumably it is added by _process_p_value - confirm.
+    sumstats =  _process_p_value(sumstats=sumstats,
+                                mode="m",
+                                p=p,
+                                mlog10p=mlog10p,
+                                scaled=scaled,
+                                log=log,
+                                verbose=verbose )
+    # one axes for the scatter plot, or two stacked axes (ax2 above ax1) when add_b is set
+    if add_b ==False:
+        fig, ax1 = plt.subplots(**fig_kwargs)
+    else:
+        fig, (ax2, ax1) = plt.subplots( nrows=2 ,sharex=True, gridspec_kw={'height_ratios': height_ratios }, **fig_kwargs)
+        plt.subplots_adjust(hspace=hspace)
+    ## assign i for variants
+    sumstats, chrom_df_x = _quick_assign_i_with_rank(sumstats,
+                                                chrpad=xchrpad,
+                                                use_rank=use_rank,
+                                                chrom=chrom,
+                                                pos=pos,
+                                                verbose=verbose)
+    chrom_df_b = chrom_df_x
+    # keep the x coordinate under its own name before "i" is assigned again for y
+    sumstats = sumstats.rename(columns={"i":"i_x"})
+    # NOTE(review): "_ADD" is not created in this function; presumably it is
+    # written by _quick_assign_i_with_rank - confirm.
+    add_x_unique = list(sumstats["_ADD"].unique())
+    ## determine grouping methods for Y
+    ## assign i for Y group
+    sumstats, chrom_df_y = _quick_assign_i_with_rank(sumstats,
+                              chrpad=ychrpad,
+                              use_rank=use_rank,
+                              chrom=ref_chrom,
+                              pos=ref_pos,
+                              verbose=verbose)
+    sumstats = sumstats.rename(columns={"i":"i_y"})
+    add_y_unique = list(sumstats["_ADD"].unique())
+    if add_b == True:
+        # mqqplot receives the x coordinates again under the column name "i"
+        sumstats["i"] = sumstats["i_x"]
+        fig,log = mqqplot(sumstats,
+                        chrom=chrom,
+                        pos=pos,
+                        p=p,
+                        mlog10p=mlog10p,
+                        snpid=snpid,
+                        scaled=scaled,
+                        log=log,
+                        mode="b",
+                        figax=(fig,ax2),
+                        _chrom_df_for_i = chrom_df_b,
+                        _invert=False,
+                        _if_quick_qc=False,
+                        **mqq_kwargs
+                        )
+    ##
+    #min_xy = min(min(sumstats["i_x"]),min(sumstats["i_y"]))
+    #max_xy = max(max(sumstats["i_x"]),max(sumstats["i_y"]))
+    ## determine color
+    ## determine dot size
+    ## plot
+    legend = True
+    style=None
+    linewidth=0
+    edgecolor="black"
+    # one palette colour per distinct value of the group column
+    palette = sns.color_palette(colors,n_colors=sumstats[group].nunique())
+    #for index,g in enumerate(sumstats[group].unique()):
+    #
+    #    palette = sns.color_palette("dark:{}".format(colors[index]), as_cmap=True)
+    #
+    #    plot = sns.scatterplot(data=sumstats.loc[sumstats[group]==g,:], x='i_x', y='i_y',
+    #            hue="scaled_P",
+    #            palette=palette,
+    #            size="scaled_P",
+    #            alpha=alpha,
+    #            sizes=sizes,
+    #            legend=legend,
+    #            style=style,
+    #            linewidth=linewidth,
+    #            edgecolor = edgecolor,
+    #            zorder=2,
+    #            ax=ax1)
+    plot = sns.scatterplot(data=sumstats, x='i_x', y='i_y',
+            hue=group,
+            palette=palette,
+            size="scaled_P",
+            alpha=alpha,
+            sizes=sizes,
+            legend=legend,
+            style=style,
+            linewidth=linewidth,
+            edgecolor = edgecolor,
+            zorder=2,
+            ax=ax1)
+    # rebuild the legend with readable labels for the group and size entries
+    handles, labels = ax1.get_legend_handles_labels()
+    new_labels = []
+    ncol = len(labels)
+    for i in labels:
+        if i==group:
+            new_labels.append("Group")
+        elif i=="scaled_P":
+            new_labels.append("$-log_{10}(P)$")
+        else:
+            new_labels.append(i)
+    ax1.legend(labels = new_labels,  handles=handles, loc="lower center", bbox_to_anchor=(.45, -0.17),
+                    ncol=ncol, scatterpoints=2, title=None, frameon=False)
+    ## add vertical line
+    for i in add_x_unique:
+        ax1.axvline(x = i+0.5, linewidth = grid_linewidth,color=grid_linecolor,zorder=1000 )
+    for i in add_y_unique:
+        ax1.axhline(y = i+0.5,  linewidth = grid_linewidth,color=grid_linecolor,zorder=1000 )
+    # NOTE(review): both tick helpers are called with verbose=True rather than
+    # the `verbose` argument of this function - confirm whether that is intended.
+    ## add X tick label
+    ax1 = _process_xtick(ax1, chrom_df_x, xtick_chr_dict, fontsize, font_family, log=log,verbose=True)
+    ## add Y tick label
+    ax1 = _process_ytick(ax1, chrom_df_y, ytick_chr_dict, fontsize, font_family, log=log,verbose=True)
+    ## set x y lim
+    ax1.set_ylim([0.5,sumstats["i_y"].max()+1])
+    ax1.set_xlim([0.5,sumstats["i_x"].max()+1])
+    ## set x y label
+    xlabel = "pQTL position"
+    ax1.set_xlabel(xlabel,fontsize=fontsize,family=font_family)
+    ylabel = "location of the gene encoding the target protein"
+    ax1.set_ylabel(ylabel,fontsize=fontsize,family=font_family)
+    save_figure(fig = fig, save = save, keyword="gwheatmap",  save_args=save_kwargs, log = log, verbose=verbose)
+    return fig, log
+################################################################################################################
+def _process_xtick(ax1, chrom_df, xtick_chr_dict, fontsize, font_family, log=Log(),verbose=True):
+    """Set the x tick positions and labels of ``ax1`` from ``chrom_df``.
+
+    Tick positions are the values of ``chrom_df`` cast to float64. Tick labels
+    are the index of ``chrom_df`` cast to Int64 and mapped through
+    ``xtick_chr_dict``, drawn with the given ``fontsize`` and ``font_family``.
+    Returns ``ax1``.
+
+    The default ``log`` object is created once, when the function is defined.
+    """
+    # presumably chrom_df is indexed by chromosome number - verify against caller
+    log.write(" -Processing X ticks...",verbose=verbose)
+    ax1.set_xticks(chrom_df.astype("float64"))
+    ax1.set_xticklabels(chrom_df.index.astype("Int64").map(xtick_chr_dict),fontsize=fontsize,family=font_family)
+    return ax1
+def _process_ytick(ax1, chrom_df, ytick_chr_dict, fontsize, font_family, log=Log(),verbose=True):
+    """Set the y tick positions and labels of ``ax1`` from ``chrom_df``.
+
+    Tick positions are the values of ``chrom_df`` cast to float64. Tick labels
+    are the index of ``chrom_df`` cast to Int64 and mapped through
+    ``ytick_chr_dict``, drawn with the given ``fontsize`` and ``font_family``.
+    Returns ``ax1``.
+
+    The default ``log`` object is created once, when the function is defined.
+    """
+    # presumably chrom_df is indexed by chromosome number - verify against caller
+    log.write(" -Processing Y ticks...",verbose=verbose)
+    ax1.set_yticks(chrom_df.astype("float64"))
+    ax1.set_yticklabels(chrom_df.index.astype("Int64").map(ytick_chr_dict),fontsize=fontsize,family=font_family)
+    return ax1

gwaslab/viz_plot_stackedregional.py CHANGED Viewed

@@ -95,6 +95,10 @@ def plot_stacked_mqq(objects,
         if "family" not in title_args.keys():
             title_args["family"] = "Arial"
     # create figure and axes ##################################################################################################################
+    #
+    # subplot_height : subplot height
+    # figsize : Width, height in inches
     if mode=="r":
         if len(vcfs)==1:
             vcfs = vcfs *len(sumstats_list)
@@ -107,14 +111,17 @@ def plot_stacked_mqq(objects,
         else:
             height_ratios = [1 for i in range(n_plot_plus_gene_track-1)]+[gene_track_height]
-        fig_args["figsize"] = [16,subplot_height*n_plot_plus_gene_track]
+        if "figsize" not in fig_args.keys():
+            fig_args["figsize"] = [16,subplot_height*n_plot_plus_gene_track]
         fig, axes = plt.subplots(n_plot_plus_gene_track, 1, sharex=True,
                              gridspec_kw={'height_ratios': height_ratios},
                              **fig_args)
         plt.subplots_adjust(hspace=region_hspace)
     elif mode=="m":
         n_plot = len(sumstats_list)
-        fig_args["figsize"] = [10,subplot_height*n_plot]
+        if "figsize" not in fig_args.keys():
+            fig_args["figsize"] = [10,subplot_height*n_plot]
         fig, axes = plt.subplots(n_plot, 1, sharex=True,
                              gridspec_kw={'height_ratios': [1 for i in range(n_plot)]},
                              **fig_args)
@@ -122,8 +129,8 @@ def plot_stacked_mqq(objects,
         vcfs = [None for i in range(n_plot)]
     elif mode=="mqq":
         n_plot = len(objects)
-#
-        fig_args["figsize"] = [10,subplot_height*n_plot]
+        if "figsize" not in fig_args.keys():
+            fig_args["figsize"] = [10,subplot_height*n_plot]
         fig, axes = plt.subplots(n_plot, 2, sharex=True,
                              gridspec_kw={'height_ratios': [1 for i in range(n_plot-1)],
                                           'width_ratios':[mqqratio,1]},

gwaslab 3.4.48__py3-none-any.whl → 3.5.0__py3-none-any.whl

Potentially problematic release.

gwaslab 3.4.48__py3-none-any.whl → 3.5.0__py3-none-any.whl