PyPI - gwaslab - Versions diffs - 3.4.36__py3-none-any.whl → 3.4.38__py3-none-any.whl - Mend

gwaslab 3.4.36__py3-none-any.whl → 3.4.38__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of gwaslab might be problematic. Click here for more details.

Files changed (42) hide show

gwaslab/__init__.py +1 -1
gwaslab/data/formatbook.json +722 -721
gwaslab/g_Log.py +8 -0
gwaslab/g_Sumstats.py +80 -178
gwaslab/g_SumstatsPair.py +6 -2
gwaslab/g_Sumstats_summary.py +3 -3
gwaslab/g_meta.py +13 -3
gwaslab/g_version.py +2 -2
gwaslab/hm_casting.py +29 -15
gwaslab/hm_harmonize_sumstats.py +312 -159
gwaslab/hm_rsid_to_chrpos.py +1 -1
gwaslab/io_preformat_input.py +46 -37
gwaslab/io_to_formats.py +428 -295
gwaslab/qc_check_datatype.py +15 -1
gwaslab/qc_fix_sumstats.py +956 -719
gwaslab/util_ex_calculate_ldmatrix.py +29 -11
gwaslab/util_ex_gwascatalog.py +1 -1
gwaslab/util_ex_ldproxyfinder.py +1 -1
gwaslab/util_ex_process_h5.py +26 -17
gwaslab/util_ex_process_ref.py +3 -3
gwaslab/util_ex_run_coloc.py +26 -4
gwaslab/util_in_convert_h2.py +1 -1
gwaslab/util_in_fill_data.py +44 -5
gwaslab/util_in_filter_value.py +122 -34
gwaslab/util_in_get_density.py +2 -2
gwaslab/util_in_get_sig.py +41 -9
gwaslab/viz_aux_quickfix.py +26 -21
gwaslab/viz_aux_reposition_text.py +7 -4
gwaslab/viz_aux_save_figure.py +6 -5
gwaslab/viz_plot_compare_af.py +5 -5
gwaslab/viz_plot_compare_effect.py +22 -5
gwaslab/viz_plot_miamiplot2.py +28 -20
gwaslab/viz_plot_mqqplot.py +214 -98
gwaslab/viz_plot_qqplot.py +11 -8
gwaslab/viz_plot_regionalplot.py +16 -9
gwaslab/viz_plot_trumpetplot.py +15 -6
{gwaslab-3.4.36.dist-info → gwaslab-3.4.38.dist-info}/METADATA +3 -3
gwaslab-3.4.38.dist-info/RECORD +72 -0
gwaslab-3.4.36.dist-info/RECORD +0 -72
{gwaslab-3.4.36.dist-info → gwaslab-3.4.38.dist-info}/LICENSE +0 -0
{gwaslab-3.4.36.dist-info → gwaslab-3.4.38.dist-info}/WHEEL +0 -0
{gwaslab-3.4.36.dist-info → gwaslab-3.4.38.dist-info}/top_level.txt +0 -0

gwaslab/util_in_get_sig.py CHANGED Viewed

@@ -13,8 +13,9 @@ from gwaslab.bd_common_data import get_chr_to_NC
 from gwaslab.bd_common_data import gtf_to_protein_coding
 from gwaslab.bd_download import check_and_download
 from gwaslab.util_ex_gwascatalog import gwascatalog_trait
+from gwaslab.qc_fix_sumstats import check_dataframe_shape
+from gwaslab.qc_fix_sumstats import start_to
+from gwaslab.qc_fix_sumstats import finished
 # getsig
 # closest_gene
 # annogene
@@ -39,8 +40,24 @@ def getsig(insumstats,
     """
     Extract the lead variants using a sliding window. P or MLOG10P will be used and converted to SCALEDP for sorting.
     """
+    ##start function with col checking##########################################################
+    _start_line = "extract lead variants"
+    _end_line = "extracting lead variants"
+    _start_cols = [chrom,pos]
+    _start_function = ".get_lead()"
+    _must_args ={}
+    is_enough_info = start_to(sumstats=insumstats,
+                            log=log,
+                            verbose=verbose,
+                            start_line=_start_line,
+                            end_line=_end_line,
+                            start_cols=_start_cols,
+                            start_function=_start_function,
+                            **_must_args)
+    if is_enough_info == False: return None
+    ############################################################################################
-    if verbose: log.write("Start to extract lead variants...")
     if verbose: log.write(" -Processing "+str(len(insumstats))+" variants...")
     if verbose: log.write(" -Significance threshold :", sig_level)
     if verbose: log.write(" -Sliding window size:", str(windowsizekb) ," kb")
@@ -155,11 +172,9 @@ def getsig(insumstats,
                source=source,
                verbose=verbose)
-    # Finishing
-    if verbose: log.write("Finished extracting lead variants successfully!")
     # drop internal id
     output = output.drop("__ID",axis=1)
-    gc.collect()
+    finished(log,verbose,_end_line)
     return output.copy()
@@ -329,7 +344,24 @@ def getnovel(insumstats,
            gwascatalog_source="NCBI",
            output_known=False,
            verbose=True):
-    if verbose: log.write("Start to check if lead variants are known...")
+    ##start function with col checking##########################################################
+    _start_line = "check if lead variants are known"
+    _end_line = "checking if lead variants are known"
+    _start_cols = [chrom,pos]
+    _start_function = ".get_novel()"
+    _must_args ={}
+    is_enough_info = start_to(sumstats=insumstats,
+                            log=log,
+                            verbose=verbose,
+                            start_line=_start_line,
+                            end_line=_end_line,
+                            start_cols=_start_cols,
+                            start_function=_start_function,
+                            **_must_args)
+    if is_enough_info == False: return None
+    ############################################################################################
     allsig = getsig(insumstats=insumstats,
            id=id,chrom=chrom,pos=pos,p=p,use_p=use_p,windowsizekb=windowsizekb,sig_level=sig_level,log=log,
            xymt=xymt,anno=anno,build=build, source=source,verbose=verbose)
@@ -438,8 +470,8 @@ def getnovel(insumstats,
     if verbose: log.write(" -Identified ",len(allsig)-sum(allsig["NOVEL"])," known vairants in current sumstats...")
     if verbose: log.write(" -Identified ",sum(allsig["NOVEL"])," novel vairants in current sumstats...")
-    if verbose: log.write("Finished checking known or novel successfully!")
-    gc.collect()
+    finished(log,verbose,_end_line)
     # how to return
     if only_novel is True:

gwaslab/viz_aux_quickfix.py CHANGED Viewed

@@ -5,7 +5,7 @@ from gwaslab.bd_common_data import get_chr_to_number
 from gwaslab.bd_common_data import get_number_to_chr
 from math import ceil
-def _quick_fix(sumstats, chr_dict=get_chr_to_number(), scaled=False, chrom="CHR", pos="POS", p="P", mlog10p="MLOG10P",verbose=True, log=Log()):
+def _quick_fix(sumstats, chr_dict=get_chr_to_number(), scaled=False, chrom="CHR", pos="POS", p="P", mlog10p="MLOG10P",log=Log(), verbose=True):
     '''
     quick sanity check for input sumstats
     '''
@@ -45,10 +45,11 @@ def _quick_fix_p_value(sumstats, p="P", mlog10p="MLOG10P", scaled=False,verbose=
     return sumstats
-def _quick_fix_mlog10p(sumstats,p="P", mlog10p="MLOG10P", scaled=False, verbose=True, log=Log()):
+def _quick_fix_mlog10p(insumstats,p="P", mlog10p="MLOG10P", scaled=False, log=Log(), verbose=True):
     '''
     drop variants with bad -log10(P) values
     '''
+    sumstats = insumstats.copy()
     if scaled != True:
         if verbose:log.write(" -Sumstats P values are being converted to -log10(P)...")
         sumstats["scaled_P"] = -np.log10(sumstats[p].astype("float64"))
@@ -63,7 +64,7 @@ def _quick_fix_mlog10p(sumstats,p="P", mlog10p="MLOG10P", scaled=False, verbose=
     return sumstats
-def _quick_fix_eaf(seires, verbose=True, log=Log()):
+def _quick_fix_eaf(seires,log=Log(), verbose=True):
     '''
     conversion of eaf to maf
     '''
@@ -73,7 +74,7 @@ def _quick_fix_eaf(seires, verbose=True, log=Log()):
     return seires.copy()
-def _quick_fix_chr(seires, chr_dict, verbose=True, log=Log()):
+def _quick_fix_chr(seires, chr_dict,log=Log(), verbose=True):
     '''
     conversion and check for chr
     '''
@@ -84,7 +85,7 @@ def _quick_fix_chr(seires, chr_dict, verbose=True, log=Log()):
     return seires
-def _quick_fix_pos(seires, verbose=True, log=Log()):
+def _quick_fix_pos(seires,log=Log(), verbose=True):
     '''
     force conversion for pos
     '''
@@ -92,7 +93,7 @@ def _quick_fix_pos(seires, verbose=True, log=Log()):
     return seires
-def _get_largenumber(*args, log=Log()):
+def _get_largenumber(*args,log=Log(), verbose=True):
     '''
     get a helper large number, >> max(pos)
     '''
@@ -108,7 +109,7 @@ def _get_largenumber(*args, log=Log()):
     return large_number
-def _quick_add_tchrpos(sumstats, chr="chr", pos="POS", large_number=10000000000, dropchrpos=False, verbose=True, log=Log()):
+def _quick_add_tchrpos(sumstats, chr="chr", pos="POS", large_number=10000000000, dropchrpos=False,log=Log(), verbose=True):
     sumstats["TCHR+POS"] = sumstats["CHR"]*large_number + sumstats["POS"]
     sumstats["TCHR+POS"] = sumstats["TCHR+POS"].astype('Int64')
     if dropchrpos == True:
@@ -117,7 +118,7 @@ def _quick_add_tchrpos(sumstats, chr="chr", pos="POS", large_number=10000000000,
     return sumstats
-def _quick_merge_sumstats(sumstats1, sumstats2):
+def _quick_merge_sumstats(sumstats1, sumstats2, log=Log(), verbose=True):
     merged_sumstats = pd.merge(sumstats1, sumstats2, on="TCHR+POS", how="outer", suffixes=('_1', '_2'))
     merged_sumstats["CHR"] = merged_sumstats["CHR_1"]
     merged_sumstats["POS"] = merged_sumstats["POS_1"]
@@ -126,7 +127,7 @@ def _quick_merge_sumstats(sumstats1, sumstats2):
     merged_sumstats = merged_sumstats.drop(labels=["CHR_1", "CHR_2", "POS_1", "POS_2"],axis=1)
     return merged_sumstats
-def _quick_assign_i(sumstats, chrom="CHR",pos="POS"):
+def _quick_assign_i(sumstats, chrom="CHR",pos="POS",log=Log(), verbose=True):
     # sort by CHR an POS
     sumstats = sumstats.sort_values([chrom,pos])
     # set new id
@@ -158,7 +159,7 @@ def _quick_assign_i(sumstats, chrom="CHR",pos="POS"):
     sumstats["i"] = np.floor(pd.to_numeric(sumstats["i"], errors='coerce')).astype('Int64')
     return sumstats, chrom_df
-def _quick_assign_i_with_rank(sumstats, chrpad, use_rank=False, chrom="CHR",pos="POS",drop_chr_start=False,_posdiccul=None):
+def _quick_assign_i_with_rank(sumstats, chrpad, use_rank=False, chrom="CHR",pos="POS",drop_chr_start=False,_posdiccul=None,log=Log(), verbose=True):
     # align all variants on a single axis (i)
     sumstats = sumstats.sort_values([chrom,pos])
     if use_rank is True:
@@ -218,7 +219,7 @@ def _quick_assign_i_with_rank(sumstats, chrpad, use_rank=False, chrom="CHR",pos=
     sumstats["i"] = np.floor(pd.to_numeric(sumstats["i"], errors='coerce')).astype('Int64')
     return sumstats, chrom_df
-def _quick_assign_marker_relative_size(series, sig_level = 5e-8, suggestive_sig_level=5e-6, lower_level=5e-4):
+def _quick_assign_marker_relative_size(series, sig_level = 5e-8, suggestive_sig_level=5e-6, lower_level=5e-4,log=Log(), verbose=True):
     size_series = series.copy()
     size_series[:] = 1
@@ -231,7 +232,7 @@ def _quick_assign_marker_relative_size(series, sig_level = 5e-8, suggestive_sig_
     size_series[is_sig_level] = 4
     return size_series
-def _quick_assign_highlight_hue(sumstats,highlight,highlight_windowkb, snpid="SNPID",chrom="CHR",pos="POS",verbose=True, log=Log()):
+def _quick_assign_highlight_hue(sumstats,highlight,highlight_windowkb, snpid="SNPID",chrom="CHR",pos="POS",log=Log(), verbose=True):
     to_highlight = sumstats.loc[sumstats[snpid].isin(highlight),:]
     #assign colors: 0 is hightlight color
     for i,row in to_highlight.iterrows():
@@ -243,7 +244,7 @@ def _quick_assign_highlight_hue(sumstats,highlight,highlight_windowkb, snpid="SN
         sumstats.loc[right_chr&up_pos&low_pos,"HUE"]="0"
     return sumstats
-def _quick_assign_highlight_hue_pair(sumstats, highlight1, highlight2, highlight_windowkb, chrom="CHR",pos="POS",verbose=True, log=Log()):
+def _quick_assign_highlight_hue_pair(sumstats, highlight1, highlight2, highlight_windowkb, chrom="CHR",pos="POS",log=Log(), verbose=True):
     #assign colors: 0 is hightlight color
     to_highlight1 = pd.DataFrame()
     to_highlight2 = pd.DataFrame()
@@ -271,7 +272,7 @@ def _quick_assign_highlight_hue_pair(sumstats, highlight1, highlight2, highlight
             sumstats.loc[right_chr&up_pos&low_pos,"HUE2"]="0"
     return sumstats, to_highlight1, to_highlight2
-def _quick_extract_snp_in_region(sumstats, region, chrom="CHR",pos="POS",verbose=True, log=Log()):
+def _quick_extract_snp_in_region(sumstats, region, chrom="CHR",pos="POS",log=Log(), verbose=True):
     region_chr = region[0]
     region_start = region[1]
     region_end = region[2]
@@ -281,15 +282,16 @@ def _quick_extract_snp_in_region(sumstats, region, chrom="CHR",pos="POS",verbose
     sumstats = sumstats.loc[is_in_region_snp,:]
     return sumstats
-def _cut(series, mode,cutfactor,cut,skip, ylabels, cut_log, verbose,lines_to_plot, log):
+def _cut(series, mode,cutfactor,cut,skip, ylabels, cut_log, verbose, lines_to_plot, log):
+    log.write(" -Converting data above cut line...",verbose=verbose)
     if ylabels is not None:
         ylabels = pd.Series(ylabels)
     maxy = series.max()
     series = series.copy()
     if "b" not in mode:
-        if verbose: log.write(" -Maximum -log10(P) values is "+str(maxy) +" .")
+        if verbose: log.write(" -Maximum -log10(P) value is "+str(maxy) +" .")
     elif "b" in mode:
-        if verbose: log.write(" -Maximum DENSITY values is "+str(maxy) +" .")
+        if verbose: log.write(" -Maximum DENSITY value is "+str(maxy) +" .")
     maxticker=int(np.round(series.max(skipna=True)))
@@ -340,7 +342,7 @@ def _cut(series, mode,cutfactor,cut,skip, ylabels, cut_log, verbose,lines_to_plo
                 #sumstats.loc[sumstats["scaled_P"]>cut,"scaled_P"] = (sumstats.loc[sumstats["scaled_P"]>cut,"scaled_P"]-cut)/cutfactor +  cut
                 maxy = (maxticker-cut)/cutfactor + cut
-    if verbose: log.write("Finished data conversion and sanity check.")
     return series, maxy, maxticker, cut, cutfactor,ylabels,lines_to_plot
 #def _cut_line(level, mode,cutfactor,cut,skip, ylabels, cut_log, verbose, log):
@@ -362,9 +364,11 @@ def _set_yticklabels(cut,
                      font_family,
                      ytick3,
                      ylabels,
-                     ylabels_converted
+                     ylabels_converted,
+                     log=Log(),
+                     verbose=True
                      ):
+    log.write(" -Processing Y tick lables...",verbose=verbose)
     # if no cut
     if cut == 0:
             ax1.set_ylim(skip, ceil(maxy*1.2) )
@@ -430,7 +434,8 @@ def _set_yticklabels(cut,
     return ax1
-def _jagged_y(cut,skip,ax1,mode,mqqratio,jagged_len,jagged_wid):
+def _jagged_y(cut,skip,ax1,mode,mqqratio,jagged_len,jagged_wid, log=Log(), verbose=True):
+    log.write(" -Processing jagged Y axis...",verbose=verbose)
     tycut = cut +0.3 #(cut - skip)/ (ax1.get_ylim()[1] - skip) + 0.002
     dy= jagged_len * (cut - skip)
     x0 =  0

gwaslab/viz_aux_reposition_text.py CHANGED Viewed

@@ -2,7 +2,7 @@ import pandas as pd
 import numpy as np
 from gwaslab.g_Log import Log
-def adjust_text_position(positions, yspan, repel_force=0.01, max_iter=100,amode="int",log=Log(),verbose=True):
+def adjust_text_position(positions, yspan, repel_force=0.01, max_iter=100,amode="int",log=Log(),verbose=True, min_factor=None):
     # check the number of variants to annotate
     #if repel_force>0:
     #    if 1/(repel_force*2 +0.01) < len(positions):
@@ -15,10 +15,12 @@ def adjust_text_position(positions, yspan, repel_force=0.01, max_iter=100,amode=
     if amode=="int":
         step = int(yspan*repel_force)
     elif amode=="log":
-        min_factor = np.min(positions)
+        if min_factor is None:
+            min_factor = np.min(positions)
         #(1, max) -> (0, log(max)))
-        positions = np.log(positions/min_factor)
+        positions = np.log2(positions/min_factor)
         step = max(positions)*repel_force
     else:
         step = yspan*repel_force
@@ -33,7 +35,8 @@ def adjust_text_position(positions, yspan, repel_force=0.01, max_iter=100,amode=
             if amode=="int":
                 return  np.floor(pd.to_numeric(positions, errors='coerce')).astype('Int64').copy()
             elif amode=="log":
-                return  np.exp(pd.to_numeric(positions, errors='coerce')) * min_factor
+                return  np.power(2, pd.to_numeric(positions, errors='coerce'))* min_factor
             else:
                 return  pd.to_numeric(positions, errors='coerce')
         else:

gwaslab/viz_aux_save_figure.py CHANGED Viewed

@@ -4,23 +4,24 @@ import time
 import os.path
 def save_figure(fig, save, keyword, save_args=None, log = Log(), verbose=True):
+    log.write("Start to save figure..." ,verbose=verbose)
     if save_args is None:
         save_args = {}
     if save:
-        if verbose: log.write("Saving plot:")
         if save==True:
             default_path = get_default_path(keyword)
             fig.savefig(default_path, bbox_inches="tight",**save_args)
-            log.write(" -Saved to "+ default_path + " successfully!" )
+            log.write(" -Saved to "+ default_path + " successfully!" ,verbose=verbose)
         else:
             if os.path.exists(save):
                 fig.savefig(save,bbox_inches="tight",**save_args)
-                log.write(" -Saved to "+ save + " successfully! (overwrite)" )
+                log.write(" -Saved to "+ save + " successfully! (overwrite)" ,verbose=verbose)
             else:
                 fig.savefig(save,bbox_inches="tight",**save_args)
-                log.write(" -Saved to "+ save + " successfully!" )
+                log.write(" -Saved to "+ save + " successfully!" ,verbose=verbose)
     else:
-        log.write(" -Skip saving figures!" )
+        log.write(" -Skip saving figure!" ,verbose=verbose)
+    log.write("Finished saving figure..." ,verbose=verbose)
 def get_default_path(keyword,fmt="png"):
     path_dictionary = {

gwaslab/viz_plot_compare_af.py CHANGED Viewed

@@ -72,11 +72,11 @@ def plotdaf(sumstats,
     sumstats = sumstats.loc[(~sumstats[eaf].isna())&(~sumstats[daf].isna()),[snpid,eaf,daf]+alleles].copy()
-    sumstats.loc[:,daf] = sumstats.loc[:,daf].astype("float")
-    sumstats.loc[:,eaf] = sumstats.loc[:,eaf].astype("float")
+    sumstats[daf] = sumstats[daf].astype("float")
+    sumstats[eaf] = sumstats[eaf].astype("float")
     if verbose: log.write(" -Plotting valriants:" + str(len(sumstats)))
-    sumstats.loc[:,"RAF"]=sumstats[eaf] - sumstats[daf]
+    sumstats["RAF"]=sumstats[eaf] - sumstats[daf]
     sns.set_style("ticks")
     fig, (ax1, ax2) = plt.subplots(1, 2,**plt_args)
     ax1.scatter(sumstats["RAF"],sumstats[eaf],label="Non-outlier", **scatter_args)
@@ -119,9 +119,9 @@ def plotdaf(sumstats,
     ax1.set_ylim([0,1])
-    sumstats.loc[:,"ID"] = sumstats.index
+    sumstats["ID"] = sumstats.index
-    to_plot = pd.melt(sumstats,id_vars=['ID'], value_vars=['EAF',"RAF"], var_name='Types', value_name='Allele Frequency')
+    to_plot = pd.melt(sumstats,id_vars=['ID'], value_vars=['EAF',"RAF"], var_name='Types', value_name='Allele Frequency').dropna()
     sns.histplot(data=to_plot, x="Allele Frequency", hue="Types", fill=True, ax=ax2, legend = legend2 ,**histplot_args)
     ax2.set_xlabel("Allele Frequency",**font_args)

gwaslab/viz_plot_compare_effect.py CHANGED Viewed

@@ -36,6 +36,7 @@ def compare_effect(path1,
                    wc_correction=False,
                    null_beta=0,
                    is_q=False,
+                   is_q_mc = False,
                    include_all=True,
                    q_level=0.05,
                    sig_level=5e-8,
@@ -530,9 +531,10 @@ def compare_effect(path1,
     if (is_q is True):
         if verbose: log.write(" -Calculating Cochran's Q statistics and peform chisq test...")
         if mode=="beta" or mode=="BETA" or mode=="Beta":
-            sig_list_merged = test_q(sig_list_merged,"EFFECT_1","SE_1","EFFECT_2_aligned","SE_2",q_level=q_level)
+            sig_list_merged = test_q(sig_list_merged,"EFFECT_1","SE_1","EFFECT_2_aligned","SE_2",q_level=q_level,is_q_mc=is_q_mc, log=log, verbose=verbose)
         else:
-            sig_list_merged = test_q(sig_list_merged,"BETA_1","SE_1","BETA_2_aligned","SE_2",q_level=q_level)
+            sig_list_merged = test_q(sig_list_merged,"BETA_1","SE_1","BETA_2_aligned","SE_2",q_level=q_level,is_q_mc=is_q_mc, log=log, verbose=verbose)
     ######################### save ###############################################################
     ## save the merged data
     save_path = label[0]+"_"+label[1]+"_beta_sig_list_merged.tsv"
@@ -806,8 +808,15 @@ def compare_effect(path1,
     if legend_mode == "full" and is_q==True :
         title_proxy = Rectangle((0,0), 0, 0, color='w',label=legend_title)
         title_proxy2 = Rectangle((0,0), 0, 0, color='w',label=legend_title2)
-        het_label_sig = r"$P_{het} < $" + "${}$".format(q_level)
-        het_label_sig2 = r"$P_{het} > $" + "${}$".format(q_level)
+        if is_q_mc=="fdr":
+            het_label_sig = r"$FDR_{het} < $" + "${}$".format(q_level)
+            het_label_sig2 = r"$FDR_{het} > $" + "${}$".format(q_level)
+        elif is_q_mc=="bon":
+            het_label_sig = r"$P_{het,bon} < $" + "${}$".format(q_level)
+            het_label_sig2 = r"$P_{het,bon} > $" + "${}$".format(q_level)
+        else:
+            het_label_sig = r"$P_{het} < $" + "${}$".format(q_level)
+            het_label_sig2 = r"$P_{het} > $" + "${}$".format(q_level)
         het_sig = Rectangle((0,0), 0, 0, facecolor='#cccccc',edgecolor="black", linewidth=1, label=het_label_sig)
         het_nonsig = Rectangle((0,0), 0, 0, facecolor='#cccccc',edgecolor="white",linewidth=1, label=het_label_sig2)
@@ -876,7 +885,7 @@ def reorderLegend(ax=None, order=None, add=None):
     new_handles = [info[l] for l in order]
     return new_handles, order
-def test_q(df,beta1,se1,beta2,se2,q_level=0.05):
+def test_q(df,beta1,se1,beta2,se2,q_level=0.05,is_q_mc=False, log=Log(), verbose=False):
     w1="Weight_1"
     w2="Weight_2"
     beta="BETA_FE"
@@ -891,6 +900,14 @@ def test_q(df,beta1,se1,beta2,se2,q_level=0.05):
     df[q] = df[w1]*(df[beta1]-df[beta])**2 + df[w2]*(df[beta2]-df[beta])**2
     df[pq] = ss.chi2.sf(df[q], 1)
     df["Edge_color"]="white"
+    if is_q_mc=="fdr":
+        if verbose: log.write(" -FDR correction applied...")
+        df[pq] = ss.false_discovery_control(df[pq])
+    elif is_q_mc=="bon":
+        if verbose: log.write(" -Bonferroni correction applied...")
+        df[pq] = df[pq] * len(df[pq])
     df.loc[df[pq]<q_level,"Edge_color"]="black"
     df.drop(columns=["Weight_1","Weight_2","BETA_FE"],inplace=True)
     # Huedo-Medina, T. B., Sánchez-Meca, J., Marín-Martínez, F., & Botella, J. (2006). Assessing heterogeneity in meta-analysis: Q statistic or I² index?. Psychological methods, 11(2), 193.

gwaslab/viz_plot_miamiplot2.py CHANGED Viewed

@@ -38,6 +38,7 @@ from gwaslab.g_Sumstats import Sumstats
 from gwaslab.viz_aux_save_figure import save_figure
 from gwaslab.viz_plot_mqqplot import mqqplot
 from gwaslab.g_version import _get_version
 def plot_miami2(
           path1=None,
           path2=None,
@@ -86,7 +87,7 @@ def plot_miami2(
           log=Log(),
           **mqq_args
           ):
+    log.write("Start to create miami plot {}:".format(_get_version()), verbose=verbose)
     ## figuring arguments ###########################################################################################################
     # figure columns to use
     if scaled == True:
@@ -98,6 +99,7 @@ def plot_miami2(
             cols = ["CHR","POS","MLOG10P"]
         else:
             cols = ["CHR","POS","P"]
     if cols1 is None:
         cols1 = cols.copy()
     if cols2 is None:
@@ -151,9 +153,8 @@ def plot_miami2(
     fig_args, scatter_args = _figure_args_for_vector_plot(save, fig_args, scatter_args)
     # add suffix if ids are the same
-    id1, id2, mqq_args1, mqq_args2 = _solve_id_contradictory(id0, id1, id2, mqq_args1, mqq_args2)
-    if verbose: log.write("Start to plot miami plot {}:".format(_get_version()))
+    id1_1, id2_2, mqq_args1, mqq_args2 = _solve_id_contradictory(id0, id1, id2, mqq_args1, mqq_args2)
     if dpi!=100:
         fig_args["dpi"] = dpi
     if xtickpad is None:
@@ -176,6 +177,7 @@ def plot_miami2(
             titles_pad_adjusted[0]= 1 + titles_pad[0]
         if "anno2" in mqq_args.keys():
             titles_pad_adjusted[1]=  - titles_pad[1]
     if merged_sumstats is None:
     ## load sumstats1 ###########################################################################################################
         sumstats1 = _figure_type_load_sumstats(name="Sumstats1",
@@ -198,8 +200,8 @@ def plot_miami2(
     else:
         cols1[2] += suffixes[0]
         cols2[2] += suffixes[1]
-        sumstats1 = merged_sumstats.loc[:,cols1].copy()
-        sumstats2 = merged_sumstats.loc[:,cols2].copy()
+        sumstats1 = merged_sumstats[cols1].copy()
+        sumstats2 = merged_sumstats[cols2].copy()
     ## rename and quick fix ###########################################################################################################
     renaming_dict1 = {cols1[0]:"CHR",cols1[1]:"POS",cols1[2]:"P"}
@@ -217,7 +219,7 @@ def plot_miami2(
     ## create merge index ###########################################################################################################
     sumstats1 = _quick_add_tchrpos(sumstats1,large_number=large_number, dropchrpos=False, verbose=verbose, log=log)
     sumstats2 = _quick_add_tchrpos(sumstats2,large_number=large_number, dropchrpos=False, verbose=verbose, log=log)
-    if verbose: log.write(" -Merging sumstats using chr and pos...")
+    log.write(" -Merging sumstats using chr and pos...",verbose=verbose)
     ###### merge #####################################################################################################
     merged_sumstats = _quick_merge_sumstats(sumstats1=sumstats1,sumstats2=sumstats2)
@@ -231,7 +233,9 @@ def plot_miami2(
                                                           drop_chr_start=False)
     # P_1  scaled_P_1  P_2  scaled_P_2  TCHR+POS CHR POS
+    log.write(" -Columns in merged sumstats: {}".format(",".join(merged_sumstats.columns)), verbose=verbose)
     del(sumstats1)
     del(sumstats2)
     garbage_collect.collect()
@@ -243,13 +247,14 @@ def plot_miami2(
         plt.subplots_adjust(hspace=region_hspace)
     else:
         fig, ax1, ax5 = figax
+    log.write("Start to create Manhattan plot for sumstats1...", verbose=verbose)
     fig,log = mqqplot(merged_sumstats,
                       chrom="CHR",
                       pos="POS",
                       p="P_1",
                       mlog10p="scaled_P_1",
-                      snpid=id1,
+                      snpid=id1_1,
                       scaled=scaled1,
                       log=log,
                       mode=mode,
@@ -260,15 +265,16 @@ def plot_miami2(
                       _if_quick_qc=False,
                       **mqq_args1
                      )
+    log.write("Finished creating Manhattan plot for sumstats1".format(_get_version()), verbose=verbose)
+    log.write("Start to create Manhattan plot for sumstats2...", verbose=verbose)
     fig,log = mqqplot(merged_sumstats,
                       chrom="CHR",
                       pos="POS",
                       p="P_2",
                       mlog10p="scaled_P_2",
                       scaled=scaled2,
-                      snpid=id2,
+                      snpid=id2_2,
                       log=log,
                       mode=mode,
                       figax=(fig,ax5),
@@ -277,7 +283,8 @@ def plot_miami2(
                        _invert=True,
                       _if_quick_qc=False,
                      **mqq_args2)
+    log.write("Finished creating Manhattan plot for sumstats2".format(_get_version()), verbose=verbose)
     if same_ylim==True:
         ylim1_converted = ax1.get_ylim()
         ylim2_converted = ax5.get_ylim()
@@ -285,8 +292,6 @@ def plot_miami2(
             ax5.set_ylim(ylim1_converted)
         else:
             ax1.set_ylim(ylim2_converted)
     #####################################################################################################################
     ax5.set_xlabel("")
@@ -337,24 +342,27 @@ def _sort_args_to_12(mqq_args):
     return mqq_args1, mqq_args2
 def _solve_id_contradictory(id0, id1, id2, mqq_args1, mqq_args2):
-    if id1 is not None and id2 is not None:
+    if (id1 is not None) and (id2 is not None):
         if id1 == id2:
             id1_1 = id1 + "_1"
             id2_2 = id2 + "_2"
             if "anno" in mqq_args1.keys():
                 if mqq_args1["anno"] == id1:
                     mqq_args1["anno"] = id1_1
-            if "anno" in mqq_args1.keys():
-                if mqq_args1["anno"] == id2:
-                    mqq_args1["anno"] = id2_2
+            if "anno" in mqq_args2.keys():
+                if mqq_args2["anno"] == id2:
+                    mqq_args2["anno"] = id2_2
         else:
             id1_1 = id1
             id2_2 = id2
     if id1 is None:
         id1_1 = id0
     if id2 is None:
         id2_2 = id0
-    return id1_1, id2_2, mqq_args1, mqq_args2
+    return (id1_1, id2_2, mqq_args1, mqq_args2)
 def _figure_args_for_vector_plot(save, fig_args, scatter_kwargs ):
     if save is not None:

gwaslab 3.4.36__py3-none-any.whl → 3.4.38__py3-none-any.whl

Potentially problematic release.

gwaslab 3.4.36__py3-none-any.whl → 3.4.38__py3-none-any.whl