PyPI - gwaslab - Versions diffs - 3.4.14__py3-none-any.whl → 3.4.16__py3-none-any.whl - Mend

gwaslab 3.4.14__py3-none-any.whl → 3.4.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of gwaslab might be problematic. Click here for more details.

Files changed (20)

gwaslab/Sumstats.py +6 -4
gwaslab/__init__.py +3 -1
gwaslab/annotateplot.py +2 -2
gwaslab/calculate_power.py +119 -42
gwaslab/compare_effect.py +83 -17
gwaslab/download.py +19 -4
gwaslab/fill.py +183 -57
gwaslab/miamiplot.py +25 -10
gwaslab/mqqplot.py +4 -3
gwaslab/plotrg.py +208 -75
gwaslab/regionalplot.py +21 -3
gwaslab/retrievedata.py +49 -18
gwaslab/to_pickle.py +12 -0
gwaslab/trumpetplot.py +0 -0
gwaslab/version.py +3 -3
{gwaslab-3.4.14.dist-info → gwaslab-3.4.16.dist-info}/METADATA +2 -2
{gwaslab-3.4.14.dist-info → gwaslab-3.4.16.dist-info}/RECORD +20 -19
{gwaslab-3.4.14.dist-info → gwaslab-3.4.16.dist-info}/LICENSE +0 -0
{gwaslab-3.4.14.dist-info → gwaslab-3.4.16.dist-info}/WHEEL +0 -0
{gwaslab-3.4.14.dist-info → gwaslab-3.4.16.dist-info}/top_level.txt +0 -0

gwaslab/fill.py CHANGED Viewed

@@ -13,6 +13,8 @@ def filldata(
     overwrite=False,
     verbose=True,
     only_sig=False,
+    sig_level=5e-8,
+    extreme=False,
     log = Log()
     ):
@@ -33,39 +35,42 @@ def filldata(
             to_fill.remove(i)
         if verbose: log.write("  -Skipping columns: ",skip_cols)
     if verbose: log.write(" -Filling columns: ",to_fill)
-# beta to or ####################################################################################################
-    if "OR" in to_fill:
-        fill_or(sumstats,log,verbose=verbose)
-# or to beta ####################################################################################################
-    if "BETA" in to_fill:
-        fill_beta(sumstats,log,verbose=verbose)
-    if "SE" in to_fill:
-        fill_se(sumstats,log,verbose=verbose)
-# z/chi2 to p ##################################################################################################
-    if "P" in to_fill:
-        fill_p(sumstats,log,only_sig=only_sig,df=df,verbose=verbose)
-# beta/se to z ##################################################################################################
-    if "Z" in to_fill:
-        fill_z(sumstats,log,verbose=verbose)
-# z/p to chisq ##################################################################################################
-    if "CHISQ" in to_fill:
-        fill_chisq(sumstats,log,verbose=verbose)
-# EAF to MAF ##################################################################################################
-    if "MAF" in to_fill:
-        fill_maf(sumstats,log,verbose=verbose)
-# p to -log10(P)  ###############################################################################################
-    if "MLOG10P" in to_fill:
-        if "P" not in sumstats.columns:
-            fill_p(sumstats,log,verbose=verbose)
-            fill_mlog10p(sumstats,log,verbose=verbose)
-            sumstats = sumstats.drop(labels=["P"],axis=1)
-        else:
-            fill_mlog10p(sumstats,log,verbose=verbose)
+    fill_iteratively(sumstats,to_fill,log,only_sig,df,extreme,verbose)
+## beta to or ####################################################################################################
+#    if "OR" in to_fill:
+#        fill_or(sumstats,log,verbose=verbose)
+#
+## or to beta ####################################################################################################
+#    if "BETA" in to_fill:
+#        fill_beta(sumstats,log,verbose=verbose)
+#
+#    if "SE" in to_fill:
+#        fill_se(sumstats,log,verbose=verbose)
+## z/chi2 to p ##################################################################################################
+#    if "P" in to_fill:
+#        fill_p(sumstats,log,only_sig=only_sig,df=df,verbose=verbose)
+#
+## beta/se to z ##################################################################################################
+#    if "Z" in to_fill:
+#        fill_z(sumstats,log,verbose=verbose)
+#
+## z/p to chisq ##################################################################################################
+#    if "CHISQ" in to_fill:
+#        fill_chisq(sumstats,log,verbose=verbose)
+## EAF to MAF ##################################################################################################
+#    if "MAF" in to_fill:
+#        fill_maf(sumstats,log,verbose=verbose)
+## p to -log10(P)  ###############################################################################################
+#    if "MLOG10P" in to_fill:
+#        if extreme==True:
+#            fill_extreme_mlog10p(sumstats,log,verbose=verbose)
+#        elif "P" not in sumstats.columns:
+#            fill_p(sumstats,log,verbose=verbose)
+#            fill_mlog10p(sumstats,log,verbose=verbose)
+#            sumstats = sumstats.drop(labels=["P"],axis=1)
+#        else:
+#            fill_mlog10p(sumstats,log,verbose=verbose)
 # ###################################################################################
     sumstats = sortcolumn(sumstats, verbose=verbose, log=log)
@@ -75,16 +80,19 @@ def filldata(
 ##########################################################################################################################
-def fill_p(sumstats,log,df=None,only_sig=False,overwrite=False,verbose=True):
+def fill_p(sumstats,log,df=None,only_sig=False,sig_level=5e-8,overwrite=False,verbose=True,filled_count=0):
         # MLOG10P -> P
     if "MLOG10P" in sumstats.columns:
         if verbose: log.write("  - Filling P value using MLOG10P column...")
         sumstats["P"] = np.power(10,-sumstats["MLOG10P"])
+        filled_count +=1
     # Z -> P
     elif "Z" in sumstats.columns:
         if verbose: log.write("  - Filling P value using Z column...")
         stats.chisqprob = lambda chisq, degree_of_freedom: stats.chi2.sf(chisq, degree_of_freedom)
         sumstats["P"] = ss.chisqprob(sumstats["Z"]**2,1)
+        filled_count +=1
     elif "CHISQ" in sumstats.columns:
     #CHISQ -> P
@@ -92,83 +100,201 @@ def fill_p(sumstats,log,df=None,only_sig=False,overwrite=False,verbose=True):
         stats.chisqprob = lambda chisq, degree_of_freedom: stats.chi2.sf(chisq, degree_of_freedom)
         if df is None:
             if only_sig is True and overwrite is True:
-                sumstats.loc[sumstats["P"]<5e-8,"P"] = stats.chisqprob(sumstats.loc[sumstats["P"]<5e-8,"CHISQ"],1)
+                sumstats.loc[sumstats["P"]<sig_level,"P"] = stats.chisqprob(sumstats.loc[sumstats["P"]<sig_level,"CHISQ"],1)
+                filled_count +=1
             else:
                 sumstats["P"] = stats.chisqprob(sumstats["CHISQ"],1)
+                filled_count +=1
         else:
             if only_sig is True and overwrite is True:
-                if verbose: log.write("  - Filling P value using CHISQ column for variants:" , sum(sumstats["P"]<5e-8))
-                sumstats.loc[sumstats["P"]<5e-8,"P"] = stats.chisqprob(sumstats.loc[sumstats["P"]<5e-8,"CHISQ"],sumstats.loc[sumstats["P"]<5e-8,df].astype("int"))
+                if verbose: log.write("  - Filling P value using CHISQ column for variants:" , sum(sumstats["P"]<sig_level))
+                sumstats.loc[sumstats["P"]<sig_level,"P"] = stats.chisqprob(sumstats.loc[sumstats["P"]<sig_level,"CHISQ"],sumstats.loc[sumstats["P"]<sig_level,df].astype("int"))
+                filled_count +=1
             else:
                 if verbose: log.write("  - Filling P value using CHISQ column for all valid variants:")
                 sumstats["P"] = stats.chisqprob(sumstats["CHISQ"],sumstats[df].astype("int"))
-def fill_z(sumstats,log,verbose=True):
+                filled_count +=1
+    else:
+        return 0
+    return 1
+def fill_z(sumstats,log,verbose=True,filled_count=0):
     # BETA/SE -> Z
     if ("BETA" in sumstats.columns) and ("SE" in sumstats.columns):
         if verbose: log.write("  - Filling Z using BETA/SE column...")
         sumstats["Z"] = sumstats["BETA"]/sumstats["SE"]
-def fill_chisq(sumstats,log,verbose=True):
+        filled_count +=1
+    else:
+        return 0
+    return 1
+def fill_chisq(sumstats,log,verbose=True,filled_count=0):
     # Z -> CHISQ
     if "Z" in sumstats.columns:
         if verbose: log.write("  - Filling CHISQ using Z column...")
         sumstats["CHISQ"] = (sumstats["Z"])**2
+        filled_count +=1
     elif "P" in sumstats.columns:
     # P -> CHISQ
         if verbose: log.write("  - Filling CHISQ using P column...")
         sumstats["CHISQ"] = ss.chi2.isf(sumstats["P"], 1)
-def fill_or(sumstats,log,verbose=True):
+        filled_count +=1
+    else:
+        return 0
+    return 1
+def fill_or(sumstats,log,verbose=True,filled_count=0):
     # BETA -> OR
     if "BETA" in sumstats.columns:
         if verbose: log.write("  - Filling OR using BETA column...")
         sumstats["OR"]   = np.exp(sumstats["BETA"])
-    # BETA/SE -> OR_95L / OR_95U
-    # get confidence interval 95
-    if ("BETA" in sumstats.columns) and ("SE" in sumstats.columns):
-        if verbose: log.write("  - Filling OR_95L/OR_95U using BETA/SE columns...")
-        # beta - 1.96 x se , beta + 1.96 x se
-        sumstats["OR_95L"] = np.exp(sumstats["BETA"]-ss.norm.ppf(0.975)*sumstats["SE"])
-        sumstats["OR_95U"] = np.exp(sumstats["BETA"]+ss.norm.ppf(0.975)*sumstats["SE"])
-def fill_or95(sumstats,log,verbose=True):
+        filled_count +=1
+        # BETA/SE -> OR_95L / OR_95U
+        # get confidence interval 95
+        if ("BETA" in sumstats.columns) and ("SE" in sumstats.columns):
+            if verbose: log.write("  - Filling OR_95L/OR_95U using BETA/SE columns...")
+            # beta - 1.96 x se , beta + 1.96 x se
+            sumstats["OR_95L"] = np.exp(sumstats["BETA"]-ss.norm.ppf(0.975)*sumstats["SE"])
+            sumstats["OR_95U"] = np.exp(sumstats["BETA"]+ss.norm.ppf(0.975)*sumstats["SE"])
+            filled_count +=1
+    else:
+        return 0
+    return 1
+def fill_or95(sumstats,log,verbose=True,filled_count=0):
     # get confidence interval 95
     if ("BETA" in sumstats.columns) and ("SE" in sumstats.columns):
         if verbose: log.write("  - Filling OR_95L/OR_95U using BETA/SE columns...")
         # beta - 1.96 x se , beta + 1.96 x se
         sumstats["OR_95L"] = np.exp(sumstats["BETA"]-ss.norm.ppf(0.975)*sumstats["SE"])
         sumstats["OR_95U"] = np.exp(sumstats["BETA"]+ss.norm.ppf(0.975)*sumstats["SE"])
-def fill_beta(sumstats,log,verbose=True):
+        filled_count +=1
+    else:
+        return 0
+def fill_beta(sumstats,log,verbose=True,filled_count=0):
     # OR -> beta
     if "OR" in sumstats.columns:
         if verbose: log.write("  - Filling BETA value using OR column...")
         sumstats["BETA"]  = np.log(sumstats["OR"])
+        filled_count +=1
+    else:
+        return 0
+    return 1
-def fill_se(sumstats,log,verbose=True):
+def fill_se(sumstats,log,verbose=True,filled_count=0):
     # OR / OR_95L /OR_95U -> SE
     if ("P" in sumstats.columns) and ("BETA" in sumstats.columns):
         if verbose: log.write("  - Filling SE value using BETA and P column...")
         sumstats["SE"]= np.abs(sumstats["BETA"]/ ss.norm.ppf(1-sumstats["P"]/2))
+        filled_count +=1
     elif ("OR" in sumstats.columns) and ("OR_95U" in sumstats.columns):
         if verbose: log.write("  - Filling SE value using OR/OR_95U column...")
         #
         sumstats["SE"]=(np.log(sumstats["OR_95U"]) - np.log(sumstats["OR"]))/ss.norm.ppf(0.975)
+        filled_count +=1
     elif ("OR" in sumstats.columns) and ("OR_95L" in sumstats.columns):
         if verbose: log.write("  - Filling SE value using OR/OR_95L column...")
         sumstats["SE"]=(np.log(sumstats["OR"]) - np.log(sumstats["OR_95L"]))/ss.norm.ppf(0.975)
+        filled_count +=1
     else:
         if verbose: log.write("  - Not enough information to fill SE...")
-def fill_mlog10p(sumstats,log,verbose=True):
+        return 0
+    return 1
+def fill_mlog10p(sumstats,log,verbose=True,filled_count=0):
     if "P" in sumstats.columns:
         # P -> MLOG10P
         if verbose: log.write("  - Filling MLOG10P using P column...")
         sumstats["MLOG10P"] = -np.log10(sumstats["P"])
+        filled_count +=1
+    else:
+        return 0
+    return 1
+def fill_extreme_mlog10p(sumstats,log,verbose=True,filled_count=0):
+    # ref: https://stackoverflow.com/questions/46416027/how-to-compute-p-values-from-z-scores-in-r-when-the-z-score-is-large-pvalue-muc/46416222#46416222
+    if "Z" in sumstats.columns:
+        # P -> MLOG10P
+        if verbose: log.write("  - Filling MLOG10P using Z column...")
+        sumstats = fill_extreme_mlog10(sumstats, "Z")
+        filled_count +=1
+    elif "BETA" in sumstats.columns and "SE" in sumstats.columns:
+        if verbose: log.write("  - Z column not available...")
+        if verbose: log.write("  - Filling Z using BETA/SE column...")
+        sumstats["Z"] = sumstats["BETA"]/sumstats["SE"]
+        if verbose: log.write("  - Filling MLOG10P using Z column...")
+        sumstats = fill_extreme_mlog10(sumstats, "Z")
+        filled_count +=1
+    else:
+        return 0
+    return 1
-def fill_maf(sumstats,log,verbose=True):
+def fill_maf(sumstats,log,verbose=True,filled_count=0):
     if "EAF" in sumstats.columns:
         # EAF -> MAF
         if verbose: log.write("  - Filling MAF using EAF column...")
         sumstats["MAF"] =  sumstats["EAF"].apply(lambda x: min(x,1-x) if pd.notnull(x) else np.nan)
+        filled_count +=1
+    else:
+        return 0
+    return 1
+####################################################################################################################
+def fill_extreme_mlog10(sumstats, z):
+    log_pvalue = np.log(2) + ss.norm.logsf(np.abs(sumstats[z])) #two-sided
+    log10_pvalue = log_pvalue/np.log(10)
+    mantissa = 10**(log10_pvalue %1 )
+    exponent = log10_pvalue // 1
+    sumstats["MLOG10P"] = -log10_pvalue
+    sumstats["P_MANTISSA"]= mantissa
+    sumstats["P_EXPONENT"]= exponent
+    return sumstats
+####################################################################################################################
+def fill_iteratively(sumstats,to_fill,log,only_sig,df,extreme,verbose):
+    if verbose: log.write("  - Filling Columns iteratively...")
+    filled=[]
+    for i in range(len(to_fill)):
+        filled_count=0
+        previous_count=filled_count
+    # beta to or ####################################################################################################
+        if "OR" in to_fill:
+            status = fill_or(sumstats,log,verbose=verbose,filled_count=filled_count)
+            if status == 1 : to_fill.remove("OR")
+    # or to beta ####################################################################################################
+        if "BETA" in to_fill:
+            status = fill_beta(sumstats,log,verbose=verbose,filled_count=filled_count)
+            if status == 1 : to_fill.remove("BETA")
+        if "SE" in to_fill:
+            status = fill_se(sumstats,log,verbose=verbose,filled_count=filled_count)
+            if status == 1 : to_fill.remove("SE")
+    # z/chi2 to p ##################################################################################################
+        if "P" in to_fill:
+            status = fill_p(sumstats,log,only_sig=only_sig,df=df,sig_level=sig_level,verbose=verbose,filled_count=filled_count)
+            if status == 1 : to_fill.remove("P")
+    # beta/se to z ##################################################################################################
+        if "Z" in to_fill:
+            status = fill_z(sumstats,log,verbose=verbose,filled_count=filled_count)
+            if status == 1 : to_fill.remove("Z")
+    # z/p to chisq ##################################################################################################
+        if "CHISQ" in to_fill:
+            status = fill_chisq(sumstats,log,verbose=verbose,filled_count=filled_count)
+            if status == 1 : to_fill.remove("CHISQ")
+    # EAF to MAF ##################################################################################################
+        if "MAF" in to_fill:
+            status = fill_maf(sumstats,log,verbose=verbose,filled_count=filled_count)
+            if status == 1 : to_fill.remove("MAF")
+    # p to -log10(P)  ###############################################################################################
+        if "MLOG10P" in to_fill:
+            if extreme==True:
+                status = fill_extreme_mlog10p(sumstats,log,verbose=verbose,filled_count=filled_count)
+                filled_count +=1
+            elif "P" not in sumstats.columns:
+                fill_p(sumstats,log,verbose=verbose)
+                status = fill_mlog10p(sumstats,log,verbose=verbose,filled_count=filled_count)
+                sumstats = sumstats.drop(labels=["P"],axis=1)
+            else:
+                status = fill_mlog10p(sumstats,log,verbose=verbose)
+            if status == 1 : to_fill.remove("MLOG10P")
+        if previous_count == filled_count:
+            break

gwaslab/miamiplot.py CHANGED Viewed

@@ -36,13 +36,15 @@ from gwaslab.quickfix import _quick_extract_snp_in_region
 from gwaslab.quickfix import _quick_assign_highlight_hue_pair
 from gwaslab.quickfix import _quick_assign_marker_relative_size
 from gwaslab.annotateplot import annotate_pair
+from gwaslab.to_pickle import load_pickle
+from gwaslab.to_pickle import load_data_from_pickle
 def plot_miami(
           path1,
           path2,
           cols1=None,
           cols2=None,
           sep=None,
+          mode="txt",
           chr_dict  = None,
           chr_dict1 = False,
           chr_dict2 = False,
@@ -111,6 +113,10 @@ def plot_miami(
           log=Log()
           ):
     ## figuring arguments ###########################################################################################################
+    if cols1 is None:
+        cols1 = ["CHR","POS","P"]
+    if cols2 is None:
+        cols2 = ["CHR","POS","P"]
     if highlight is None:
         highlight  = list()
     if highlight1 is None:
@@ -163,9 +169,10 @@ def plot_miami(
         titles=["",""]
     if titles_pad is None:
         titles_pad=[0.2,0.2]
+    if type(mode) is str:
+        modes =[ mode, mode]
+    else:
+        modes = mode
     if verbose: log.write("Start to plot miami plot with the following basic settings:")
     if verbose: log.write(" -Genome-wide significance level is set to "+str(sig_level)+" ...")
@@ -192,16 +199,24 @@ def plot_miami(
     pos="POS"
     ## load sumstats1 ###########################################################################################################
-    if verbose: log.write(" -Loading sumstats1:" + path1)
+    if verbose: log.write(" -Loading sumstats1 ({} mode):".format(modes[0]) + path1)
     if verbose: log.write(" -Sumstats1 CHR,POS,P information will be obtained from:",cols1)
-    sumstats1 = pd.read_csv(path1,sep=sep[0],usecols=cols1,dtype={cols1[0]:"string",cols1[1]:"Int64",cols1[2]:"float64"},**readcsv_args)
-    sumstats1 = sumstats1.rename(columns={cols1[0]:"CHR",cols1[1]:"POS",cols1[2]:"P"})
-    sumstats1 = _quick_fix(sumstats1,chr_dict=chr_dict1, scaled=scaled1, verbose=verbose, log=log)
+    if modes[0]=="pickle":
+        sumstats1 = load_data_from_pickle(path1,usecols=cols1)
+    else:
+        sumstats1 = pd.read_csv(path1,sep=sep[0],usecols=cols1,dtype={cols1[0]:"string",cols1[1]:"Int64",cols1[2]:"float64"},**readcsv_args)
     ## load sumstats2 ###########################################################################################################
-    if verbose: log.write(" -Loading sumstats2:" + path2)
+    if verbose: log.write(" -Loading sumstats2 ({} mode):".format(modes[1]) + path2)
     if verbose: log.write(" -Sumstats2 CHR,POS,P information will be obtained from:",cols2)
-    sumstats2 = pd.read_csv(path2,sep=sep[1],usecols=cols2,dtype={cols1[0]:"string",cols1[1]:"Int64",cols1[2]:"float64"},**readcsv_args)
+    if modes[1]=="pickle":
+        sumstats2 = load_data_from_pickle(path2,usecols=cols2)
+    else:
+        sumstats2 = pd.read_csv(path2,sep=sep[1],usecols=cols2,dtype={cols1[0]:"string",cols1[1]:"Int64",cols1[2]:"float64"},**readcsv_args)
+    sumstats1 = sumstats1.rename(columns={cols1[0]:"CHR",cols1[1]:"POS",cols1[2]:"P"})
+    sumstats1 = _quick_fix(sumstats1,chr_dict=chr_dict1, scaled=scaled1, verbose=verbose, log=log)
     sumstats2 = sumstats2.rename(columns={cols2[0]:"CHR",cols2[1]:"POS",cols2[2]:"P"})
     sumstats2 = _quick_fix(sumstats2,chr_dict=chr_dict2, scaled=scaled2, verbose=verbose, log=log)

gwaslab/mqqplot.py CHANGED Viewed

@@ -29,6 +29,7 @@ from adjustText import adjust_text
 from gwaslab.textreposition import adjust_text_position
 from gwaslab.annotateplot import annotate_single
 from gwaslab.qqplot import _plot_qq
+from gwaslab.retrievedata import auto_check_vcf_chr_dict
 from gwaslab.regionalplot import _plot_regional
 from gwaslab.regionalplot import process_vcf
 from gwaslab.quickfix import _get_largenumber
@@ -180,8 +181,7 @@ def mqqplot(insumstats,
         chr_dict = get_chr_to_number()
     if xtick_chr_dict is None:
         xtick_chr_dict = get_number_to_chr()
-    if vcf_chr_dict is None:
-        vcf_chr_dict = get_number_to_chr()
     if gtf_chr_dict is None:
         gtf_chr_dict = get_number_to_chr()
     if rr_chr_dict is None:
@@ -267,6 +267,7 @@ def mqqplot(insumstats,
             additional_line_color = ["grey"]
     lines_to_plot = -np.log10(lines_to_plot)
+    vcf_chr_dict = auto_check_vcf_chr_dict(vcf_path, vcf_chr_dict, verbose, log)
 # Plotting mode selection : layout ####################################################################
     # ax1 : manhattanplot / brisbane plot
     # ax2 : qq plot
@@ -321,7 +322,7 @@ def mqqplot(insumstats,
     # CHR and POS ########################################################################
     # chrom and pos exists && (m || r mode)
-    if (chrom is not None) and (pos is not None) and (("m" in mode) or ("r" in mode)):
+    if (chrom is not None) and (pos is not None) and (("qq" in mode) or ("m" in mode) or ("r" in mode)):
         # when manhattan plot, chrom and pos is needed.
         if chrom in insumstats.columns:
             usecols.append(chrom)

gwaslab 3.4.14__py3-none-any.whl → 3.4.16__py3-none-any.whl

Potentially problematic release.

gwaslab 3.4.14__py3-none-any.whl → 3.4.16__py3-none-any.whl