PyPI - gwaslab - Versions diffs - 3.4.47__tar.gz → 3.4.48__tar.gz - Mend

gwaslab 3.4.47.tar.gz → 3.4.48.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of gwaslab might be problematic. Click here for more details.

Files changed (91) hide show

{gwaslab-3.4.47/src/gwaslab.egg-info → gwaslab-3.4.48}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: gwaslab
-Version: 3.4.47
+Version: 3.4.48
 Summary: A collection of handy tools for GWAS SumStats
 Author-email: Yunye <yunye@gwaslab.com>
 Project-URL: Homepage, https://cloufield.github.io/gwaslab/
@@ -51,7 +51,7 @@ Warning: Known issues of GWASLab are summarized in [https://cloufield.github.io/
 ### install via pip
 ```
-pip install gwaslab==3.4.45
+pip install gwaslab==3.4.46
 ```
 ```python

{gwaslab-3.4.47 → gwaslab-3.4.48}/README.md RENAMED Viewed

@@ -23,7 +23,7 @@ Warning: Known issues of GWASLab are summarized in [https://cloufield.github.io/
 ### install via pip
 ```
-pip install gwaslab==3.4.45
+pip install gwaslab==3.4.46
 ```
 ```python

{gwaslab-3.4.47 → gwaslab-3.4.48}/pyproject.toml RENAMED Viewed

@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "gwaslab"
-version = "3.4.47"
+version = "3.4.48"
 authors = [
   { name="Yunye", email="yunye@gwaslab.com" },
 ]

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/g_Sumstats.py RENAMED Viewed

@@ -76,6 +76,8 @@ from gwaslab.util_ex_ldsc import _estimate_rg_by_ldsc
 from gwaslab.util_ex_ldsc import _estimate_h2_cts_by_ldsc
 from gwaslab.util_ex_ldsc import _estimate_partitioned_h2_by_ldsc
 from gwaslab.bd_get_hapmap3 import gethapmap3
+from gwaslab.util_abf_finemapping import abf_finemapping
+from gwaslab.util_abf_finemapping import make_cs
 import gc
 #20220309
@@ -135,6 +137,7 @@ class Sumstats():
         self.data = pd.DataFrame()
         self.log = Log()
         self.ldsc_h2 = None
+        self.ldsc_h2_results = None
         self.ldsc_rg = None
         self.ldsc_h2_cts = None
         self.ldsc_partitioned_h2_summary = None
@@ -756,13 +759,20 @@ class Sumstats():
         else:
             output = lambdaGC(self.data[["CHR",mode]],mode=mode,**kwargs)
             self.meta["Genomic inflation factor"] = output
-            return output
+            return output
+    def abf_finemapping(self, region=None, chrpos=None, snpid=None, threshold=0.95, **kwargs):
+        """Run ABF fine-mapping on one locus and build a credible set.
+
+        Parameters
+        ----------
+        region, chrpos, snpid
+            Locus selectors, passed unchanged to the module-level ``abf_finemapping``.
+        threshold : float
+            Cumulative PIP handed to ``make_cs``; defaults to 0.95, the value
+            that used to be hard-coded here.
+        **kwargs
+            Forwarded to the module-level ``abf_finemapping``.
+
+        Returns
+        -------
+        tuple
+            ``(region_data, credible_sets)`` as returned by ``abf_finemapping``
+            and ``make_cs`` respectively.
+        """
+        # Inside the method body the bare name resolves to the imported module-level
+        # function, not to this method. A copy of self.data is passed on.
+        region_data = abf_finemapping(self.data.copy(),region=region,chrpos=chrpos,snpid=snpid,log=self.log, **kwargs)
+        credible_sets = make_cs(region_data,threshold=threshold,log=self.log)
+        return region_data, credible_sets
 ## LDSC ##############################################################################################
     def estimate_h2_by_ldsc(self, build=None, verbose=True, match_allele=True, **kwargs):
         if build is None:
             build = self.meta["gwaslab"]["genome_build"]
         insumstats = gethapmap3(self.data.copy(), build=build, verbose=verbose , match_allele=True, how="right" )
-        self.ldsc_h2 = _estimate_h2_by_ldsc(insumstats=insumstats, log=self.log, verbose=verbose, **kwargs)
+        self.ldsc_h2, self.ldsc_h2_results = _estimate_h2_by_ldsc(insumstats=insumstats, log=self.log, verbose=verbose, **kwargs)
     def estimate_rg_by_ldsc(self, build=None, verbose=True, match_allele=True, **kwargs):
         if build is None:

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/g_version.py RENAMED Viewed

@@ -15,8 +15,8 @@ def _get_version():
 def gwaslab_info():
     # version meta information
     dic={
-   "version":"3.4.47",
-   "release_date":"20240703"
+   "version":"3.4.48",
+   "release_date":"20240822"
     }
     return dic

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/io_read_ldsc.py RENAMED Viewed

@@ -198,16 +198,29 @@ def read_greml(filelist=[]):
     return summary
 def parse_ldsc_summary(ldsc_summary):
-    summary = pd.DataFrame(columns = ['h2_obs', 'h2_se','Lambda_gc','Mean_chi2','Intercept','Intercept_se',"Ratio","Ratio_se"])
     lines = ldsc_summary.split("\n")
+    columns = ['h2_obs', 'h2_se','Lambda_gc','Mean_chi2','Intercept','Intercept_se',"Ratio","Ratio_se","Catagories"]
+    summary = pd.DataFrame(columns = columns)
     row={}
     try:
         objects = re.compile('[a-zA-Z\s\d]+:|[-0-9.]+[e]?[-0-9.]+|NA').findall(lines[0])
         row["h2_obs"]=objects[1]
         row["h2_se"]=objects[2]
-        ##next line lambda gc
+        ##check categories
+        if len(lines) == 6:
+            objects = re.compile('  -Categories:(.+)').findall(lines[1])
+            row["Catagories"] = objects[0].strip()
+            lines.pop(1)
+        else:
+            row["Catagories"] = "NA"
+        ##next line lambda gc
         objects = re.compile('[a-zA-Z\s\d]+:|[-0-9.]+[e]?[-0-9.]+|NA').findall(lines[1])
         row["Lambda_gc"] = objects[1]
         ##next line Mean_chi2
@@ -240,6 +253,7 @@ def parse_ldsc_summary(ldsc_summary):
         row["Intercept_se"]="NA"
         row["Ratio"]="NA"
         row["Ratio_se"]="NA"
+        row["Catagories"] = "NA"
     #summary = summary.append(row,ignore_index=True)
     row = pd.DataFrame([row], columns = summary.columns)

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/qc_fix_sumstats.py RENAMED Viewed

@@ -1497,7 +1497,11 @@ def liftover_variant(sumstats,
              status="STATUS",
              from_build="19",
              to_build="38"):
-    converter = get_lifter("hg"+from_build,"hg"+to_build)
+    try:
+        converter = get_lifter("hg"+from_build,"hg"+to_build,one_based=True)
+    except:
+        converter = get_lifter("hg"+from_build,"hg"+to_build)
     dic= get_number_to_chr(in_chr=False,xymt=["X","Y","M"])
     dic2= get_chr_to_number(out_chr=False)
     for i in sumstats[chrom].unique():
@@ -1549,7 +1553,6 @@ def parallelizeliftovervariant(sumstats,n_cores=1,chrom="CHR", pos="POS", from_b
         pool.close()
         pool.join()
     ############################################################################
     unmap_num = len(sumstats.loc[sumstats[pos].isna(),:])
     if remove is True:

gwaslab-3.4.48/src/gwaslab/util_abf_finemapping.py ADDED Viewed

@@ -0,0 +1,67 @@
+import pandas as pd
+import numpy as np
+from gwaslab.g_Log import Log
+from gwaslab.util_in_filter_value import _get_flanking_by_chrpos
+from gwaslab.util_in_filter_value import _get_flanking_by_id
+# Calculate posterior inclusion probabilities (PIP) based on the approximate Bayes factor (ABF)
+# Wakefield, J. A bayesian measure of the probability of false discovery in genetic epidemiology studies. Am J Hum Genet 81, 208–227 (2007).
+def calc_abf(insumstats,w=0.2,log=None,verbose=True,**kwargs):
+    """Return a copy of ``insumstats`` with a ``log_ABF`` column added.
+
+    The column holds ``0.5 * (log(1 - r) + r * z**2)`` with
+    ``r = w**2 / (w**2 + SE**2)`` and ``z = BETA / SE``, i.e. the natural log
+    of the reciprocal of Wakefield's approximate Bayes factor.
+
+    Parameters
+    ----------
+    insumstats : pandas.DataFrame
+        Must provide ``BETA`` and ``SE`` columns; the input frame is not modified.
+    w : float
+        Prior standard deviation W of the effect size parameter.
+    log : Log, optional
+        Logger. When omitted a new ``Log`` is created for this call.
+    verbose : bool
+        Forwarded to ``log.write``.
+    **kwargs
+        Accepted and ignored so callers can forward one shared keyword set.
+    """
+    # A ``log=Log()`` default would be created once at definition time and then
+    # shared by every call that relies on the default; build it per call instead.
+    if log is None:
+        log = Log()
+    log.write("Start to calculate approximate Bayesian factor for {} variants".format(len(insumstats)),verbose=verbose)
+    log.write(" - Reference: Wakefield, J. A bayesian measure of the probability of false discovery in genetic epidemiology studies. Am J Hum Genet 81, 208–227 (2007).",verbose=verbose)
+    log.write(" - Priors for the standard deviation W of the effect size parameter β : {} ".format(w),verbose=verbose)
+    # binary -> w=0.2
+    # quant  -> w=0.15
+    omega = w**2
+    se = insumstats["SE"]
+    v = se**2
+    # r is the share of the total variance (prior + sampling) that comes from the prior
+    r = omega / (omega+v)
+    beta = insumstats["BETA"]
+    z = beta/se
+    insumstats = insumstats.copy()
+    # (6) ABF -> reciprocal
+    insumstats.loc[:, "log_ABF"] = 1/2* (np.log(1-r) + (r * z**2))
+    return insumstats
+def calc_PIP(insumstats,log=None,verbose=True,**kwargs):
+    """Return a copy of ``insumstats`` with ``log_PIP`` and ``PIP`` columns added.
+
+    ``log_PIP`` is each variant's ``log_ABF`` minus the log of the summed ABFs,
+    and ``PIP`` is its exponential, so the PIPs of the input rows sum to one.
+
+    Parameters
+    ----------
+    insumstats : pandas.DataFrame
+        Must provide a ``log_ABF`` column; the input frame is not modified.
+    log : Log, optional
+        Logger. When omitted a new ``Log`` is created for this call.
+    verbose : bool
+        Forwarded to ``log.write``.
+    **kwargs
+        Accepted and ignored so callers can forward one shared keyword set.
+    """
+    # Avoid a ``Log()`` default that would be shared between calls.
+    if log is None:
+        log = Log()
+    log_abf = insumstats["log_ABF"]
+    # Log-sum-exp: subtract the maximum (computed once) before exponentiating so
+    # np.exp cannot overflow, then add it back on the log scale.
+    max_log_abf = np.max(log_abf)
+    log_total_abf = np.log(np.sum(np.exp(log_abf - max_log_abf))) + max_log_abf
+    insumstats = insumstats.copy()
+    log.write("Start to calculate PIP for {} variants".format(len(insumstats)),verbose=verbose)
+    # Calculate PIP on a logarithmic scale by subtracting log_total_abf from each log_abf
+    insumstats.loc[:, "log_PIP"] = insumstats['log_ABF'] - log_total_abf
+    # Convert PIP from the logarithmic scale back to the normal scale
+    insumstats.loc[:, "PIP"] = np.exp(insumstats['log_PIP'])
+    return insumstats
+def abf_finemapping(insumstats,region=None,chrpos=None,snpid=None, log=None,**kwargs):
+    """Select one locus from ``insumstats`` and compute ABF-based PIPs for it.
+
+    The locus is taken from the first selector that is not ``None``, checked in
+    the order ``region``, ``chrpos``, ``snpid``.
+
+    Parameters
+    ----------
+    insumstats : pandas.DataFrame
+        Summary statistics; the ``region`` selector reads ``CHR`` and ``POS``.
+    region : sequence, optional
+        ``(chr, start, end)``; rows with that ``CHR`` and ``start <= POS <= end`` are kept.
+    chrpos : optional
+        Passed to ``_get_flanking_by_chrpos``.
+    snpid : optional
+        Passed to ``_get_flanking_by_id``.
+    log : Log, optional
+        Logger. When omitted a new ``Log`` is created for this call.
+    **kwargs
+        Forwarded to the flanking helper (if used), ``calc_abf`` and ``calc_PIP``.
+
+    Returns
+    -------
+    pandas.DataFrame
+        The selected rows as returned by ``calc_PIP``.
+
+    Raises
+    ------
+    ValueError
+        If none of ``region``, ``chrpos`` and ``snpid`` is given.
+    """
+    # Without a selector region_data would never be bound and the function would
+    # fail later with an opaque UnboundLocalError; fail early and explicitly.
+    if region is None and chrpos is None and snpid is None:
+        raise ValueError("Please specify the target locus using one of region, chrpos or snpid.")
+    # Avoid a ``Log()`` default that would be shared between calls.
+    if log is None:
+        log = Log()
+    if region is not None:
+        region_data = insumstats[(insumstats["CHR"] == region[0]) & (insumstats["POS"] >= region[1]) & (insumstats["POS"] <= region[2])]
+    elif chrpos is not None:
+        region_data = _get_flanking_by_chrpos(insumstats, chrpos=chrpos,**kwargs)
+    else:
+        region_data = _get_flanking_by_id(insumstats, snpid=snpid,**kwargs)
+    region_data = calc_abf(region_data,log=log,**kwargs)
+    region_data = calc_PIP(region_data,log=log,**kwargs)
+    return region_data
+def make_cs(insumstats,threshold=0.95,log=None,verbose=True):
+    """Build a credible set from the variants with the largest PIPs.
+
+    Rows are sorted by ``PIP`` in descending order and taken from the top until
+    the running PIP sum first exceeds ``threshold``; the row that crosses the
+    threshold is included. If the sum never exceeds it, every row is returned.
+
+    Parameters
+    ----------
+    insumstats : pandas.DataFrame
+        Must provide a ``PIP`` column; the input frame is not modified.
+    threshold : float
+        Cumulative PIP that the credible set must exceed.
+    log : Log, optional
+        Logger. When omitted a new ``Log`` is created for this call.
+    verbose : bool
+        Forwarded to ``log.write``.
+
+    Returns
+    -------
+    pandas.DataFrame
+        The selected rows with their original index and column dtypes.
+    """
+    # Avoid a ``Log()`` default that would be shared between calls.
+    if log is None:
+        log = Log()
+    insumstats = insumstats.sort_values(by="PIP",ascending=False)
+    # One cumulative sum replaces a per-row pd.concat, which was quadratic in the
+    # number of rows and turned every column into object dtype.
+    pip_cumsum = np.cumsum(insumstats["PIP"].to_numpy(dtype=float))
+    crossed = np.flatnonzero(pip_cumsum > threshold)
+    n_variants = int(crossed[0]) + 1 if crossed.size > 0 else len(insumstats)
+    cs = insumstats.iloc[:n_variants].copy()
+    log.write("Finished constructing a {}% credible set with {} variant(s)".format(str(threshold * 100),str(len(cs))),verbose=verbose)
+    return cs

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/util_ex_ldsc.py RENAMED Viewed

@@ -304,9 +304,16 @@ def _estimate_h2_by_ldsc(insumstats, log, verbose=True, munge=False, munge_args=
     log.write(" -LDSC log:", verbose=verbose)
     summary = estimate_h2(sumstats, default_args, log)
+    results_table = None
+    if type(summary) is tuple:
+        results_table = summary[1]
+        summary = summary[0]
+        log.write(" -Coefficient results have been stored in .ldsc_h2_results", verbose=verbose)
     log.write(" -Results have been stored in .ldsc_h2", verbose=verbose)
     finished(log=log,verbose=verbose,end_line=_end_line)
-    return parse_ldsc_summary(summary)
+    return parse_ldsc_summary(summary), results_table
 ####################################################################################################################

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/util_ex_run_clumping.py RENAMED Viewed

@@ -11,7 +11,7 @@ from gwaslab.g_version import _checking_plink_version
 def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
            p="P",mlog10p="MLOG10P", overwrite=False, study=None, bfile=None,
            n_cores=1, memory=None, chrom=None, clump_p1=5e-8, clump_p2=5e-8, clump_r2=0.01, clump_kb=250,
-           log=Log(),verbose=True):
+           log=Log(),verbose=True,plink="plink",plink2="plink2"):
     ##start function with col checking##########################################################
     _start_line = "perfrom clumping"
     _end_line = "clumping"
@@ -111,7 +111,7 @@ def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
             bfile_to_use = bfile
         log.write(" -Performing clumping for CHR {}...".format(i),verbose=verbose)
-        log = _checking_plink_version(v=2, log=log)
+        log = _checking_plink_version(plink2=plink2, log=log)
         if memory is not None:
             memory_flag = "--memory {}".format(memory)
@@ -123,7 +123,7 @@ def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
         if scaled == True:
             # clumping using LOG10P
             script = """
-            plink2 \
+            {} \
                 {}\
                 --chr {} \
                 --clump {} \
@@ -136,11 +136,11 @@ def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
                 --clump-kb {} \
                 --threads {} {}\
                 --out {}
-            """.format(file_flag, chrom, clump, mlog10p,clump_log10_p1, clump_log10_p2, clump_r2, clump_kb, n_cores, memory_flag if memory is not None else "", out_single_chr)
+            """.format(plink2, file_flag, chrom, clump, mlog10p,clump_log10_p1, clump_log10_p2, clump_r2, clump_kb, n_cores, memory_flag if memory is not None else "", out_single_chr)
         else:
             # clumping using P
             script = """
-            plink2 \
+            {} \
                 {}\
                 --chr {} \
                 --clump {} \
@@ -152,7 +152,7 @@ def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
                 --clump-kb {} \
                 --threads {} {}\
                 --out {}
-            """.format(file_flag, chrom, clump, p, clump_p1, clump_p2, clump_r2, clump_kb, n_cores,memory_flag if memory is not None else "", out_single_chr)
+            """.format(plink2,file_flag, chrom, clump, p, clump_p1, clump_p2, clump_r2, clump_kb, n_cores,memory_flag if memory is not None else "", out_single_chr)
         try:
             output = subprocess.check_output(script, stderr=subprocess.STDOUT, shell=True,text=True)

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/viz_aux_annotate_plot.py RENAMED Viewed

@@ -38,6 +38,7 @@ def annotate_single(
     region,
     region_anno_bbox_args,
     skip,
+    anno_height=1,
     amode="int",
     snpid="SNPID",
     chrom="CHR",
@@ -131,7 +132,7 @@ def annotate_single(
             #xy=(row["i"],row["scaled_P"]+0.2)
             xy=(row["i"],row["scaled_P"]+0.01*maxy)
-            xytext=(last_pos,1.15*maxy*arm_scale)
+            xytext=(last_pos,1.15*maxy*arm_scale*anno_height)
             if anno_fixed_arm_length is not None:
                 armB_length_in_point = anno_fixed_arm_length

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/viz_aux_quickfix.py RENAMED Viewed

@@ -286,8 +286,9 @@ def _cut(series, mode,cutfactor,cut,skip, ylabels, cut_log, verbose, lines_to_pl
     log.write(" -Converting data above cut line...",verbose=verbose)
     if ylabels is not None:
         ylabels = pd.Series(ylabels)
-    maxy = series.max()
     series = series.copy()
+    maxy = series.max()
     if "b" not in mode:
         log.write(" -Maximum -log10(P) value is "+str(maxy) +" .", verbose=verbose)
     elif "b" in mode:

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/viz_plot_compare_effect.py RENAMED Viewed

@@ -77,8 +77,10 @@ def compare_effect(path1,
         scaled2 = True
     if is_q_mc=="fdr" or is_q_mc=="bon":
         is_q = True
-    else:
-        raise ValueError("Please select either fdr or bon for is_q_mc.")
+    if is_q == True:
+        if is_q_mc not in [False,"fdr","bon","non"]:
+            raise ValueError("Please select either fdr or bon or non for is_q_mc.")
     if save_args is None:
         save_args = {"dpi":300,"facecolor":"white"}
     if reg_box is None:

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/viz_plot_miamiplot2.py RENAMED Viewed

@@ -247,7 +247,10 @@ def plot_miami2(
         plt.subplots_adjust(hspace=region_hspace)
     else:
         fig, ax1, ax5 = figax
+    #if same_ylim==True:
+        #maxy = merged_sumstats[["scaled_P_1","scaled_P_2"]].max().max()
     log.write("Start to create Manhattan plot for sumstats1...", verbose=verbose)
     fig,log = mqqplot(merged_sumstats,
                       chrom="CHR",
@@ -284,14 +287,8 @@ def plot_miami2(
                       _if_quick_qc=False,
                      **mqq_args2)
     log.write("Finished creating Manhattan plot for sumstats2".format(_get_version()), verbose=verbose)
-    if same_ylim==True:
-        ylim1_converted = ax1.get_ylim()
-        ylim2_converted = ax5.get_ylim()
-        if ylim1_converted > ylim2_converted:
-            ax5.set_ylim(ylim1_converted)
-        else:
-            ax1.set_ylim(ylim2_converted)
     #####################################################################################################################
     ax5.set_xlabel("")

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/viz_plot_mqqplot.py RENAMED Viewed

@@ -141,6 +141,7 @@ def mqqplot(insumstats,
           anno_max_iter=100,
           arm_offset=50,
           arm_scale=1,
+          anno_height=1,
           arm_scale_d=None,
           cut=0,
           skip=0,
@@ -180,6 +181,7 @@ def mqqplot(insumstats,
           xpad=None,
           xpadl=None,
           xpadr=None,
+          xtight=False,
           chrpad=0.03,
           drop_chr_start=False,
           title =None,
@@ -552,7 +554,8 @@ def mqqplot(insumstats,
                                                                         cut_log = cut_log,
                                                                         verbose =verbose,
                                                                         lines_to_plot=lines_to_plot,
-                                                                        log = log)
+                                                                        log = log
+                                                                        )
     except:
         log.warning("No valid data! Please check the input.")
         return None
@@ -596,19 +599,23 @@ def mqqplot(insumstats,
             sumstats.loc[sumstats["scaled_P"]>-np.log10(sig_level_plot),"s"]=4
         sumstats["chr_hue"]=sumstats[chrom].astype("string")
-        if vcf_path is not None:
+        if "r" in mode:
+            if vcf_path is None:
+                sumstats["LD"]=100
+                sumstats["SHAPE"]=1
             sumstats["chr_hue"]=sumstats["LD"]
         ## default seetings
         palette = sns.color_palette(colors,n_colors=sumstats[chrom].nunique())
         legend = None
         style=None
         linewidth=0
         edgecolor="black"
         # if regional plot assign colors
-        if vcf_path is not None:
+        if "r" in mode:
+            #if vcf_path is not None:
             legend=None
             linewidth=1
             if len(region_ref) == 1:
@@ -631,10 +638,9 @@ def mqqplot(insumstats,
                         palette[(i+1)*100 + j ] = hex_color
                 edgecolor="none"
-                scatter_args["markers"]= region_marker_shapes[:len(region_ref)]
+                scatter_args["markers"]= {(i+1):m for i,m in enumerate(region_marker_shapes[:len(region_ref)])}
                 style="SHAPE"
         ## if highlight
         highlight_i = pd.DataFrame()
@@ -977,6 +983,7 @@ def mqqplot(insumstats,
                                 region=region,
                                 region_anno_bbox_args=region_anno_bbox_args,
                                 skip=skip,
+                                anno_height=anno_height,
                                 snpid=snpid,
                                 chrom=chrom,
                                 pos=pos,
@@ -1040,7 +1047,7 @@ def mqqplot(insumstats,
         if "qq" in mode:
             ax2.set_ylim(ylim)
-    ax1 = _add_pad_to_x_axis(ax1, xpad, xpadl, xpadr, sumstats)
+    ax1 = _add_pad_to_x_axis(ax1, xpad, xpadl, xpadr, sumstats, pos, chrpad, xtight, log = log, verbose=verbose)
     # Titles
     if title and anno and len(to_annotate)>0:
@@ -1065,20 +1072,34 @@ def mqqplot(insumstats,
-def _add_pad_to_x_axis(ax1, xpad, xpadl, xpadr, sumstats):
+def _add_pad_to_x_axis(ax1, xpad, xpadl, xpadr, sumstats, pos, chrpad, xtight, log, verbose):
-    if ax1 is not None:
-        xmin, xmax = ax1.get_xlim()
-        if xpad is not None:
-            pad = xpad* sumstats["i"].max()
-            ax1.set_xlim([xmin - pad, xmin + pad])
-        if xpadl is not None:
-            pad = xpadl* sumstats["i"].max()
-            ax1.set_xlim([xmin - pad,xmax])
-        if xpadr is not None:
-            pad = xpadr* sumstats["i"].max()
-            ax1.set_xlim([xmin, xmax + pad])
+    if xtight==True:
+        log.write(" -Adjusting X padding on both side : tight mode", verbose=verbose)
+        xmax = sumstats["i"].max()
+        xmin=  sumstats["i"].min()
+        ax1.set_xlim([xmin, xmax])
+    else:
+        chrpad_to_remove = sumstats[pos].max()*chrpad
+        if ax1 is not None:
+            xmin, xmax = ax1.get_xlim()
+            length = xmax - xmin
+            if xpad is not None:
+                log.write(" -Adjusting X padding on both side: {}".format(xpad), verbose=verbose)
+                pad = xpad* length #sumstats["i"].max()
+                ax1.set_xlim([xmin - pad + chrpad_to_remove, xmax + pad - chrpad_to_remove])
+            if xpad is None and xpadl is not None:
+                log.write(" -Adjusting X padding on left side: {}".format(xpadl), verbose=verbose)
+                xmin, xmax = ax1.get_xlim()
+                pad = xpadl*length # sumstats["i"].max()
+                ax1.set_xlim([xmin - pad + chrpad_to_remove ,xmax])
+            if xpad is None and xpadr is not None:
+                log.write(" -Adjusting X padding on right side: {}".format(xpadr), verbose=verbose)
+                xmin, xmax = ax1.get_xlim()
+                pad = xpadr*length # sumstats["i"].max()
+                ax1.set_xlim([xmin, xmax + pad - chrpad_to_remove])
     return ax1

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/viz_plot_regional2.py RENAMED Viewed

@@ -5,6 +5,7 @@ import matplotlib.patches as patches
 import seaborn as sns
 import numpy as np
 import copy
+import re
 import scipy as sp
 from pyensembl import EnsemblRelease
 from allel import GenotypeArray
@@ -96,19 +97,20 @@ def _plot_regional(
                                         marker_size= marker_size,
                                         region_marker_shapes=region_marker_shapes,
                                         log=log,verbose=verbose)
-            if lead_id_single is not None:
-                lead_ids.append(lead_id_single)
+            #if lead_id_single is not None:
+            lead_ids.append(lead_id_single)
         # update region_ref to variant rsID or variantID / skip NAs
         new_region_ref = []
         for i in range(len(lead_ids)):
             if lead_ids[i] is None:
+                new_region_ref.append(region_ref[i])
                 continue
             if region_ref[i] is None:
-                if "rsID" in sumstats.columns:
-                    new_name = sumstats.loc[lead_ids[i],"rsID"]
-                elif "SNPID" in sumstats.columns:
+                if "SNPID" in sumstats.columns:
                     new_name = sumstats.loc[lead_ids[i],"SNPID"]
+                elif "rsID" in sumstats.columns:
+                    new_name = sumstats.loc[lead_ids[i],"rsID"]
                 else:
                     new_name = "chr{}:{}".format(sumstats.loc[lead_ids[i],"CHR"] , sumstats.loc[lead_ids[i],"POS"])
                 new_region_ref.append(new_name)
@@ -162,7 +164,6 @@ def _plot_regional(
         lead_snp_ys = []
         lead_snp_is = []
         lead_snp_is_colors = []
         for i,lead_id_single in enumerate(lead_ids):
             if lead_id_single is not None:
                 lead_snp_ys.append(sumstats.loc[lead_id_single,"scaled_P"] )
@@ -258,11 +259,35 @@ def _get_lead_id(sumstats=None, region_ref=None, log=None, verbose=True):
     if type(lead_id) is list:
         if len(lead_id)>0:
             lead_id = int(lead_id[0])
     if region_ref_to_check is not None:
         if type(lead_id) is list:
             if len(lead_id)==0 :
-                log.warning("{} not found.. Skipping..".format(region_ref_to_check))
+                #try:
+                matched_snpid = re.match("(chr)?[0-9]+:[0-9]+:[ATCG]+:[ATCG]+", region_ref_to_check,  re.IGNORECASE)
+                if matched_snpid is None:
+                    pass
+                else:
+                    lead_snpid = matched_snpid.group(0).split(":")
+                    if len(lead_snpid)==4:
+                        lead_chr= int(lead_snpid[0])
+                        lead_pos= int(lead_snpid[1])
+                        lead_ea= lead_snpid[2]
+                        lead_nea= lead_snpid[3]
+                        chrpos_match = (sumstats["CHR"] == lead_chr) & (sumstats["POS"] == lead_pos)
+                        eanea_match = ((sumstats["EA"] == lead_ea) & (sumstats["NEA"] == lead_nea)) | ((sumstats["EA"] == lead_nea) & (sumstats["NEA"] == lead_ea))
+                        if "rsID" in sumstats.columns:
+                            lead_id = sumstats.index[chrpos_match&eanea_match].to_list()
+                        if "SNPID" in sumstats.columns:
+                            lead_id = sumstats.index[chrpos_match&eanea_match].to_list()
+                if type(lead_id) is list:
+                    if len(lead_id)>0:
+                        lead_id = int(lead_id[0])
+                        log.warning("Trying matching variant {} using CHR:POS:EA:NEA to {}... ".format(region_ref_to_check,lead_id))
+        if type(lead_id) is list:
+            if len(lead_id)==0 :
+                log.warning("Extracting variant: {} not found in sumstats.. Skipping..".format(region_ref_to_check))
                 #lead_id = sumstats["scaled_P"].idxmax()
                 lead_id = None
                 return lead_id
@@ -275,7 +300,7 @@ def _get_lead_id(sumstats=None, region_ref=None, log=None, verbose=True):
     return lead_id
-def _pinpoint_lead(sumstats,ax1,region_ref, lead_color, marker_size, log, verbose,region_marker_shapes):
+def _pinpoint_lead(sumstats,ax1,region_ref, lead_color, marker_size, log, verbose, region_marker_shapes):
     if region_ref is None:
         log.write(" -Extracting lead variant..." , verbose=verbose)
@@ -416,6 +441,11 @@ def _plot_gene_track(
     texts_to_adjust_left = []
     texts_to_adjust_middle = []
     texts_to_adjust_right = []
+    sig_gene_names=[]
+    sig_gene_lefts=[]
+    sig_gene_rights=[]
     for index,row in uniq_gene_region.iterrows():
         gene_color="#020080"
@@ -426,21 +456,18 @@ def _plot_gene_track(
             gene_anno = "<-" + row["name"]
-        sig_gene_names=[]
-        sig_gene_lefts=[]
-        sig_gene_rights=[]
         for lead_snp_i in lead_snp_is:
             if region_lead_grid is True and lead_snp_i > gene_track_start_i+row["start"] and lead_snp_i < gene_track_start_i+row["end"] :
-                    gene_color=region_lead_grid_line["color"]
-                    sig_gene_names.append(row["name"])
-                    sig_gene_lefts.append(gene_track_start_i+row["start"])
-                    sig_gene_rights.append(gene_track_start_i+row["end"])
+                gene_color=region_lead_grid_line["color"]
+                sig_gene_names.append(row["name"])
+                sig_gene_lefts.append(gene_track_start_i+row["start"])
+                sig_gene_rights.append(gene_track_start_i+row["end"])
             # plot gene line
             ax3.plot((gene_track_start_i+row["start"],gene_track_start_i+row["end"]),
                         (row["stack"]*2,row["stack"]*2),color=gene_color,linewidth=linewidth_in_points/10)
         # plot gene name
         if row["end"] >= region[2]:
             #right side
@@ -459,6 +486,7 @@ def _plot_gene_track(
     for index,row in exons.iterrows():
         exon_color="#020080"
         for sig_gene_name, sig_gene_left, sig_gene_right in zip(sig_gene_names,sig_gene_lefts,sig_gene_rights):
             if not pd.isnull(row["name"]):
                 if (region_lead_grid is True) and row["name"]==sig_gene_name:
                     exon_color = region_lead_grid_line["color"]
@@ -468,7 +496,7 @@ def _plot_gene_track(
                 exon_color = region_lead_grid_line["color"]
             else:
                 exon_color="#020080"
         ax3.plot((gene_track_start_i+row["start"],gene_track_start_i+row["end"]),
                     (row["stack"]*2,row["stack"]*2),linewidth=linewidth_in_points*taf[3],color=exon_color,solid_capstyle="butt")
@@ -550,24 +578,42 @@ def process_vcf(sumstats,
             # figure out lead variant
             lead_id = _get_lead_id(sumstats, region_ref_single, log, verbose)
-        if lead_id is None:
-            sumstats[rsq] = None
-            sumstats[rsq] = sumstats[rsq].astype("float")
-            sumstats[ld_single] = 0
-            continue
-        lead_pos = sumstats.loc[lead_id,pos]
+        lead_series = None
+        if lead_id is None:
+            matched_snpid = re.match("(chr)?[0-9]+:[0-9]+:[ATCG]+:[ATCG]+",region_ref_single,  re.IGNORECASE)
+            if matched_snpid is None:
+                sumstats[rsq] = None
+                sumstats[rsq] = sumstats[rsq].astype("float")
+                sumstats[ld_single] = 0
+                continue
+            else:
+                lead_snpid = matched_snpid.group(0).split(":")[1:]
+                lead_pos = int(lead_snpid[0])
+                lead_snpid[0]= int(lead_snpid[0])
+                lead_series = pd.Series(lead_snpid)
+        else:
+            lead_pos = sumstats.loc[lead_id,pos]
         # if lead pos is available:
         if lead_pos in ref_genotype["variants/POS"]:
             # get ref index for lead snp
-            lead_snp_ref_index = match_varaint(sumstats.loc[lead_id,[pos,nea,ea]])
-            #lead_snp_ref_index = np.where(ref_genotype["variants/POS"] == lead_pos)[0][0]
+            if lead_series is None:
+                lead_snp_ref_index = match_varaint(sumstats.loc[lead_id,[pos,nea,ea]])
+                #lead_snp_ref_index = np.where(ref_genotype["variants/POS"] == lead_pos)[0][0]
+            else:
+                log.warning("Computing LD: {} not found in sumstats but found in reference...Still Computing...".format(region_ref_single))
+                lead_snp_ref_index = match_varaint(lead_series)
             # non-na other snp index
             other_snps_ref_index = sumstats["REFINDEX"].dropna().astype("int").values
             # get genotype
             lead_snp_genotype = GenotypeArray([ref_genotype["calldata/GT"][lead_snp_ref_index]]).to_n_alt()
             try:
                 if len(set(lead_snp_genotype[0]))==1:
@@ -604,10 +650,10 @@ def process_vcf(sumstats,
                 sumstats.loc[to_change_color,ld_single] = 1
             to_change_color = sumstats[rsq]>ld_threshold
             sumstats.loc[to_change_color,ld_single] = index+2
-        sumstats.loc[lead_id,ld_single] = len(region_ld_threshold)+2
-        sumstats.loc[lead_id,lead] = 1
+        if lead_series is None:
+            sumstats.loc[lead_id,ld_single] = len(region_ld_threshold)+2
+            sumstats.loc[lead_id,lead] = 1
     ####################################################################################################
     final_shape_col = "SHAPE"

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/viz_plot_stackedregional.py RENAMED Viewed

@@ -57,7 +57,9 @@ def plot_stacked_mqq(objects,
                         fig_args=None,
                         region_hspace=0.05,
                         subplot_height=4,
+                        region_lead_grids = None,
                         region_lead_grid_line=None,
+                        region_ld_legends = None,
                         fontsize=9,
                         font_family="Arial",
                         build="99",
@@ -85,6 +87,8 @@ def plot_stacked_mqq(objects,
         region_chromatin_height = len(region_chromatin_files) * region_chromatin_height
     if region_chromatin_labels is None:
         region_chromatin_labels = []
+    if region_ld_legends is None:
+        region_ld_legends = [0]
     if title_args is None:
         title_args = {"family":"Arial"}
     else:
@@ -125,7 +129,9 @@ def plot_stacked_mqq(objects,
                                           'width_ratios':[mqqratio,1]},
                                           **fig_args)
         plt.subplots_adjust(hspace=region_hspace)
+    if region_lead_grids is None:
+        region_lead_grids = [i for i in range(len(axes))]
     ##########################################################################################################################################
     mqq_args_for_each_plot = _sort_args(mqq_args, n_plot)
     ##########################################################################################################################################
@@ -149,6 +155,10 @@ def plot_stacked_mqq(objects,
             figax = (fig,axes[index],axes[-1])
         elif mode=="mqq":
             figax = (fig,axes[index,0],axes[index,1])
+        if index in region_ld_legends:
+            region_ld_legend = True
+        else:
+            region_ld_legend = False
         #################################################################
         if index==0:
             # plot last m and gene track
@@ -164,6 +174,7 @@ def plot_stacked_mqq(objects,
                             fontsize=fontsize,
                             font_family=font_family,
                             region_lead_grid=False,
+                            region_ld_legend=region_ld_legend,
                             gtf_path="default",
                             rr_ylabel=False,
                             figax=figax,
@@ -192,7 +203,7 @@ def plot_stacked_mqq(objects,
                             font_family=font_family,
                             mode=mode,
                             rr_ylabel=False,
-                            region_ld_legend=False,
+                            region_ld_legend=region_ld_legend,
                             gtf_path=None,
                             figax=figax,
                             _get_region_lead=True,
@@ -257,7 +268,7 @@ def plot_stacked_mqq(objects,
     ##########################################################################################################################################
     # draw the line for lead variants
-    _draw_grid_line_for_lead_variants(mode, lead_variants_is,lead_variants_is_color, n_plot, axes, region_lead_grid_line,region_chromatin_files)
+    _draw_grid_line_for_lead_variants(mode, lead_variants_is,lead_variants_is_color, n_plot, axes, region_lead_grid_line,region_chromatin_files,region_lead_grids)
     ##########################################################################################################################################
     _drop_old_y_labels(axes, n_plot)
@@ -275,21 +286,24 @@ def _drop_old_y_labels(axes, n_plot):
     for index in range(n_plot):
         axes[index].set_ylabel("")
-def _draw_grid_line_for_lead_variants(mode, lead_variants_is,lead_variants_is_color, n_plot, axes, region_lead_grid_line,region_chromatin_files):
+def _draw_grid_line_for_lead_variants(mode, lead_variants_is,lead_variants_is_color, n_plot, axes, region_lead_grid_line,region_chromatin_files,region_lead_grids):
     if len(region_chromatin_files)>0:
         n_plot_and_track = n_plot+2
     else:
         n_plot_and_track = n_plot+1
+    plotted=[None]
     if mode=="r":
         for index, sig_is in lead_variants_is.items():
-            for j, sig_i in enumerate(sig_is):
-                try:
-                    region_lead_grid_line["color"]=lead_variants_is_color[index][j]
-                except:
-                    pass
-                if sig_i is not None:
-                    for each_axis_index in range(n_plot_and_track):
-                        axes[each_axis_index].axvline(x=sig_i, zorder=2,**region_lead_grid_line)
+            if index in region_lead_grids:
+                for j, sig_i in enumerate(sig_is):
+                    try:
+                        region_lead_grid_line["color"] = lead_variants_is_color[index][j]
+                    except:
+                        pass
+                    if sig_i not in plotted:
+                        for each_axis_index in range(n_plot_and_track):
+                            axes[each_axis_index].axvline(x=sig_i, zorder=2,**region_lead_grid_line)
 def _add_new_y_label(mode, fig, gene_track_height,n_plot,subplot_height ,fontsize,font_family):
     gene_track_height_ratio = gene_track_height/(gene_track_height + n_plot*subplot_height)

{gwaslab-3.4.47 → gwaslab-3.4.48/src/gwaslab.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: gwaslab
-Version: 3.4.47
+Version: 3.4.48
 Summary: A collection of handy tools for GWAS SumStats
 Author-email: Yunye <yunye@gwaslab.com>
 Project-URL: Homepage, https://cloufield.github.io/gwaslab/
@@ -51,7 +51,7 @@ Warning: Known issues of GWASLab are summarized in [https://cloufield.github.io/
 ### install via pip
 ```
-pip install gwaslab==3.4.45
+pip install gwaslab==3.4.46
 ```
 ```python

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab.egg-info/SOURCES.txt RENAMED Viewed

@@ -34,6 +34,7 @@ src/gwaslab/ldsc_sumstats.py
 src/gwaslab/qc_check_datatype.py
 src/gwaslab/qc_fix_sumstats.py
 src/gwaslab/run_script.py
+src/gwaslab/util_abf_finemapping.py
 src/gwaslab/util_ex_calculate_ldmatrix.py
 src/gwaslab/util_ex_calculate_prs.py
 src/gwaslab/util_ex_gwascatalog.py

{gwaslab-3.4.47 → gwaslab-3.4.48}/LICENSE RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/LICENSE_before_v3.4.39 RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/setup.cfg RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/__init__.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/bd_common_data.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/bd_config.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/bd_download.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/bd_get_hapmap3.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/cache_manager.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/data/chrx_par/chrx_par_hg19.bed.gz RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/data/chrx_par/chrx_par_hg38.bed.gz RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/data/formatbook.json RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/data/hapmap3_SNPs/hapmap3_db150_hg19.snplist.gz RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/data/hapmap3_SNPs/hapmap3_db151_hg38.snplist.gz RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/data/high_ld/high_ld_hla_hg19.bed.gz RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/data/high_ld/high_ld_hla_hg38.bed.gz RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/data/reference.json RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/g_Log.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/g_Phenotypes.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/g_SumstatsPair.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/g_SumstatsT.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/g_Sumstats_summary.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/g_meta.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/g_vchange_status.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/hm_casting.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/hm_harmonize_sumstats.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/hm_rsid_to_chrpos.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/io_preformat_input.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/io_read_tabular.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/io_to_formats.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/io_to_pickle.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/ldsc_irwls.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/ldsc_jackknife.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/ldsc_ldscore.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/ldsc_parse.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/ldsc_regressions.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/ldsc_sumstats.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/qc_check_datatype.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/run_script.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/util_ex_calculate_ldmatrix.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/util_ex_calculate_prs.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/util_ex_gwascatalog.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/util_ex_ldproxyfinder.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/util_ex_plink_filter.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/util_ex_process_h5.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/util_ex_process_ref.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/util_ex_run_2samplemr.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/util_ex_run_coloc.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/util_ex_run_susie.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/util_in_calculate_gc.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/util_in_calculate_power.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/util_in_convert_h2.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/util_in_correct_winnerscurse.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/util_in_fill_data.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/util_in_filter_value.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/util_in_get_density.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/util_in_get_sig.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/util_in_meta.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/util_in_snphwe.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/viz_aux_chromatin.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/viz_aux_reposition_text.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/viz_aux_save_figure.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/viz_plot_compare_af.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/viz_plot_forestplot.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/viz_plot_miamiplot.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/viz_plot_qqplot.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/viz_plot_regionalplot.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/viz_plot_rg_heatmap.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab/viz_plot_trumpetplot.py RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab.egg-info/requires.txt RENAMED Viewed

File without changes

{gwaslab-3.4.47 → gwaslab-3.4.48}/src/gwaslab.egg-info/top_level.txt RENAMED Viewed

File without changes

gwaslab 3.4.47__tar.gz → 3.4.48__tar.gz

Potentially problematic release.

gwaslab 3.4.47.tar.gz → 3.4.48.tar.gz