PyPI - gwaslab - Versions diffs - 3.4.47__py3-none-any.whl → 3.4.49__py3-none-any.whl - Mend

gwaslab 3.4.47__py3-none-any.whl → 3.4.49__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of gwaslab might be problematic. Click here for more details.

Files changed (27) hide show

gwaslab/bd_common_data.py +3 -1
gwaslab/data/reference.json +10 -2
gwaslab/g_Sumstats.py +22 -2
gwaslab/g_vchange_status.py +1 -1
gwaslab/g_version.py +2 -2
gwaslab/hm_harmonize_sumstats.py +23 -7
gwaslab/io_preformat_input.py +73 -8
gwaslab/io_read_ldsc.py +16 -2
gwaslab/io_to_formats.py +5 -5
gwaslab/qc_fix_sumstats.py +109 -7
gwaslab/util_abf_finemapping.py +67 -0
gwaslab/util_ex_ldsc.py +8 -1
gwaslab/util_ex_run_clumping.py +6 -6
gwaslab/util_in_fill_data.py +20 -2
gwaslab/viz_aux_annotate_plot.py +2 -1
gwaslab/viz_aux_quickfix.py +2 -1
gwaslab/viz_plot_compare_effect.py +4 -2
gwaslab/viz_plot_miamiplot2.py +10 -9
gwaslab/viz_plot_mqqplot.py +42 -21
gwaslab/viz_plot_regional2.py +75 -29
gwaslab/viz_plot_stackedregional.py +37 -16
{gwaslab-3.4.47.dist-info → gwaslab-3.4.49.dist-info}/METADATA +15 -15
{gwaslab-3.4.47.dist-info → gwaslab-3.4.49.dist-info}/RECORD +27 -26
{gwaslab-3.4.47.dist-info → gwaslab-3.4.49.dist-info}/WHEEL +1 -1
{gwaslab-3.4.47.dist-info → gwaslab-3.4.49.dist-info}/LICENSE +0 -0
{gwaslab-3.4.47.dist-info → gwaslab-3.4.49.dist-info}/LICENSE_before_v3.4.39 +0 -0
{gwaslab-3.4.47.dist-info → gwaslab-3.4.49.dist-info}/top_level.txt +0 -0

gwaslab/util_ex_ldsc.py CHANGED Viewed

@@ -304,9 +304,16 @@ def _estimate_h2_by_ldsc(insumstats, log, verbose=True, munge=False, munge_args=
     log.write(" -LDSC log:", verbose=verbose)
     summary = estimate_h2(sumstats, default_args, log)
+    results_table = None
+    if type(summary) is tuple:
+        results_table = summary[1]
+        summary = summary[0]
+        log.write(" -Coefficient results have been stored in .ldsc_h2_results", verbose=verbose)
     log.write(" -Results have been stored in .ldsc_h2", verbose=verbose)
     finished(log=log,verbose=verbose,end_line=_end_line)
-    return parse_ldsc_summary(summary)
+    return parse_ldsc_summary(summary), results_table
 ####################################################################################################################

gwaslab/util_ex_run_clumping.py CHANGED Viewed

@@ -11,7 +11,7 @@ from gwaslab.g_version import _checking_plink_version
 def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
            p="P",mlog10p="MLOG10P", overwrite=False, study=None, bfile=None,
            n_cores=1, memory=None, chrom=None, clump_p1=5e-8, clump_p2=5e-8, clump_r2=0.01, clump_kb=250,
-           log=Log(),verbose=True):
+           log=Log(),verbose=True,plink="plink",plink2="plink2"):
     ##start function with col checking##########################################################
     _start_line = "perfrom clumping"
     _end_line = "clumping"
@@ -111,7 +111,7 @@ def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
             bfile_to_use = bfile
         log.write(" -Performing clumping for CHR {}...".format(i),verbose=verbose)
-        log = _checking_plink_version(v=2, log=log)
+        log = _checking_plink_version(plink2=plink2, log=log)
         if memory is not None:
             memory_flag = "--memory {}".format(memory)
@@ -123,7 +123,7 @@ def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
         if scaled == True:
             # clumping using LOG10P
             script = """
-            plink2 \
+            {} \
                 {}\
                 --chr {} \
                 --clump {} \
@@ -136,11 +136,11 @@ def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
                 --clump-kb {} \
                 --threads {} {}\
                 --out {}
-            """.format(file_flag, chrom, clump, mlog10p,clump_log10_p1, clump_log10_p2, clump_r2, clump_kb, n_cores, memory_flag if memory is not None else "", out_single_chr)
+            """.format(plink2, file_flag, chrom, clump, mlog10p,clump_log10_p1, clump_log10_p2, clump_r2, clump_kb, n_cores, memory_flag if memory is not None else "", out_single_chr)
         else:
             # clumping using P
             script = """
-            plink2 \
+            {} \
                 {}\
                 --chr {} \
                 --clump {} \
@@ -152,7 +152,7 @@ def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
                 --clump-kb {} \
                 --threads {} {}\
                 --out {}
-            """.format(file_flag, chrom, clump, p, clump_p1, clump_p2, clump_r2, clump_kb, n_cores,memory_flag if memory is not None else "", out_single_chr)
+            """.format(plink2,file_flag, chrom, clump, p, clump_p1, clump_p2, clump_r2, clump_kb, n_cores,memory_flag if memory is not None else "", out_single_chr)
         try:
             output = subprocess.check_output(script, stderr=subprocess.STDOUT, shell=True,text=True)

gwaslab/util_in_fill_data.py CHANGED Viewed

@@ -184,7 +184,8 @@ def fill_mlog10p(sumstats,log,verbose=True,filled_count=0):
     else:
         return 0,filled_count
     return 1,filled_count
-def fill_extreme_mlog10p(sumstats,log,verbose=True,filled_count=0):
+def fill_extreme_mlog10p(sumstats,df,log,verbose=True,filled_count=0):
     # ref: https://stackoverflow.com/questions/46416027/how-to-compute-p-values-from-z-scores-in-r-when-the-z-score-is-large-pvalue-muc/46416222#46416222
     if "Z" in sumstats.columns:
         # P -> MLOG10P
@@ -198,6 +199,10 @@ def fill_extreme_mlog10p(sumstats,log,verbose=True,filled_count=0):
         log.write("  - Filling MLOG10P using Z column...", verbose=verbose)
         sumstats = fill_extreme_mlog10(sumstats, "Z")
         filled_count +=1
+    elif "CHISQ" in sumstats.columns and "DOF" in sumstats.columns:
+        log.write("  - Filling MLOG10P using CHISQ and DOF column...", verbose=verbose)
+        sumstats = fill_extreme_mlog10_chisq(sumstats, "CHISQ", df)
+        filled_count +=1
     else:
         return 0,filled_count
     return 1,filled_count
@@ -223,6 +228,19 @@ def fill_extreme_mlog10(sumstats, z):
     sumstats["P_EXPONENT"]= exponent
     return sumstats
+def fill_extreme_mlog10_chisq(sumstats, chisq, df):
+    #https://stackoverflow.com/a/46416222/199475
+    log_pvalue = ss.chi2.logsf(sumstats[chisq], sumstats[df])
+    log10_pvalue = log_pvalue/np.log(10)
+    mantissa = 10**(log10_pvalue %1)
+    exponent = log10_pvalue // 1
+    sumstats["MLOG10P"] = -log10_pvalue
+    sumstats["P_MANTISSA"]= mantissa
+    sumstats["P_EXPONENT"]= exponent
+    return sumstats
 ####################################################################################################################
 def fill_iteratively(sumstats,raw_to_fill,log,only_sig,df,extreme,verbose,sig_level):
     to_fill = raw_to_fill.copy()
@@ -260,7 +278,7 @@ def fill_iteratively(sumstats,raw_to_fill,log,only_sig,df,extreme,verbose,sig_le
     # p to -log10(P)  ###############################################################################################
         if "MLOG10P" in to_fill:
             if extreme==True:
-                status,filled_count = fill_extreme_mlog10p(sumstats,log,verbose=verbose,filled_count=filled_count)
+                status,filled_count = fill_extreme_mlog10p(sumstats,df, log,verbose=verbose,filled_count=filled_count)
                 filled_count +=1
             elif "P" not in sumstats.columns:
                 fill_p(sumstats,log,verbose=verbose)

gwaslab/viz_aux_annotate_plot.py CHANGED Viewed

@@ -38,6 +38,7 @@ def annotate_single(
     region,
     region_anno_bbox_args,
     skip,
+    anno_height=1,
     amode="int",
     snpid="SNPID",
     chrom="CHR",
@@ -131,7 +132,7 @@ def annotate_single(
             #xy=(row["i"],row["scaled_P"]+0.2)
             xy=(row["i"],row["scaled_P"]+0.01*maxy)
-            xytext=(last_pos,1.15*maxy*arm_scale)
+            xytext=(last_pos,1.15*maxy*arm_scale*anno_height)
             if anno_fixed_arm_length is not None:
                 armB_length_in_point = anno_fixed_arm_length

gwaslab/viz_aux_quickfix.py CHANGED Viewed

@@ -286,8 +286,9 @@ def _cut(series, mode,cutfactor,cut,skip, ylabels, cut_log, verbose, lines_to_pl
     log.write(" -Converting data above cut line...",verbose=verbose)
     if ylabels is not None:
         ylabels = pd.Series(ylabels)
-    maxy = series.max()
     series = series.copy()
+    maxy = series.max()
     if "b" not in mode:
         log.write(" -Maximum -log10(P) value is "+str(maxy) +" .", verbose=verbose)
     elif "b" in mode:

gwaslab/viz_plot_compare_effect.py CHANGED Viewed

@@ -77,8 +77,10 @@ def compare_effect(path1,
         scaled2 = True
     if is_q_mc=="fdr" or is_q_mc=="bon":
         is_q = True
-    else:
-        raise ValueError("Please select either fdr or bon for is_q_mc.")
+    if is_q == True:
+        if is_q_mc not in [False,"fdr","bon","non"]:
+            raise ValueError("Please select either fdr or bon or non for is_q_mc.")
     if save_args is None:
         save_args = {"dpi":300,"facecolor":"white"}
     if reg_box is None:

gwaslab/viz_plot_miamiplot2.py CHANGED Viewed

@@ -247,7 +247,10 @@ def plot_miami2(
         plt.subplots_adjust(hspace=region_hspace)
     else:
         fig, ax1, ax5 = figax
+    #if same_ylim==True:
+        #maxy = merged_sumstats[["scaled_P_1","scaled_P_2"]].max().max()
     log.write("Start to create Manhattan plot for sumstats1...", verbose=verbose)
     fig,log = mqqplot(merged_sumstats,
                       chrom="CHR",
@@ -284,16 +287,14 @@ def plot_miami2(
                       _if_quick_qc=False,
                      **mqq_args2)
     log.write("Finished creating Manhattan plot for sumstats2".format(_get_version()), verbose=verbose)
-    if same_ylim==True:
-        ylim1_converted = ax1.get_ylim()
-        ylim2_converted = ax5.get_ylim()
-        if ylim1_converted > ylim2_converted:
-            ax5.set_ylim(ylim1_converted)
-        else:
-            ax1.set_ylim(ylim2_converted)
     #####################################################################################################################
+    ax1l, ax1r = ax5.get_xlim()
+    ax5l, ax5r = ax1.get_xlim()
+    ax1.set_xlim([min(ax1l,ax5l), max(ax1r,ax5r)])
+    ax5.set_xlim([min(ax1l,ax5l), max(ax1r,ax5r)])
+    #####################################################################################################################
     ax5.set_xlabel("")
     #ax5.set_xticks(chrom_df)
     ax5.set_xticklabels([])

gwaslab/viz_plot_mqqplot.py CHANGED Viewed

@@ -141,6 +141,7 @@ def mqqplot(insumstats,
           anno_max_iter=100,
           arm_offset=50,
           arm_scale=1,
+          anno_height=1,
           arm_scale_d=None,
           cut=0,
           skip=0,
@@ -180,6 +181,7 @@ def mqqplot(insumstats,
           xpad=None,
           xpadl=None,
           xpadr=None,
+          xtight=False,
           chrpad=0.03,
           drop_chr_start=False,
           title =None,
@@ -552,7 +554,8 @@ def mqqplot(insumstats,
                                                                         cut_log = cut_log,
                                                                         verbose =verbose,
                                                                         lines_to_plot=lines_to_plot,
-                                                                        log = log)
+                                                                        log = log
+                                                                        )
     except:
         log.warning("No valid data! Please check the input.")
         return None
@@ -596,19 +599,23 @@ def mqqplot(insumstats,
             sumstats.loc[sumstats["scaled_P"]>-np.log10(sig_level_plot),"s"]=4
         sumstats["chr_hue"]=sumstats[chrom].astype("string")
-        if vcf_path is not None:
+        if "r" in mode:
+            if vcf_path is None:
+                sumstats["LD"]=100
+                sumstats["SHAPE"]=1
             sumstats["chr_hue"]=sumstats["LD"]
         ## default seetings
         palette = sns.color_palette(colors,n_colors=sumstats[chrom].nunique())
         legend = None
         style=None
         linewidth=0
         edgecolor="black"
         # if regional plot assign colors
-        if vcf_path is not None:
+        if "r" in mode:
+            #if vcf_path is not None:
             legend=None
             linewidth=1
             if len(region_ref) == 1:
@@ -631,10 +638,9 @@ def mqqplot(insumstats,
                         palette[(i+1)*100 + j ] = hex_color
                 edgecolor="none"
-                scatter_args["markers"]= region_marker_shapes[:len(region_ref)]
+                scatter_args["markers"]= {(i+1):m for i,m in enumerate(region_marker_shapes[:len(region_ref)])}
                 style="SHAPE"
         ## if highlight
         highlight_i = pd.DataFrame()
@@ -977,6 +983,7 @@ def mqqplot(insumstats,
                                 region=region,
                                 region_anno_bbox_args=region_anno_bbox_args,
                                 skip=skip,
+                                anno_height=anno_height,
                                 snpid=snpid,
                                 chrom=chrom,
                                 pos=pos,
@@ -1040,7 +1047,7 @@ def mqqplot(insumstats,
         if "qq" in mode:
             ax2.set_ylim(ylim)
-    ax1 = _add_pad_to_x_axis(ax1, xpad, xpadl, xpadr, sumstats)
+    ax1 = _add_pad_to_x_axis(ax1, xpad, xpadl, xpadr, sumstats, pos, chrpad, xtight, log = log, verbose=verbose)
     # Titles
     if title and anno and len(to_annotate)>0:
@@ -1065,20 +1072,34 @@ def mqqplot(insumstats,
-def _add_pad_to_x_axis(ax1, xpad, xpadl, xpadr, sumstats):
+def _add_pad_to_x_axis(ax1, xpad, xpadl, xpadr, sumstats, pos, chrpad, xtight, log, verbose):
-    if ax1 is not None:
-        xmin, xmax = ax1.get_xlim()
-        if xpad is not None:
-            pad = xpad* sumstats["i"].max()
-            ax1.set_xlim([xmin - pad, xmin + pad])
-        if xpadl is not None:
-            pad = xpadl* sumstats["i"].max()
-            ax1.set_xlim([xmin - pad,xmax])
-        if xpadr is not None:
-            pad = xpadr* sumstats["i"].max()
-            ax1.set_xlim([xmin, xmax + pad])
+    if xtight==True:
+        log.write(" -Adjusting X padding on both side : tight mode", verbose=verbose)
+        xmax = sumstats["i"].max()
+        xmin=  sumstats["i"].min()
+        ax1.set_xlim([xmin, xmax])
+    else:
+        chrpad_to_remove = sumstats[pos].max()*chrpad
+        if ax1 is not None:
+            xmin, xmax = ax1.get_xlim()
+            length = xmax - xmin
+            if xpad is not None:
+                log.write(" -Adjusting X padding on both side: {}".format(xpad), verbose=verbose)
+                pad = xpad* length #sumstats["i"].max()
+                ax1.set_xlim([xmin - pad + chrpad_to_remove, xmax + pad - chrpad_to_remove])
+            if xpad is None and xpadl is not None:
+                log.write(" -Adjusting X padding on left side: {}".format(xpadl), verbose=verbose)
+                xmin, xmax = ax1.get_xlim()
+                pad = xpadl*length # sumstats["i"].max()
+                ax1.set_xlim([xmin - pad + chrpad_to_remove ,xmax])
+            if xpad is None and xpadr is not None:
+                log.write(" -Adjusting X padding on right side: {}".format(xpadr), verbose=verbose)
+                xmin, xmax = ax1.get_xlim()
+                pad = xpadr*length # sumstats["i"].max()
+                ax1.set_xlim([xmin, xmax + pad - chrpad_to_remove])
     return ax1

gwaslab/viz_plot_regional2.py CHANGED Viewed

@@ -5,6 +5,7 @@ import matplotlib.patches as patches
 import seaborn as sns
 import numpy as np
 import copy
+import re
 import scipy as sp
 from pyensembl import EnsemblRelease
 from allel import GenotypeArray
@@ -96,19 +97,20 @@ def _plot_regional(
                                         marker_size= marker_size,
                                         region_marker_shapes=region_marker_shapes,
                                         log=log,verbose=verbose)
-            if lead_id_single is not None:
-                lead_ids.append(lead_id_single)
+            #if lead_id_single is not None:
+            lead_ids.append(lead_id_single)
         # update region_ref to variant rsID or variantID / skip NAs
         new_region_ref = []
         for i in range(len(lead_ids)):
             if lead_ids[i] is None:
+                new_region_ref.append(region_ref[i])
                 continue
             if region_ref[i] is None:
-                if "rsID" in sumstats.columns:
-                    new_name = sumstats.loc[lead_ids[i],"rsID"]
-                elif "SNPID" in sumstats.columns:
+                if "SNPID" in sumstats.columns:
                     new_name = sumstats.loc[lead_ids[i],"SNPID"]
+                elif "rsID" in sumstats.columns:
+                    new_name = sumstats.loc[lead_ids[i],"rsID"]
                 else:
                     new_name = "chr{}:{}".format(sumstats.loc[lead_ids[i],"CHR"] , sumstats.loc[lead_ids[i],"POS"])
                 new_region_ref.append(new_name)
@@ -162,7 +164,6 @@ def _plot_regional(
         lead_snp_ys = []
         lead_snp_is = []
         lead_snp_is_colors = []
         for i,lead_id_single in enumerate(lead_ids):
             if lead_id_single is not None:
                 lead_snp_ys.append(sumstats.loc[lead_id_single,"scaled_P"] )
@@ -258,11 +259,35 @@ def _get_lead_id(sumstats=None, region_ref=None, log=None, verbose=True):
     if type(lead_id) is list:
         if len(lead_id)>0:
             lead_id = int(lead_id[0])
     if region_ref_to_check is not None:
         if type(lead_id) is list:
             if len(lead_id)==0 :
-                log.warning("{} not found.. Skipping..".format(region_ref_to_check))
+                #try:
+                matched_snpid = re.match("(chr)?[0-9]+:[0-9]+:[ATCG]+:[ATCG]+", region_ref_to_check,  re.IGNORECASE)
+                if matched_snpid is None:
+                    pass
+                else:
+                    lead_snpid = matched_snpid.group(0).split(":")
+                    if len(lead_snpid)==4:
+                        lead_chr= int(lead_snpid[0])
+                        lead_pos= int(lead_snpid[1])
+                        lead_ea= lead_snpid[2]
+                        lead_nea= lead_snpid[3]
+                        chrpos_match = (sumstats["CHR"] == lead_chr) & (sumstats["POS"] == lead_pos)
+                        eanea_match = ((sumstats["EA"] == lead_ea) & (sumstats["NEA"] == lead_nea)) | ((sumstats["EA"] == lead_nea) & (sumstats["NEA"] == lead_ea))
+                        if "rsID" in sumstats.columns:
+                            lead_id = sumstats.index[chrpos_match&eanea_match].to_list()
+                        if "SNPID" in sumstats.columns:
+                            lead_id = sumstats.index[chrpos_match&eanea_match].to_list()
+                if type(lead_id) is list:
+                    if len(lead_id)>0:
+                        lead_id = int(lead_id[0])
+                        log.warning("Trying matching variant {} using CHR:POS:EA:NEA to {}... ".format(region_ref_to_check,lead_id))
+        if type(lead_id) is list:
+            if len(lead_id)==0 :
+                log.warning("Extracting variant: {} not found in sumstats.. Skipping..".format(region_ref_to_check))
                 #lead_id = sumstats["scaled_P"].idxmax()
                 lead_id = None
                 return lead_id
@@ -275,7 +300,7 @@ def _get_lead_id(sumstats=None, region_ref=None, log=None, verbose=True):
     return lead_id
-def _pinpoint_lead(sumstats,ax1,region_ref, lead_color, marker_size, log, verbose,region_marker_shapes):
+def _pinpoint_lead(sumstats,ax1,region_ref, lead_color, marker_size, log, verbose, region_marker_shapes):
     if region_ref is None:
         log.write(" -Extracting lead variant..." , verbose=verbose)
@@ -416,6 +441,11 @@ def _plot_gene_track(
     texts_to_adjust_left = []
     texts_to_adjust_middle = []
     texts_to_adjust_right = []
+    sig_gene_names=[]
+    sig_gene_lefts=[]
+    sig_gene_rights=[]
     for index,row in uniq_gene_region.iterrows():
         gene_color="#020080"
@@ -426,21 +456,18 @@ def _plot_gene_track(
             gene_anno = "<-" + row["name"]
-        sig_gene_names=[]
-        sig_gene_lefts=[]
-        sig_gene_rights=[]
         for lead_snp_i in lead_snp_is:
             if region_lead_grid is True and lead_snp_i > gene_track_start_i+row["start"] and lead_snp_i < gene_track_start_i+row["end"] :
-                    gene_color=region_lead_grid_line["color"]
-                    sig_gene_names.append(row["name"])
-                    sig_gene_lefts.append(gene_track_start_i+row["start"])
-                    sig_gene_rights.append(gene_track_start_i+row["end"])
+                gene_color=region_lead_grid_line["color"]
+                sig_gene_names.append(row["name"])
+                sig_gene_lefts.append(gene_track_start_i+row["start"])
+                sig_gene_rights.append(gene_track_start_i+row["end"])
             # plot gene line
             ax3.plot((gene_track_start_i+row["start"],gene_track_start_i+row["end"]),
                         (row["stack"]*2,row["stack"]*2),color=gene_color,linewidth=linewidth_in_points/10)
         # plot gene name
         if row["end"] >= region[2]:
             #right side
@@ -459,6 +486,7 @@ def _plot_gene_track(
     for index,row in exons.iterrows():
         exon_color="#020080"
         for sig_gene_name, sig_gene_left, sig_gene_right in zip(sig_gene_names,sig_gene_lefts,sig_gene_rights):
             if not pd.isnull(row["name"]):
                 if (region_lead_grid is True) and row["name"]==sig_gene_name:
                     exon_color = region_lead_grid_line["color"]
@@ -468,7 +496,7 @@ def _plot_gene_track(
                 exon_color = region_lead_grid_line["color"]
             else:
                 exon_color="#020080"
         ax3.plot((gene_track_start_i+row["start"],gene_track_start_i+row["end"]),
                     (row["stack"]*2,row["stack"]*2),linewidth=linewidth_in_points*taf[3],color=exon_color,solid_capstyle="butt")
@@ -550,24 +578,42 @@ def process_vcf(sumstats,
             # figure out lead variant
             lead_id = _get_lead_id(sumstats, region_ref_single, log, verbose)
-        if lead_id is None:
-            sumstats[rsq] = None
-            sumstats[rsq] = sumstats[rsq].astype("float")
-            sumstats[ld_single] = 0
-            continue
-        lead_pos = sumstats.loc[lead_id,pos]
+        lead_series = None
+        if lead_id is None:
+            matched_snpid = re.match("(chr)?[0-9]+:[0-9]+:[ATCG]+:[ATCG]+",region_ref_single,  re.IGNORECASE)
+            if matched_snpid is None:
+                sumstats[rsq] = None
+                sumstats[rsq] = sumstats[rsq].astype("float")
+                sumstats[ld_single] = 0
+                continue
+            else:
+                lead_snpid = matched_snpid.group(0).split(":")[1:]
+                lead_pos = int(lead_snpid[0])
+                lead_snpid[0]= int(lead_snpid[0])
+                lead_series = pd.Series(lead_snpid)
+        else:
+            lead_pos = sumstats.loc[lead_id,pos]
         # if lead pos is available:
         if lead_pos in ref_genotype["variants/POS"]:
             # get ref index for lead snp
-            lead_snp_ref_index = match_varaint(sumstats.loc[lead_id,[pos,nea,ea]])
-            #lead_snp_ref_index = np.where(ref_genotype["variants/POS"] == lead_pos)[0][0]
+            if lead_series is None:
+                lead_snp_ref_index = match_varaint(sumstats.loc[lead_id,[pos,nea,ea]])
+                #lead_snp_ref_index = np.where(ref_genotype["variants/POS"] == lead_pos)[0][0]
+            else:
+                log.warning("Computing LD: {} not found in sumstats but found in reference...Still Computing...".format(region_ref_single))
+                lead_snp_ref_index = match_varaint(lead_series)
             # non-na other snp index
             other_snps_ref_index = sumstats["REFINDEX"].dropna().astype("int").values
             # get genotype
             lead_snp_genotype = GenotypeArray([ref_genotype["calldata/GT"][lead_snp_ref_index]]).to_n_alt()
             try:
                 if len(set(lead_snp_genotype[0]))==1:
@@ -604,10 +650,10 @@ def process_vcf(sumstats,
                 sumstats.loc[to_change_color,ld_single] = 1
             to_change_color = sumstats[rsq]>ld_threshold
             sumstats.loc[to_change_color,ld_single] = index+2
-        sumstats.loc[lead_id,ld_single] = len(region_ld_threshold)+2
-        sumstats.loc[lead_id,lead] = 1
+        if lead_series is None:
+            sumstats.loc[lead_id,ld_single] = len(region_ld_threshold)+2
+            sumstats.loc[lead_id,lead] = 1
     ####################################################################################################
     final_shape_col = "SHAPE"

gwaslab 3.4.47__py3-none-any.whl → 3.4.49__py3-none-any.whl

Potentially problematic release.

gwaslab 3.4.47__py3-none-any.whl → 3.4.49__py3-none-any.whl