PyPI - gwaslab - Versions diffs - 3.4.46__py3-none-any.whl → 3.4.48__py3-none-any.whl - Mend

gwaslab 3.4.46__py3-none-any.whl → 3.4.48__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of gwaslab might be problematic. Click here for more details.

Files changed (25)

gwaslab/g_Sumstats.py +12 -2
gwaslab/g_version.py +7 -7
gwaslab/io_read_ldsc.py +16 -2
gwaslab/io_to_formats.py +8 -3
gwaslab/qc_fix_sumstats.py +5 -2
gwaslab/util_abf_finemapping.py +67 -0
gwaslab/util_ex_calculate_ldmatrix.py +20 -7
gwaslab/util_ex_calculate_prs.py +13 -7
gwaslab/util_ex_ldsc.py +8 -1
gwaslab/util_ex_process_ref.py +22 -11
gwaslab/util_ex_run_clumping.py +6 -6
gwaslab/viz_aux_annotate_plot.py +2 -1
gwaslab/viz_aux_chromatin.py +4 -3
gwaslab/viz_aux_quickfix.py +2 -1
gwaslab/viz_plot_compare_effect.py +4 -2
gwaslab/viz_plot_miamiplot2.py +5 -8
gwaslab/viz_plot_mqqplot.py +121 -62
gwaslab/viz_plot_regional2.py +838 -0
gwaslab/viz_plot_stackedregional.py +81 -48
{gwaslab-3.4.46.dist-info → gwaslab-3.4.48.dist-info}/METADATA +15 -15
{gwaslab-3.4.46.dist-info → gwaslab-3.4.48.dist-info}/RECORD +25 -23
{gwaslab-3.4.46.dist-info → gwaslab-3.4.48.dist-info}/WHEEL +1 -1
{gwaslab-3.4.46.dist-info → gwaslab-3.4.48.dist-info}/LICENSE +0 -0
{gwaslab-3.4.46.dist-info → gwaslab-3.4.48.dist-info}/LICENSE_before_v3.4.39 +0 -0
{gwaslab-3.4.46.dist-info → gwaslab-3.4.48.dist-info}/top_level.txt +0 -0

gwaslab/g_Sumstats.py CHANGED Viewed

@@ -76,6 +76,8 @@ from gwaslab.util_ex_ldsc import _estimate_rg_by_ldsc
 from gwaslab.util_ex_ldsc import _estimate_h2_cts_by_ldsc
 from gwaslab.util_ex_ldsc import _estimate_partitioned_h2_by_ldsc
 from gwaslab.bd_get_hapmap3 import gethapmap3
+from gwaslab.util_abf_finemapping import abf_finemapping
+from gwaslab.util_abf_finemapping import make_cs
 import gc
 #20220309
@@ -135,6 +137,7 @@ class Sumstats():
         self.data = pd.DataFrame()
         self.log = Log()
         self.ldsc_h2 = None
+        self.ldsc_h2_results = None
         self.ldsc_rg = None
         self.ldsc_h2_cts = None
         self.ldsc_partitioned_h2_summary = None
@@ -756,13 +759,20 @@ class Sumstats():
         else:
             output = lambdaGC(self.data[["CHR",mode]],mode=mode,**kwargs)
             self.meta["Genomic inflation factor"] = output
-            return output
+            return output
+    def abf_finemapping(self, region=None, chrpos=None, snpid=None,**kwargs):
+        region_data = abf_finemapping(self.data.copy(),region=region,chrpos=chrpos,snpid=snpid,log=self.log, **kwargs)
+        credible_sets = make_cs(region_data,threshold=0.95,log=self.log)
+        return region_data, credible_sets
 ## LDSC ##############################################################################################
     def estimate_h2_by_ldsc(self, build=None, verbose=True, match_allele=True, **kwargs):
         if build is None:
             build = self.meta["gwaslab"]["genome_build"]
         insumstats = gethapmap3(self.data.copy(), build=build, verbose=verbose , match_allele=True, how="right" )
-        self.ldsc_h2 = _estimate_h2_by_ldsc(insumstats=insumstats, log=self.log, verbose=verbose, **kwargs)
+        self.ldsc_h2, self.ldsc_h2_results = _estimate_h2_by_ldsc(insumstats=insumstats, log=self.log, verbose=verbose, **kwargs)
     def estimate_rg_by_ldsc(self, build=None, verbose=True, match_allele=True, **kwargs):
         if build is None:

gwaslab/g_version.py CHANGED Viewed

@@ -15,16 +15,16 @@ def _get_version():
 def gwaslab_info():
     # version meta information
     dic={
-   "version":"3.4.46",
-   "release_date":"20240624"
+   "version":"3.4.48",
+   "release_date":"20240822"
     }
     return dic
-def _checking_plink_version(v=2,log=Log(), verbose=True):
-    if v==1:
-        which_plink_script = "plink --version"
-    elif v==2:
-        which_plink_script = "plink2 --version"
+def _checking_plink_version(plink=None,plink2=None,log=Log(), verbose=True):
+    if plink is not None:
+        which_plink_script = "{} --version".format(plink)
+    elif plink2 is not None:
+        which_plink_script = "{}  --version".format(plink2)
     output = subprocess.check_output(which_plink_script, stderr=subprocess.STDOUT, shell=True,text=True)
     log.write(" -PLINK version: {}".format(output.strip()))
     return log

gwaslab/io_read_ldsc.py CHANGED Viewed

@@ -198,16 +198,29 @@ def read_greml(filelist=[]):
     return summary
 def parse_ldsc_summary(ldsc_summary):
-    summary = pd.DataFrame(columns = ['h2_obs', 'h2_se','Lambda_gc','Mean_chi2','Intercept','Intercept_se',"Ratio","Ratio_se"])
     lines = ldsc_summary.split("\n")
+    columns = ['h2_obs', 'h2_se','Lambda_gc','Mean_chi2','Intercept','Intercept_se',"Ratio","Ratio_se","Catagories"]
+    summary = pd.DataFrame(columns = columns)
     row={}
     try:
         objects = re.compile('[a-zA-Z\s\d]+:|[-0-9.]+[e]?[-0-9.]+|NA').findall(lines[0])
         row["h2_obs"]=objects[1]
         row["h2_se"]=objects[2]
-        ##next line lambda gc
+        ##check categories
+        if len(lines) == 6:
+            objects = re.compile('  -Categories:(.+)').findall(lines[1])
+            row["Catagories"] = objects[0].strip()
+            lines.pop(1)
+        else:
+            row["Catagories"] = "NA"
+        ##next line lambda gc
         objects = re.compile('[a-zA-Z\s\d]+:|[-0-9.]+[e]?[-0-9.]+|NA').findall(lines[1])
         row["Lambda_gc"] = objects[1]
         ##next line Mean_chi2
@@ -240,6 +253,7 @@ def parse_ldsc_summary(ldsc_summary):
         row["Intercept_se"]="NA"
         row["Ratio"]="NA"
         row["Ratio_se"]="NA"
+        row["Catagories"] = "NA"
     #summary = summary.append(row,ignore_index=True)
     row = pd.DataFrame([row], columns = summary.columns)

gwaslab/io_to_formats.py CHANGED Viewed

@@ -212,8 +212,10 @@ def tofmt(sumstats,
     log.write(" -Start outputting sumstats in "+fmt+" format...")
     if "CHR" in sumstats.columns:
+        # output X,Y,MT instead of 23,24,25
         if xymt_number is False and pd.api.types.is_integer_dtype(sumstats["CHR"]):
             sumstats["CHR"]= sumstats["CHR"].map(get_number_to_chr(xymt=xymt,prefix=chr_prefix))
+        # add prefix to CHR
         elif chr_prefix is not None:
             sumstats["CHR"]= chr_prefix + sumstats["CHR"].astype("string")
@@ -437,17 +439,20 @@ def _configure_output_cols_and_args(sumstats, rename_dictionary, cols, no_status
             ouput_cols.append(i)
     # + additional cols and remove duplicated
-    ouput_cols = list(set(ouput_cols + cols))
+    ouput_cols_final = []
+    for i in ouput_cols + cols:
+        if i not in ouput_cols_final:
+            ouput_cols_final.append(i)
     # remove STATUS
     try:
         if no_status == True:
-            ouput_cols.remove("STATUS")
+            ouput_cols_final.remove("STATUS")
     except:
         pass
     #filter and rename to target fromat headers
-    sumstats = sumstats[ouput_cols]
+    sumstats = sumstats[ouput_cols_final]
     sumstats = sumstats.rename(columns=rename_dictionary)
     # configure target format args and reorder columns

gwaslab/qc_fix_sumstats.py CHANGED Viewed

@@ -1497,7 +1497,11 @@ def liftover_variant(sumstats,
              status="STATUS",
              from_build="19",
              to_build="38"):
-    converter = get_lifter("hg"+from_build,"hg"+to_build)
+    try:
+        converter = get_lifter("hg"+from_build,"hg"+to_build,one_based=True)
+    except:
+        converter = get_lifter("hg"+from_build,"hg"+to_build)
     dic= get_number_to_chr(in_chr=False,xymt=["X","Y","M"])
     dic2= get_chr_to_number(out_chr=False)
     for i in sumstats[chrom].unique():
@@ -1549,7 +1553,6 @@ def parallelizeliftovervariant(sumstats,n_cores=1,chrom="CHR", pos="POS", from_b
         pool.close()
         pool.join()
     ############################################################################
     unmap_num = len(sumstats.loc[sumstats[pos].isna(),:])
     if remove is True:

gwaslab/util_abf_finemapping.py ADDED Viewed

@@ -0,0 +1,67 @@
+import pandas as pd
+import numpy as np
+from gwaslab.g_Log import Log
+from gwaslab.util_in_filter_value import _get_flanking_by_chrpos
+from gwaslab.util_in_filter_value import _get_flanking_by_id
+# Calculate PIP based on approximate Bayesian factor (ABF)
+# Wakefield, J. A bayesian measure of the probability of false discovery in genetic epidemiology studies. Am J Hum Genet 81, 208–227 (2007).
+def calc_abf(insumstats,w=0.2,log=Log(),verbose=True,**kwargs):
+    log.write("Start to calculate approximate Bayesian factor for {} variants".format(len(insumstats)),verbose=verbose)
+    log.write(" - Reference: akefield, J. A bayesian measure of the probability of false discovery in genetic epidemiology studies. Am J Hum Genet 81, 208–227 (2007).",verbose=verbose)
+    log.write(" - Priors for the standard deviation W of the effect size parameter β : {} ".format(w),verbose=verbose)
+    # binary -> w=0.2
+    # quant  -> w=0.15
+    omega = w**2
+    se = insumstats["SE"]
+    v = se**2
+    r = omega / (omega+v)
+    beta = insumstats["BETA"]
+    z = beta/se
+    insumstats = insumstats.copy()
+    # (6) ABF -> reciprocal
+    insumstats.loc[:, "log_ABF"] = 1/2* (np.log(1-r) + (r * z**2))
+    return insumstats
+def calc_PIP(insumstats,log=Log(),verbose=True,**kwargs):
+    # Calculate the logarithmic sum of each ABF to find the logarithm of total_abf
+    log_total_abf = np.log(np.sum(np.exp(insumstats["log_ABF"] - np.max(insumstats["log_ABF"])))) + np.max(insumstats["log_ABF"])
+    insumstats = insumstats.copy()
+    log.write("Start to calculate PIP for {} variants".format(len(insumstats)),verbose=verbose)
+    # Calculate PIP on a logarithmic scale by subtracting log_total_abf from each log_abf
+    insumstats.loc[:, "log_PIP"] = insumstats['log_ABF'] - log_total_abf
+    # Convert PIP on logarithmic scale to exponential and back to normal scale
+    insumstats.loc[:, "PIP"] = np.exp(insumstats['log_PIP'])
+    return insumstats
+def abf_finemapping(insumstats,region=None,chrpos=None,snpid=None, log=Log(),**kwargs):
+    if region is not None:
+        region_data = insumstats[(insumstats["CHR"] == region[0]) & (insumstats["POS"] >= region[1]) & (insumstats["POS"] <= region[2])]
+    elif chrpos is not None:
+        region_data = _get_flanking_by_chrpos(insumstats, chrpos=chrpos,**kwargs)
+    elif snpid is not None:
+        region_data = _get_flanking_by_id(insumstats, snpid=snpid,**kwargs)
+    region_data = calc_abf(region_data,log=log,**kwargs)
+    region_data = calc_PIP(region_data,log=log,**kwargs)
+    return region_data
+def make_cs(insumstats,threshold=0.95,log=Log(),verbose=True):
+    insumstats = insumstats.sort_values(by="PIP",ascending=False)
+    pip_sum = 0
+    cs = pd.DataFrame()
+    for index, row in insumstats.iterrows():
+        cs = pd.concat([cs,pd.DataFrame(row).T])
+        pip_sum += row["PIP"]
+        if pip_sum > threshold:
+            break
+    log.write("Finished constructing a {}% credible set with {} variant(s)".format(str(threshold * 100),str(len(cs))),verbose=verbose)
+    return cs

gwaslab/util_ex_calculate_ldmatrix.py CHANGED Viewed

@@ -17,6 +17,8 @@ def tofinemapping(sumstats,
                   vcf=None,
                   loci=None,
                   out="./",
+                  plink="plink",
+                  plink2="plink2",
                   windowsizekb=1000,
                   n_cores=1,
                   mode="r",
@@ -56,6 +58,9 @@ def tofinemapping(sumstats,
     else:
         sig_df = sumstats.loc[sumstats["SNPID"].isin(loci),:]
+    log.write(" -plink1.9 path: {}".format(plink),verbose=verbose)
+    log.write(" -plink2 path: {}".format(plink2),verbose=verbose)
     # Drop duplicate!!!!
     log.write(" -Dropping duplicated SNPIDs...",verbose=verbose)
     sumstats = sumstats.drop_duplicates(subset=["SNPID"]).copy()
@@ -68,11 +73,13 @@ def tofinemapping(sumstats,
     if exclude_hla==True:
         sig_df = _exclude_hla(sig_df, log=log, verbose=verbose)
+    sig_df = sig_df.reset_index()
     ## for each lead variant
     for index, row in sig_df.iterrows():
         # extract snplist in each locus
         gc.collect()
+        log.write(" -Locus #{}---------------------------------------------------------------".format(index+1))
         log.write(" -Processing locus with lead variant {} at CHR {} POS {} ...".format(row["SNPID"],row["CHR"],row["POS"]))
         locus_sumstats = _extract_variants_in_locus(sumstats, windowsizekb, locus = (row["CHR"],row["POS"]))
@@ -84,7 +91,10 @@ def tofinemapping(sumstats,
                                                                     n_cores=n_cores,
                                                                     log=log,
                                                                     load_bim=True,
-                                                                    overwrite=overwrite,**kwargs)
+                                                                    overwrite=overwrite,
+                                                                    plink=plink,
+                                                                    plink2=plink2,
+                                                                    **kwargs)
         ## check available snps with reference file
         matched_sumstats = _align_sumstats_with_bim(row=row,
@@ -114,7 +124,10 @@ def tofinemapping(sumstats,
                                                             windowsizekb=windowsizekb,
                                                             out=out,
                                                             plink_log=plink_log,
-                                                            log=log,filetype=filetype,
+                                                            log=log,
+                                                            filetype=filetype,
+                                                            plink=plink,
+                                                            plink2=plink2,
                                                             verbose=verbose)
@@ -143,12 +156,12 @@ def tofinemapping(sumstats,
-def _calculate_ld_r(study, matched_sumstats_snpid, row, bfile_prefix, n_cores, windowsizekb,out,plink_log,log,memory,mode,filetype,verbose=True):
+def _calculate_ld_r(study, matched_sumstats_snpid, row, bfile_prefix, n_cores, windowsizekb,out,plink_log,log,memory,mode,filetype,plink,plink2,verbose=True):
     '''
     Calculate LD r matrix by calling PLINK; return file name and log
     '''
     log.write(" -Start to calculate LD r matrix...",verbose=verbose)
-    log = _checking_plink_version(v=1, log=log)
+    log = _checking_plink_version(plink=plink, log=log)
     if "@" in bfile_prefix:
         bfile_to_use = bfile_prefix.replace("@",str(row["CHR"]))
     else:
@@ -165,7 +178,7 @@ def _calculate_ld_r(study, matched_sumstats_snpid, row, bfile_prefix, n_cores, w
             raise ValueError("Please use bfile instead of pfile for PLINK1.")
         script_vcf_to_bfile = """
-        plink \
+        {} \
             --bfile {} \
             --keep-allele-order \
             --extract {} \
@@ -175,7 +188,7 @@ def _calculate_ld_r(study, matched_sumstats_snpid, row, bfile_prefix, n_cores, w
             --threads {} {}\
             --write-snplist \
             --out {}
-        """.format(bfile_to_use, snplist_path , row["CHR"], mode, n_cores, memory_flag if memory is not None else "", output_prefix)
+        """.format(plink, bfile_to_use, snplist_path , row["CHR"], mode, n_cores, memory_flag if memory is not None else "", output_prefix)
         try:
             output = subprocess.check_output(script_vcf_to_bfile, stderr=subprocess.STDOUT, shell=True,text=True)

gwaslab/util_ex_calculate_prs.py CHANGED Viewed

@@ -18,6 +18,8 @@ def _calculate_prs(sumstats,
           memory=None,
           overwrite=False,
           mode=None,delete=True,
+          plink="plink",
+          plink2="plink2",
           log=Log(),**kwargs):
     #matching_alleles
@@ -30,14 +32,18 @@ def _calculate_prs(sumstats,
         chrlist.sort()
         plink_log = ""
         #process reference fileWWW
-        bfile_prefix, plink_log, ref_bim, filetype = _process_plink_input_files(  chrlist=chrlist,
+        bfile_prefix, plink_log, ref_bim, filetype = _process_plink_input_files(
+                                                                    chrlist=chrlist,
                                                                     bfile=bfile,
                                                                     vcf=vcf,
                                                                     plink_log=plink_log,
                                                                     n_cores=n_cores,
                                                                     log=log,
                                                                     load_bim=False,
-                                                                    overwrite=overwrite,**kwargs)
+                                                                    overwrite=overwrite,
+                                                                    plink=plink,
+                                                                    plink2=plink2,
+                                                                    **kwargs)
         score_file_path_list =[]
         for index, chrom in enumerate(chrlist):
             chr_sumstats = sumstats.loc[sumstats["CHR"]==chrom,:].copy()
@@ -61,7 +67,7 @@ def _calculate_prs(sumstats,
                                plink_log=plink_log,
                                log=log,
                                memory=memory,
-                               mode=mode,filetype=filetype)
+                               mode=mode,filetype=filetype,plink2=plink2)
             score_file_path_list.append(score_file_path)
             if delete == True:
                 os.remove(model_path)
@@ -71,10 +77,10 @@ def _calculate_prs(sumstats,
-def _run_calculate_prs(study, chrom , model_path, bfile_prefix, n_cores, out, plink_log, log, memory,filetype, mode=None):
+def _run_calculate_prs(study, chrom , model_path, bfile_prefix, n_cores, out, plink_log, log, memory,filetype, plink2,mode=None):
     log.write(" -Start to calculate PRS for Chr {}...".format(chrom))
-    _checking_plink_version(v=2, log=log)
+    _checking_plink_version(plink2=plink2, log=log)
     if "@" in bfile_prefix:
         bpfile_to_use = bfile_prefix.replace("@",str(chrom))
@@ -92,13 +98,13 @@ def _run_calculate_prs(study, chrom , model_path, bfile_prefix, n_cores, out, pl
         memory_flag = "--memory {}".format(memory)
     script_vcf_to_bfile = """
-    plink2 \
+    {} \
         {} \
         --score {} 1 2 3 header {} cols=+scoresums,+denom ignore-dup-ids \
         --chr {} \
         --threads {} {}\
         --out {}
-    """.format(file_flag, model_path ,  mode if mode is not None else "", chrom, n_cores, memory_flag if memory is not None else "", output_prefix)
+    """.format(plink2, file_flag, model_path ,  mode if mode is not None else "", chrom, n_cores, memory_flag if memory is not None else "", output_prefix)
     try:
         output = subprocess.check_output(script_vcf_to_bfile, stderr=subprocess.STDOUT, shell=True,text=True)

gwaslab/util_ex_ldsc.py CHANGED Viewed

@@ -304,9 +304,16 @@ def _estimate_h2_by_ldsc(insumstats, log, verbose=True, munge=False, munge_args=
     log.write(" -LDSC log:", verbose=verbose)
     summary = estimate_h2(sumstats, default_args, log)
+    results_table = None
+    if type(summary) is tuple:
+        results_table = summary[1]
+        summary = summary[0]
+        log.write(" -Coefficient results have been stored in .ldsc_h2_results", verbose=verbose)
     log.write(" -Results have been stored in .ldsc_h2", verbose=verbose)
     finished(log=log,verbose=verbose,end_line=_end_line)
-    return parse_ldsc_summary(summary)
+    return parse_ldsc_summary(summary), results_table
 ####################################################################################################################

gwaslab/util_ex_process_ref.py CHANGED Viewed

@@ -20,7 +20,9 @@ def _process_plink_input_files(chrlist,
                                bgen_mode="ref-first",
                                convert="bfile",
                                memory=None,
-                               load_bim=False):
+                               load_bim=False,
+                               plink="plink",
+                               plink2="plink2"):
     """
     Process input files (bfile,pfile,vcf,bgen) to either PLINK1 bed/bim/fam or PLINK2 pgen/psam/pvar.
@@ -66,7 +68,9 @@ def _process_plink_input_files(chrlist,
                                                             convert=convert,
                                                             memory=memory,
                                                             overwrite=overwrite,
-                                                            load_bim=load_bim)
+                                                            load_bim=load_bim,
+                                                            plink=plink,
+                                                            plink2=plink2)
         filetype = convert
     elif filetype == "bgen":
         ref_file_prefix, plink_log, ref_bims = _process_bgen(ref_file_prefix=ref_file_prefix,
@@ -81,7 +85,9 @@ def _process_plink_input_files(chrlist,
                                                             convert=convert,
                                                             memory=memory,
                                                             overwrite=overwrite,
-                                                            load_bim=load_bim)
+                                                            load_bim=load_bim,
+                                                            plink=plink,
+                                                            plink2=plink2)
         filetype = convert
     return ref_file_prefix, plink_log, ref_bims, filetype
@@ -199,11 +205,13 @@ def _process_vcf(ref_file_prefix,
                  convert="bfile",
                  memory=None,
                  overwrite=False,
-                 load_bim=False):
+                 load_bim=False,
+                 plink="plink",
+                 plink2="plink2"):
     log.write(" -Processing VCF : {}...".format(ref_file_prefix))
     #check plink version
-    log = _checking_plink_version(v=2,log=log)
+    log = _checking_plink_version(plink2=plink2,log=log)
     # file path prefix to return
     if is_wild_card==True:
@@ -243,14 +251,15 @@ def _process_vcf(ref_file_prefix,
         #if not existing or overwrite is True
         if (not is_file_exist) or overwrite:
             script_vcf_to_bfile = """
-            plink2 \
+            {} \
                 --vcf {} \
                 --chr {} \
                 {} \
                 --rm-dup force-first \
                 --threads {}{}\
                 --out {}
-            """.format(vcf_to_load,
+            """.format(plink2,
+                        vcf_to_load,
                        i,
                        make_flag,
                        n_cores, memory_flag,
@@ -288,11 +297,13 @@ def _process_bgen(ref_file_prefix,
                   convert="bfile",
                   memory=None,
                   overwrite=False,
-                  load_bim=False):
+                  load_bim=False,
+                  plink="plink",
+                 plink2="plink2"):
     log.write(" -Processing BGEN files : {}...".format(ref_file_prefix))
     #check plink version
-    log = _checking_plink_version(v=2,log=log)
+    log = _checking_plink_version(log=log,plink2=plink2)
     # file path prefix to return
     if is_wild_card==True:
@@ -338,14 +349,14 @@ def _process_bgen(ref_file_prefix,
         #if not existing or overwrite is True
         if (not is_file_exist) or overwrite:
             script_vcf_to_bfile = """
-            plink2 \
+            {} \
                 --bgen {} {} {}\
                 --chr {} \
                 {} \
                 --rm-dup force-first \
                 --threads {}{}\
                 --out {}
-            """.format(bgen_to_load, bgen_mode, sample_flag,
+            """.format(plink2,bgen_to_load, bgen_mode, sample_flag,
                        i,
                        make_flag,
                        n_cores, memory_flag,

gwaslab/util_ex_run_clumping.py CHANGED Viewed

@@ -11,7 +11,7 @@ from gwaslab.g_version import _checking_plink_version
 def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
            p="P",mlog10p="MLOG10P", overwrite=False, study=None, bfile=None,
            n_cores=1, memory=None, chrom=None, clump_p1=5e-8, clump_p2=5e-8, clump_r2=0.01, clump_kb=250,
-           log=Log(),verbose=True):
+           log=Log(),verbose=True,plink="plink",plink2="plink2"):
     ##start function with col checking##########################################################
     _start_line = "perfrom clumping"
     _end_line = "clumping"
@@ -111,7 +111,7 @@ def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
             bfile_to_use = bfile
         log.write(" -Performing clumping for CHR {}...".format(i),verbose=verbose)
-        log = _checking_plink_version(v=2, log=log)
+        log = _checking_plink_version(plink2=plink2, log=log)
         if memory is not None:
             memory_flag = "--memory {}".format(memory)
@@ -123,7 +123,7 @@ def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
         if scaled == True:
             # clumping using LOG10P
             script = """
-            plink2 \
+            {} \
                 {}\
                 --chr {} \
                 --clump {} \
@@ -136,11 +136,11 @@ def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
                 --clump-kb {} \
                 --threads {} {}\
                 --out {}
-            """.format(file_flag, chrom, clump, mlog10p,clump_log10_p1, clump_log10_p2, clump_r2, clump_kb, n_cores, memory_flag if memory is not None else "", out_single_chr)
+            """.format(plink2, file_flag, chrom, clump, mlog10p,clump_log10_p1, clump_log10_p2, clump_r2, clump_kb, n_cores, memory_flag if memory is not None else "", out_single_chr)
         else:
             # clumping using P
             script = """
-            plink2 \
+            {} \
                 {}\
                 --chr {} \
                 --clump {} \
@@ -152,7 +152,7 @@ def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
                 --clump-kb {} \
                 --threads {} {}\
                 --out {}
-            """.format(file_flag, chrom, clump, p, clump_p1, clump_p2, clump_r2, clump_kb, n_cores,memory_flag if memory is not None else "", out_single_chr)
+            """.format(plink2,file_flag, chrom, clump, p, clump_p1, clump_p2, clump_r2, clump_kb, n_cores,memory_flag if memory is not None else "", out_single_chr)
         try:
             output = subprocess.check_output(script, stderr=subprocess.STDOUT, shell=True,text=True)

gwaslab/viz_aux_annotate_plot.py CHANGED Viewed

@@ -38,6 +38,7 @@ def annotate_single(
     region,
     region_anno_bbox_args,
     skip,
+    anno_height=1,
     amode="int",
     snpid="SNPID",
     chrom="CHR",
@@ -131,7 +132,7 @@ def annotate_single(
             #xy=(row["i"],row["scaled_P"]+0.2)
             xy=(row["i"],row["scaled_P"]+0.01*maxy)
-            xytext=(last_pos,1.15*maxy*arm_scale)
+            xytext=(last_pos,1.15*maxy*arm_scale*anno_height)
             if anno_fixed_arm_length is not None:
                 armB_length_in_point = anno_fixed_arm_length

gwaslab/viz_aux_chromatin.py CHANGED Viewed

@@ -63,6 +63,8 @@ def _plot_chromatin_state(region_chromatin_files,
                           fig,
                           ax,
                           xlim_i,
+                          fontsize = 12,
+                          font_family = "Arial",
                           log=Log(),
                           verbose=True):
     '''
@@ -101,11 +103,10 @@ def _plot_chromatin_state(region_chromatin_files,
     ## add stripe label
     if len(region_chromatin_labels) == len(region_chromatin_files):
-        ax.set_yticks([i*0.1 for i in range(len(region_chromatin_labels))],
-                      region_chromatin_labels)
+        ax.set_yticks([i*0.1 for i in range(len(region_chromatin_labels))], region_chromatin_labels, fontsize=fontsize, family=font_family)
     else:
         ax.set_yticks(ticks=[])
-    ax.set_xticks(ticks=[])
+    #ax.set_xticks(ticks=[])
     ax.invert_yaxis()
     return fig

gwaslab/viz_aux_quickfix.py CHANGED Viewed

@@ -286,8 +286,9 @@ def _cut(series, mode,cutfactor,cut,skip, ylabels, cut_log, verbose, lines_to_pl
     log.write(" -Converting data above cut line...",verbose=verbose)
     if ylabels is not None:
         ylabels = pd.Series(ylabels)
-    maxy = series.max()
     series = series.copy()
+    maxy = series.max()
     if "b" not in mode:
         log.write(" -Maximum -log10(P) value is "+str(maxy) +" .", verbose=verbose)
     elif "b" in mode:

gwaslab/viz_plot_compare_effect.py CHANGED Viewed

@@ -77,8 +77,10 @@ def compare_effect(path1,
         scaled2 = True
     if is_q_mc=="fdr" or is_q_mc=="bon":
         is_q = True
-    else:
-        raise ValueError("Please select either fdr or bon for is_q_mc.")
+    if is_q == True:
+        if is_q_mc not in [False,"fdr","bon","non"]:
+            raise ValueError("Please select either fdr or bon or non for is_q_mc.")
     if save_args is None:
         save_args = {"dpi":300,"facecolor":"white"}
     if reg_box is None:

gwaslab/viz_plot_miamiplot2.py CHANGED Viewed

@@ -247,7 +247,10 @@ def plot_miami2(
         plt.subplots_adjust(hspace=region_hspace)
     else:
         fig, ax1, ax5 = figax
+    #if same_ylim==True:
+        #maxy = merged_sumstats[["scaled_P_1","scaled_P_2"]].max().max()
     log.write("Start to create Manhattan plot for sumstats1...", verbose=verbose)
     fig,log = mqqplot(merged_sumstats,
                       chrom="CHR",
@@ -284,14 +287,8 @@ def plot_miami2(
                       _if_quick_qc=False,
                      **mqq_args2)
     log.write("Finished creating Manhattan plot for sumstats2".format(_get_version()), verbose=verbose)
-    if same_ylim==True:
-        ylim1_converted = ax1.get_ylim()
-        ylim2_converted = ax5.get_ylim()
-        if ylim1_converted > ylim2_converted:
-            ax5.set_ylim(ylim1_converted)
-        else:
-            ax1.set_ylim(ylim2_converted)
     #####################################################################################################################
     ax5.set_xlabel("")

gwaslab 3.4.46__py3-none-any.whl → 3.4.48__py3-none-any.whl

Potentially problematic release.

gwaslab 3.4.46__py3-none-any.whl → 3.4.48__py3-none-any.whl