gwaslab 3.4.44__py3-none-any.whl → 3.4.46__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of gwaslab might be problematic.
- gwaslab/__init__.py +2 -1
- gwaslab/bd_common_data.py +22 -0
- gwaslab/g_Sumstats.py +2 -0
- gwaslab/g_SumstatsPair.py +1 -1
- gwaslab/g_vchange_status.py +4 -2
- gwaslab/g_version.py +2 -2
- gwaslab/hm_harmonize_sumstats.py +14 -6
- gwaslab/io_preformat_input.py +22 -1
- gwaslab/qc_fix_sumstats.py +8 -1
- gwaslab/util_in_filter_value.py +38 -2
- gwaslab/util_in_get_sig.py +32 -8
- gwaslab/util_in_meta.py +234 -0
- gwaslab/util_in_snphwe.py +58 -0
- gwaslab/viz_aux_chromatin.py +111 -0
- gwaslab/viz_plot_compare_effect.py +4 -1
- gwaslab/viz_plot_mqqplot.py +2 -0
- gwaslab/viz_plot_regionalplot.py +4 -0
- gwaslab/viz_plot_stackedregional.py +69 -13
- {gwaslab-3.4.44.dist-info → gwaslab-3.4.46.dist-info}/METADATA +7 -7
- {gwaslab-3.4.44.dist-info → gwaslab-3.4.46.dist-info}/RECORD +24 -21
- {gwaslab-3.4.44.dist-info → gwaslab-3.4.46.dist-info}/WHEEL +1 -1
- {gwaslab-3.4.44.dist-info → gwaslab-3.4.46.dist-info}/LICENSE +0 -0
- {gwaslab-3.4.44.dist-info → gwaslab-3.4.46.dist-info}/LICENSE_before_v3.4.39 +0 -0
- {gwaslab-3.4.44.dist-info → gwaslab-3.4.46.dist-info}/top_level.txt +0 -0
gwaslab/__init__.py
CHANGED
@@ -44,4 +44,5 @@ from gwaslab.viz_plot_trumpetplot import plot_power
 from gwaslab.viz_plot_trumpetplot import plot_power_x
 from gwaslab.util_ex_process_h5 import process_vcf_to_hfd5
 from gwaslab.util_ex_run_susie import _run_susie_rss as run_susie_rss
-from gwaslab.io_read_tabular import _read_tabular as read_tabular
+from gwaslab.io_read_tabular import _read_tabular as read_tabular
+from gwaslab.util_in_meta import meta_analyze
gwaslab/bd_common_data.py
CHANGED
@@ -298,6 +298,28 @@ def gtf_to_protein_coding(gtfpath,log=Log(),verbose=True):
 
     return protein_coding_path
 
+def gtf_to_all_gene(gtfpath,log=Log(),verbose=True):
+    all_gene_path = gtfpath[:-6]+"all_genes.gtf.gz"
+    # if not existing, extract protein coding records and output to a new file
+    if not path.isfile(all_gene_path):
+
+        # get gene list
+        log.write(" - Extracting genes from {}".format(gtfpath),verbose=verbose)
+        gtf = read_gtf(gtfpath,usecols=["feature","gene_biotype","gene_id","gene_name"])
+        gene_list = gtf.loc[gtf["feature"]=="gene","gene_id"].values
+        log.write(" - Loaded {} genes.".format(len(gene_list)),verbose=verbose)
+
+        # extract entry using csv
+        gtf_raw = pd.read_csv(gtfpath,sep="\t",header=None,comment="#",dtype="string")
+        gtf_raw["_gene_id"] = gtf_raw[8].str.extract(r'gene_id "([\w\.-]+)"')
+        gtf_raw = gtf_raw.loc[ gtf_raw["_gene_id"].isin(gene_list) ,:]
+        gtf_raw = gtf_raw.drop("_gene_id",axis=1)
+
+        log.write(" - Extracted records are saved to : {} ".format(all_gene_path),verbose=verbose)
+        gtf_raw.to_csv(all_gene_path, header=None, index=None, sep="\t")
+
+    return all_gene_path
+
 ####################################################################################################################
 # From BioPython: https://github.com/biopython/biopython/blob/c5a6b1374267d769b19c1022b4b45472316e78b4/Bio/Seq.py#L36
 def _maketrans(complement_mapping):
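For readers of the new gtf_to_all_gene() above, a minimal sketch (not part of the diff) of the gene_id extraction step: the ninth GTF column holds the attribute string, and the same regex used above pulls out the quoted gene_id. The two attribute strings below are hypothetical examples.

    import pandas as pd

    # Hypothetical GTF-like rows: column 8 holds the attribute string.
    raw = pd.DataFrame({8: ['gene_id "ENSG00000157764"; gene_name "BRAF";',
                            'gene_id "ENSG00000141510"; gene_name "TP53";']}, dtype="string")

    # Same pattern as gtf_to_all_gene: extract the quoted gene_id from the attributes.
    raw["_gene_id"] = raw[8].str.extract(r'gene_id "([\w\.-]+)"')
    print(raw["_gene_id"].tolist())   # ['ENSG00000157764', 'ENSG00000141510']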
gwaslab/g_Sumstats.py
CHANGED
@@ -121,6 +121,7 @@ class Sumstats():
                  snpr2=None,
                  status=None,
                  other=[],
+                 usekeys=None,
                  direction=None,
                  verbose=True,
                  study="Study_1",
@@ -200,6 +201,7 @@ class Sumstats():
                              trait=trait,
                              status=status,
                              other=other,
+                             usekeys=usekeys,
                              verbose=verbose,
                              readargs=readargs,
                              log=self.log)
gwaslab/g_SumstatsPair.py
CHANGED
@@ -139,7 +139,7 @@ class SumstatsPair( ):
         self.clumps["clumps"], self.clumps["plink_log"] = _clump(self.data, log=self.log, p="P_1",mlog10p="MLOG10P_1", study = self.study_name, **kwargs)
 
     def to_coloc(self,**kwargs):
-        self.to_finemapping_file_path, self.plink_log = tofinemapping(self.data,study=self.study_name,suffixes=self.suffixes,log=self.log,**kwargs)
+        self.to_finemapping_file_path, output_file_list, self.plink_log = tofinemapping(self.data,study=self.study_name,suffixes=self.suffixes,log=self.log,**kwargs)
 
     def run_coloc_susie(self,**kwargs):
 
gwaslab/g_vchange_status.py
CHANGED
@@ -1,13 +1,15 @@
 import pandas as pd
 
+CATEGORIES = {str(j+i) for j in [1900000,3800000,9700000,9800000,9900000] for i in range(0,100000)}
+
 def vchange_status(status,digit,before,after):
     dic={}
     for i in range(len(before)):
         dic[before[i]]=after[i]
     if digit>1:
-        return status.str[:digit-1]+status.str[digit-1].replace(dic)+status.str[digit:]
+        return pd.Categorical(status.str[:digit-1]+status.str[digit-1].replace(dic)+status.str[digit:],categories=CATEGORIES)
     else:
-        return status.str[digit-1].replace(dic)+status.str[digit:]
+        return pd.Categorical(status.str[digit-1].replace(dic)+status.str[digit:],categories=CATEGORIES)
 
 def copy_status(from_status,to_status, digit):
     if digit>1:
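For context, a minimal sketch (not part of the diff) of why returning pd.Categorical with a fixed CATEGORIES universe helps: the seven-digit STATUS codes come from a known, bounded set, so a shared categorical dtype stores the column as small integer codes rather than millions of Python strings. The example passes the categories as a list; the toy status values are arbitrary.

    import pandas as pd

    # Same fixed universe of valid STATUS codes as the CATEGORIES set added above.
    CATEGORIES = {str(j + i) for j in [1900000, 3800000, 9700000, 9800000, 9900000]
                  for i in range(0, 100000)}

    status = pd.Series(["1960099", "9999999", "3800000"])          # toy STATUS codes
    status_cat = pd.Categorical(status, categories=list(CATEGORIES))

    # Values are stored as integer codes into the shared category index.
    print(status_cat.codes.dtype)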
gwaslab/g_version.py
CHANGED
gwaslab/hm_harmonize_sumstats.py
CHANGED
@@ -355,7 +355,11 @@ def oldcheckref(sumstats,ref_seq,chrom="CHR",pos="POS",ea="EA",nea="NEA",status=
 
     log.write("\n",end="",show_time=False,verbose=verbose)
 
-
+    CATEGORIES = {str(j+i) for j in [1900000,3800000,9700000,9800000,9900000] for i in range(0,100000)}
+    sumstats[status] = pd.Categorical(sumstats[status],categories=CATEGORIES)
+    #sumstats[status] = sumstats[status].astype("string")
+
+
     available_to_check =sum( (~sumstats[pos].isna()) & (~sumstats[nea].isna()) & (~sumstats[ea].isna()))
     status_0=sum(sumstats["STATUS"].str.match("\w\w\w\w\w[0]\w", case=False, flags=0, na=False))
     status_3=sum(sumstats["STATUS"].str.match("\w\w\w\w\w[3]\w", case=False, flags=0, na=False))
@@ -669,9 +673,11 @@ def checkref(sumstats,ref_seq,chrom="CHR",pos="POS",ea="EA",nea="NEA",status="ST
     sumstats_to_check = sumstats.loc[to_check_ref,[chrom,pos,ea,nea,status]]
     sumstats.loc[to_check_ref,status] = check_status(sumstats_to_check, all_records_dict, log=log, verbose=verbose)
     log.write(" -Finished checking records", verbose=verbose)
-
-
-
+
+    CATEGORIES = {str(j+i) for j in [1900000,3800000,9700000,9800000,9900000] for i in range(0,100000)}
+    sumstats[status] = pd.Categorical(sumstats[status],categories=CATEGORIES)
+    #sumstats[status] = sumstats[status].astype("string")
+
     available_to_check =sum( (~sumstats[pos].isna()) & (~sumstats[nea].isna()) & (~sumstats[ea].isna()))
     status_0=sum(sumstats["STATUS"].str.match("\w\w\w\w\w[0]\w", case=False, flags=0, na=False))
     status_3=sum(sumstats["STATUS"].str.match("\w\w\w\w\w[3]\w", case=False, flags=0, na=False))
@@ -700,6 +706,7 @@ def checkref(sumstats,ref_seq,chrom="CHR",pos="POS",ea="EA",nea="NEA",status="ST
     if remove is True:
         sumstats = sumstats.loc[~sumstats["STATUS"].str.match("\w\w\w\w\w[8]\w"),:]
         log.write(" -Variants not on given reference sequence were removed.",verbose=verbose)
+
 
     finished(log, verbose, _end_line)
     return sumstats
@@ -861,8 +868,9 @@ def parallelizeassignrsid(sumstats, path, ref_mode="vcf",snpid="SNPID",rsid="rsI
     if is_enough_info == False: return sumstats
     ############################################################################################
 
-    standardized_normalized = sumstats["STATUS"].str.match("\w\w\w[0][01234]\w\w", case=False, flags=0, na=False)
-
+    #standardized_normalized = sumstats["STATUS"].str.match("\w\w\w[0][01234]\w\w", case=False, flags=0, na=False)
+    standardized_normalized = sumstats["STATUS"] == sumstats["STATUS"]
+
     if rsid not in sumstats.columns:
         sumstats[rsid]=pd.Series(dtype="string")
 
gwaslab/io_preformat_input.py
CHANGED
@@ -55,6 +55,7 @@ def preformat(sumstats,
               trait=None,
               build=None,
               other=[],
+              usekeys=None,
               verbose=False,
               readargs=None,
               log=None):
@@ -65,6 +66,11 @@ def preformat(sumstats,
     dtype_dictionary ={}
 
     #######################################################################################################################################################
+    # workflow:
+    # 1. formatbook
+    # 2. user specified header
+    # 3. usekeys
+
     if fmt is not None:
         # loading format parameters
         log.write("Start to load format from formatbook....",verbose=verbose)
@@ -129,6 +135,8 @@ def preformat(sumstats,
 
         ################################################
         for key,value in rename_dictionary.items():
+            # check avaiable keys key->raw header
+            # usecols : a list of raw headers to load from file/DataFrame
             if key in raw_cols:
                 usecols.append(key)
                 if value in ["EA","NEA"]:
@@ -137,7 +145,7 @@ def preformat(sumstats,
                     dtype_dictionary[value]="string"
 
     except ValueError:
-        raise ValueError("Please input a path or a pd.DataFrame, and make sure the columns you specified are in the file.")
+        raise ValueError("Please input a path or a pd.DataFrame, and make sure the separator is correct and the columns you specified are in the file.")
 
     ###################################################################################################################################################
     ## check columns/datatype to use
@@ -276,6 +284,19 @@ def preformat(sumstats,
         else:
             study = raw_cols[9]
         usecols = usecols + [study]
+
+    if usekeys is not None:
+        # extract only specified keys
+        usecols_new =[]
+        for i in usekeys:
+            for k, v in rename_dictionary.items():
+                if i == v:
+                    usecols_new.append(k)
+        usecols_valid =[]
+        for i in usecols_new:
+            if i in usecols:
+                usecols_valid.append(i)
+        usecols = usecols_valid
     #loading data ##########################################################################################################
 
     try:
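Based on the new usekeys parameter threaded through Sumstats() and preformat() above, a hedged usage sketch: usekeys restricts loading to the listed GWASLab column keys after the header mapping is resolved. The file name and format below are placeholders.

    import gwaslab as gl

    # Load only the columns needed downstream; "mysumstats.tsv.gz" and fmt="auto"
    # are placeholders for your own file and format.
    sumstats = gl.Sumstats("mysumstats.tsv.gz",
                           fmt="auto",
                           usekeys=["SNPID", "CHR", "POS", "EA", "NEA", "BETA", "SE", "EAF", "N"])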
gwaslab/qc_fix_sumstats.py
CHANGED
@@ -1061,6 +1061,13 @@ def check_range(sumstats, var_range, header, coltocheck, cols_to_check, log, ver
         if sum(is_low_p) >0:
             log.warning("Extremely low P detected (P=0 or P < minimum positive value of float64) : {}".format(sum(is_low_p)))
             log.warning("Please consider using MLOG10P instead.")
+
+    if header=="INFO":
+        is_high_info = sumstats["INFO"]>1
+        if sum(is_high_info) >0:
+            log.warning("High INFO detected (INFO>1) : {}".format(sum(is_high_info)))
+            log.warning("max(INFO): {}".format(sumstats["INFO"].max()))
+            log.warning("Please check if this is as expected.")
 
     if sum(~is_valid)>0:
         try:
@@ -1102,7 +1109,7 @@ def sanitycheckstats(sumstats,
                      HR=(-100,100),
                      HR_95L=(0,float("Inf")),
                      HR_95U=(0,float("Inf")),
-                     info=(0,
+                     info=(0,2),
                      float_tolerence = 1e-7,
                      verbose=True,
                      log=Log()):
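For reference, the added INFO sanity check reduces to the standalone logic sketched below on a toy frame (arbitrary values); this is an illustration, not gwaslab API.

    import pandas as pd

    sumstats = pd.DataFrame({"INFO": [0.85, 0.99, 1.03, 1.20]})   # toy imputation INFO scores

    # Mirror of the added check: warn when imputation INFO exceeds 1.
    is_high_info = sumstats["INFO"] > 1
    if sum(is_high_info) > 0:
        print("High INFO detected (INFO>1) : {}".format(sum(is_high_info)))
        print("max(INFO): {}".format(sumstats["INFO"].max()))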
gwaslab/util_in_filter_value.py
CHANGED
@@ -10,6 +10,7 @@ from gwaslab.g_vchange_status import vchange_status
 from gwaslab.qc_fix_sumstats import sortcoordinate
 from gwaslab.qc_fix_sumstats import start_to
 from gwaslab.qc_fix_sumstats import finished
+from gwaslab.qc_fix_sumstats import _process_build
 from gwaslab.hm_harmonize_sumstats import is_palindromic
 
 import gc
@@ -430,8 +431,43 @@ def _filter_snp(sumstats, mode="in", ea="EA",nea="NEA", log=Log(),verbose=True):
     log.write("Finished filtering SNPs.",verbose=verbose)
     return snp
 
-def _exclude_hla(sumstats, chrom="CHR", pos="POS", lower=
-
+def _exclude_hla(sumstats, chrom="CHR", pos="POS", lower=None ,upper=None, build=None, mode="xmhc", log=Log(), verbose=True):
+
+    if build is not None:
+        build = _process_build(build = build,log = log,verbose = verbose)
+    # xMHC : HIST1H2AA ~ 7.6mb ~ RPL12P1
+    # reference: Horton, R., Wilming, L., Rand, V., Lovering, R. C., Bruford, E. A., Khodiyar, V. K., ... & Beck, S. (2004). Gene map of the extended human MHC. Nature Reviews Genetics, 5(12), 889-899.
+    # hg38: 25,726,063 ~ 33,400,644
+    # hg19 : 25,726,291 ~ 33,368,421
+
+    # HLA : GABBR1 ~ 3.78mb ~ KIFC1
+    # reference: Shiina, T., Hosomichi, K., Inoko, H., & Kulski, J. K. (2009). The HLA genomic loci map: expression, interaction, diversity and disease. Journal of human genetics, 54(1), 15-39.
+    # hg38: 29,602,238 ~ 33,409,896
+    # hg19: 29,570,015 ~ 33,377,673
+
+    if build == "19":
+        if mode =="xmhc":
+            lower=25000000
+            upper=34000000
+        if mode =="hla" or mode =="mhc":
+            lower=29500000
+            upper=33500000
+    if build == "38":
+        if mode =="xmhc":
+            lower=25000000
+            upper=34000000
+        if mode =="hla" or mode =="mhc":
+            lower=29500000
+            upper=33500000
+    else:
+        # -> 25,000,000 ~ 34,000,000
+        if mode =="xmhc":
+            lower=25000000
+            upper=34000000
+        if mode =="hla" or mode =="mhc":
+            lower=29500000
+            upper=33500000
+
     raw_len = len(sumstats)
 
     if str(sumstats[chrom].dtype) == "string":
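A hedged usage sketch of the rewritten _exclude_hla() signature (the DataFrame is a placeholder): per the comments above, "xmhc" is rounded to 25,000,000-34,000,000 and "hla"/"mhc" to 29,500,000-33,500,000 on both builds, and the helper should drop variants in that chromosome 6 window.

    from gwaslab.util_in_filter_value import _exclude_hla

    # sumstats_df is assumed to be a harmonized summary-statistics DataFrame
    # with "CHR" and "POS" columns.
    no_xmhc = _exclude_hla(sumstats_df, build="19", mode="xmhc")   # extended MHC, chr6:25M-34M
    no_hla  = _exclude_hla(sumstats_df, build="38", mode="hla")    # classical HLA, chr6:29.5M-33.5M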
gwaslab/util_in_get_sig.py
CHANGED
@@ -11,6 +11,7 @@ from gwaslab.bd_common_data import get_chr_to_number
 from gwaslab.bd_common_data import get_number_to_chr
 from gwaslab.bd_common_data import get_chr_to_NC
 from gwaslab.bd_common_data import gtf_to_protein_coding
+from gwaslab.bd_common_data import gtf_to_all_gene
 from gwaslab.bd_download import check_and_download
 from gwaslab.util_ex_gwascatalog import gwascatalog_trait
 from gwaslab.qc_fix_sumstats import check_dataframe_shape
@@ -38,6 +39,7 @@ def getsig(insumstats,
            wc_correction=False,
            build="19",
            source="ensembl",
+           gtf_path=None,
            verbose=True):
     """
     Extract the lead variants using a sliding window. P or MLOG10P will be used and converted to SCALEDP for sorting.
@@ -172,6 +174,7 @@ def getsig(insumstats,
                           xymt=xymt,
                           build=build,
                           source=source,
+                          gtf_path=gtf_path,
                           verbose=verbose)
 
     # drop internal id
@@ -253,6 +256,7 @@ def annogene(
         xymt=["X","Y","MT"],
         build="19",
         source="ensembl",
+        gtf_path=None,
         verbose=True):
 
     log.write("Start to annotate variants with nearest gene name(s)...", verbose=verbose)
@@ -267,8 +271,13 @@ def annogene(
             #| gzip >Homo_sapiens.GRCh37.75.processed.chr.gtf.gz
 
             #gtf_path = check_and_download("ensembl_hg19_gtf_protein_coding")
-            gtf_path
-
+            if gtf_path is None:
+                gtf_path = check_and_download("ensembl_hg19_gtf")
+                gtf_path = gtf_to_protein_coding(gtf_path,log=log,verbose=verbose)
+            else:
+                log.write(" -Using user-provided gtf:{}".format(gtf_path))
+                gtf_path = gtf_to_all_gene(gtf_path,log=log,verbose=verbose)
+
             gtf_db_path = gtf_path[:-2]+"db"
 
             data = Genome(
@@ -283,8 +292,13 @@ def annogene(
         elif build=="38":
             log.write(" -Assigning Gene name using ensembl_hg38_gtf for protein coding genes", verbose=verbose)
             #gtf_path = check_and_download("ensembl_hg38_gtf_protein_coding")
-            gtf_path
-
+            if gtf_path is None:
+                gtf_path = check_and_download("ensembl_hg38_gtf")
+                gtf_path = gtf_to_protein_coding(gtf_path,log=log,verbose=verbose)
+            else:
+                log.write(" -Using user-provided gtf:{}".format(gtf_path))
+                gtf_path = gtf_to_all_gene(gtf_path,log=log,verbose=verbose)
+
             gtf_db_path = gtf_path[:-2]+"db"
             data = Genome(
                 reference_name='GRCh38',
@@ -300,8 +314,13 @@ def annogene(
         if build=="19":
             log.write(" -Assigning Gene name using NCBI refseq latest GRCh37 for protein coding genes", verbose=verbose)
             #gtf_path = check_and_download("refseq_hg19_gtf_protein_coding")
-            gtf_path
-
+            if gtf_path is None:
+                gtf_path = check_and_download("refseq_hg19_gtf")
+                gtf_path = gtf_to_protein_coding(gtf_path,log=log,verbose=verbose)
+            else:
+                log.write(" -Using user-provided gtf:{}".format(gtf_path))
+                gtf_path = gtf_to_all_gene(gtf_path,log=log,verbose=verbose)
+
             gtf_db_path = gtf_path[:-2]+"db"
             data = Genome(
                 reference_name='GRCh37',
@@ -315,8 +334,13 @@ def annogene(
         elif build=="38":
             log.write(" -Assigning Gene name using NCBI refseq latest GRCh38 for protein coding genes", verbose=verbose)
             #gtf_path = check_and_download("refseq_hg38_gtf_protein_coding")
-            gtf_path
-
+            if gtf_path is None:
+                gtf_path = check_and_download("refseq_hg38_gtf")
+                gtf_path = gtf_to_protein_coding(gtf_path,log=log,verbose=verbose)
+            else:
+                log.write(" -Using user-provided gtf:{}".format(gtf_path))
+                gtf_path = gtf_to_all_gene(gtf_path,log=log,verbose=verbose)
+
             gtf_db_path = gtf_path[:-2]+"db"
             data = Genome(
                 reference_name='GRCh38',
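A hedged sketch of the new gtf_path option: when a user-supplied GTF is given, annogene() routes it through gtf_to_all_gene() instead of downloading and trimming the default ensembl/refseq GTF. Only parameters visible in this diff are shown; the other getsig() arguments (column names, thresholds, window size) are omitted and assumed to keep their defaults, and the paths/DataFrame below are placeholders.

    from gwaslab.util_in_get_sig import getsig

    # insumstats_df is a placeholder DataFrame of summary statistics.
    lead_variants = getsig(insumstats_df,
                           build="19",
                           source="ensembl",
                           gtf_path="my_annotation.gtf.gz",   # placeholder custom GTF
                           verbose=True)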
gwaslab/util_in_meta.py
ADDED
@@ -0,0 +1,234 @@
+
+import pandas as pd
+import numpy as np
+from scipy.stats.distributions import chi2
+from scipy.stats import norm
+from gwaslab.g_Log import Log
+from gwaslab.io_to_pickle import load_data_from_pickle
+from gwaslab.g_Sumstats import Sumstats
+import gc
+
+def meta_analyze(sumstats_list,random_effects=False, match_allele=True, log=Log()):
+
+    ###########################################################################
+    columns=["SNPID","CHR","POS","EA","NEA"]
+    results_df = pd.DataFrame(columns=columns)
+
+    log.write("Start to perform meta-analysis...")
+    log.write(" -Datasets:")
+    for index,sumstats_path in enumerate(sumstats_list):
+        if isinstance(sumstats_path, pd.DataFrame):
+            log.write(" -Sumstats #{}: {} ".format(index, sumstats_path))
+        elif isinstance(sumstats_path, Sumstats):
+            log.write(" -Sumstats #{}: {} ".format(index, sumstats_path))
+        else:
+            log.write(" -Sumstats #{}: {} ".format(index, sumstats_path))
+
+
+    # extract all variants information
+    log.write(" -Iterating through {} datasets to determine variant list...".format(len(sumstats_list)))
+
+    for index,sumstats_path in enumerate(sumstats_list):
+        sumstats = get_sumstats(sumstats_path,usekeys=["SNPID","CHR","POS","EA","NEA"])
+        new_rows = sumstats.loc[~sumstats["SNPID"].isin(results_df["SNPID"]),["SNPID","CHR","POS","EA","NEA"]]
+        log.write(" -Sumstats #{}: {} new variants (out of {}) are being added to analysis...".format(index, len(new_rows),len(sumstats)))
+
+        if len(new_rows)>0:
+            if len(results_df) == 0:
+                results_df = new_rows
+            else:
+                results_df = pd.concat([results_df, new_rows],ignore_index=True)
+        del sumstats
+        del new_rows
+        gc.collect()
+
+
+
+    ###########################################################################
+    log.write(" -Initiating result DataFrame...")
+    columns=["SNPID","CHR","POS","EA","NEA","_BETAW_SUM","_EA_N","_NEA_N","_BETA2W_SUM","_W_SUM","EAF","N","DIRECTION","BETA","SE","DOF"]
+    results_df = results_df.set_index("SNPID")
+    results_df["N"] = 0
+    results_df["_BETAW_SUM"] = 0.0
+    results_df["_BETA2W_SUM"] = 0.0
+    results_df["_W_SUM"] = 0.0
+    results_df["_W2_SUM"] = 0.0
+    results_df["_EA_N"] = 0.0
+    results_df["_NEA_N"] = 0.0
+    results_df["N"] = 0
+    results_df["DIRECTION"] = ""
+    results_df["BETA"] = 0.0
+    results_df["SE"] = 0.0
+    results_df["DOF"] = -1
+
+    dtype_dict ={
+        "_BETAW_SUM":"float64",
+        "_EA_N":"float64",
+        "_NEA_N":"float64",
+        "_BETA2W_SUM":"float64",
+        "_W_SUM":"float64",
+        "BETA":"float64",
+        "SE":"float64",
+        "N":"Int64",
+        "DOF":"Int64"
+    }
+    results_df=results_df.astype(dtype_dict)
+    ###########################################################################
+
+    log.write(" -Iterating through {} datasets to compute statistics for fixed-effect model...".format(len(sumstats_list)))
+    for index,sumstats_path in enumerate(sumstats_list):
+        to_use_sumstats = process_sumstats(sumstats_path,
+                                           results_df[["EA","NEA"]],
+                                           index=index,
+                                           match_allele=match_allele,)
+        sumstats_index = to_use_sumstats.index
+        results_df_not_in_sumstat_index = results_df.index[~results_df.index.isin(to_use_sumstats.index)]
+
+        # N and DOF
+        results_df.loc[sumstats_index, "N"] += to_use_sumstats["N"]
+        results_df.loc[sumstats_index, "DOF"] += 1
+
+        # BEAT and SE
+        results_df.loc[sumstats_index,"_BETA2W_SUM"] += to_use_sumstats["BETA"]**2 *(1/(to_use_sumstats["SE"]**2))
+        results_df.loc[sumstats_index,"_BETAW_SUM"] += to_use_sumstats["BETA"]*(1/(to_use_sumstats["SE"]**2))
+        results_df.loc[sumstats_index,"_W_SUM"] += 1/(to_use_sumstats["SE"]**2)
+        results_df.loc[sumstats_index,"_W2_SUM"] += results_df.loc[sumstats_index,"_W_SUM"]**2
+
+        # EAF
+        results_df.loc[sumstats_index,"_EA_N"] += to_use_sumstats["N"]*to_use_sumstats["EAF"]
+        results_df.loc[sumstats_index,"_NEA_N"] += to_use_sumstats["N"]*(1 - to_use_sumstats["EAF"])
+
+        # DIRECTION
+        beta_index = to_use_sumstats[to_use_sumstats["BETA"]>0].index
+        results_df.loc[beta_index, "DIRECTION"] += "+"
+        beta_index = to_use_sumstats[to_use_sumstats["BETA"]==0].index
+        results_df.loc[beta_index, "DIRECTION"] += "0"
+        beta_index = to_use_sumstats[to_use_sumstats["BETA"]<0].index
+        results_df.loc[beta_index, "DIRECTION"] += "-"
+        results_df.loc[results_df_not_in_sumstat_index, "DIRECTION"] += "?"
+
+        del to_use_sumstats
+        gc.collect()
+
+    ##############################################################################
+    # fixed - effect statistics
+    results_df["BETA"] = results_df["_BETAW_SUM"] / results_df["_W_SUM"]
+    results_df["EAF"] = results_df["_EA_N"] / (results_df["_EA_N"] + results_df["_NEA_N"])
+    results_df["SE"] = np.sqrt(1/results_df["_W_SUM"])
+    results_df["Z"] = results_df["BETA"] / results_df["SE"]
+    results_df["P"] = norm.sf(abs(results_df["Z"]))*2
+    results_df["Q"] = results_df["_BETA2W_SUM"] - (results_df["_BETAW_SUM"]**2 / results_df["_W_SUM"])
+
+    for dof in results_df["DOF"].unique():
+        results_df_dof_index = results_df["DOF"] == dof
+        results_df.loc[results_df_dof_index,"P_HET"] = chi2.sf(results_df.loc[results_df_dof_index, "Q"].values,dof)
+        gc.collect()
+
+    results_df["I2_HET"] = (results_df["Q"] - results_df["DOF"])/results_df["Q"]
+    results_df.loc[results_df["I2_HET"]<0, "I2_HET"] = 0
+
+    results_df=results_df.drop(columns=["_EA_N","_NEA_N"])
+    gc.collect()
+
+    ###########################################################################
+    if random_effects==True:
+        log.write(" -Iterating through {} datasets to compute statistics for random-effects model...".format(len(sumstats_list)))
+        results_df["_R2"] = (results_df["Q"] - results_df["DOF"])/(results_df["_W_SUM"] - (results_df["_W2_SUM"]/results_df["_W_SUM"]))
+        results_df.loc[results_df["_R2"]<0, "_R2"] = 0
+        variant_index_random = results_df[results_df["_R2"]>0].index
+
+        results_df["_BETAW_SUM_R"] = 0.0
+        results_df["_W_SUM_R"] = 0.0
+        results_df["BETA_RANDOM"] = results_df["BETA"]
+        results_df["SE_RANDOM"] = results_df["SE"]
+
+        for index,sumstats_path in enumerate(sumstats_list):
+            to_use_sumstats = process_sumstats(sumstats_path,
+                                               results_df.loc[variant_index_random, ["EA","NEA"]],
+                                               index=index,
+                                               match_allele=match_allele,
+                                               extract_index=variant_index_random)
+
+            sumstats_index = to_use_sumstats.index
+
+            # BEAT and SE
+            results_df.loc[sumstats_index,"_BETAW_SUM_R"] += to_use_sumstats["BETA"]*(1/(to_use_sumstats["SE"]**2 + results_df.loc[sumstats_index,"_R2"]))
+            results_df.loc[sumstats_index,"_W_SUM_R"] += 1/(to_use_sumstats["SE"]**2 + results_df.loc[sumstats_index,"_R2"])
+
+            del to_use_sumstats
+            del sumstats_index
+            gc.collect()
+
+        results_df.loc[variant_index_random,"BETA_RANDOM"] = results_df.loc[variant_index_random,"_BETAW_SUM_R"] / results_df.loc[variant_index_random,"_W_SUM_R"]
+        results_df.loc[variant_index_random,"SE_RANDOM"] = np.sqrt(1/results_df.loc[variant_index_random,"_W_SUM_R"])
+        results_df["Z_RANDOM"] = results_df["BETA_RANDOM"] / results_df["SE_RANDOM"]
+        results_df["P_RANDOM"] = norm.sf(abs(results_df["Z_RANDOM"]))*2
+        results_df = results_df.drop(columns=["_BETAW_SUM_R","_W_SUM_R"])
+
+    gc.collect()
+    ###########################################################################
+    results_df = results_df.drop(columns=["_BETAW_SUM","_BETA2W_SUM","_W_SUM","_R2","_W2_SUM"]).sort_values(by=["CHR","POS"])
+    gc.collect()
+    log.write("Finished meta-analysis successfully!")
+
+    return results_df
+
+def process_sumstats(sumstats_path, results_df, index, extract_index=None, match_allele=True, log=Log()):
+
+    if extract_index is None:
+        extract_index = results_df.index
+
+    sumstats = get_sumstats(sumstats_path)
+
+    to_use_sumstats = sumstats.loc[sumstats["SNPID"].isin(extract_index.values),["SNPID","EA","NEA","BETA","N","SE","EAF"]]
+
+    if len(to_use_sumstats)>0:
+        n_pre_dup = len(to_use_sumstats)
+        log.write(" -Processing {} variants from sumstats #{}".format(len(to_use_sumstats), index))
+
+        to_use_sumstats = to_use_sumstats.drop_duplicates(subset="SNPID").set_index("SNPID")
+        n_post_dup = len(to_use_sumstats)
+
+        if n_pre_dup - n_post_dup>0:
+            log.write(" -Dropping {} duplicated variants from sumstats #{}".format(n_pre_dup - n_post_dup, index))
+
+    if match_allele==True:
+        sumstats_index = to_use_sumstats.index
+        # drop not matched
+        is_match = (to_use_sumstats.loc[sumstats_index,"EA"] == results_df.loc[sumstats_index, "EA"] )&(to_use_sumstats.loc[sumstats_index,"NEA"] == results_df.loc[sumstats_index, "NEA"])
+        is_flip = (to_use_sumstats.loc[sumstats_index,"EA"] == results_df.loc[sumstats_index, "NEA"])&( to_use_sumstats.loc[sumstats_index,"NEA"] == results_df.loc[sumstats_index, "EA"])
+        is_flip = is_flip | ((to_use_sumstats.loc[sumstats_index,"NEA"] == results_df.loc[sumstats_index, "EA"])&( to_use_sumstats.loc[sumstats_index,"EA"] == results_df.loc[sumstats_index, "NEA"]))
+        is_to_use = is_match|is_flip
+
+        if sum(~is_to_use)>0:
+            log.write(" -Dropping {} variants with unmatched alleles from sumstats #{}".format(sum(~is_to_use), index))
+
+        to_use_sumstats.loc[is_flip[is_flip].index, "BETA"] = -to_use_sumstats.loc[is_flip[is_flip].index, "BETA"]
+        to_use_sumstats.loc[is_flip[is_flip].index, "EAF"] = 1-to_use_sumstats.loc[is_flip[is_flip].index, "EAF"]
+        to_use_sumstats = to_use_sumstats.loc[is_to_use[is_to_use].index,:]
+
+    gc.collect()
+
+    return to_use_sumstats
+
+def get_sumstats(input_path,usekeys=None):
+    if isinstance(input_path, tuple):
+        path = input_path[0]
+        path_args = input_path[1]
+    else:
+        path = input_path
+        path_args={}
+
+    if isinstance(path, pd.DataFrame):
+        sumstats = Sumstats(path,fmt="auto",verbose=False,usekeys=usekeys,**path_args).data
+    elif isinstance(path, Sumstats):
+        sumstats = path.data
+        if usekeys is not None:
+            sumstats = sumstats[usekeys]
+    elif path[-6:] == "pickle":
+        sumstats = load_data_from_pickle(path)
+        if usekeys is not None:
+            sumstats = sumstats[usekeys]
+    else:
+        sumstats = Sumstats(path,fmt="auto",verbose=False,usekeys=usekeys,**path_args).data
+    return sumstats
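For readers of the new module: the accumulators above follow standard inverse-variance-weighted (IVW) fixed-effect meta-analysis with Cochran's Q and I-squared heterogeneity statistics; the optional random-effects step follows the DerSimonian-Laird form. In the code's notation (w_i = 1/SE_i^2, DOF = k - 1 studies per variant, _R2 approximately tau^2):

    \beta_{FE} = \frac{\sum_i w_i \beta_i}{\sum_i w_i}, \qquad
    SE_{FE} = \sqrt{\frac{1}{\sum_i w_i}}, \qquad
    Z = \frac{\beta_{FE}}{SE_{FE}}, \qquad
    P = 2\,\Phi(-|Z|)

    Q = \sum_i w_i \beta_i^2 - \frac{\left(\sum_i w_i \beta_i\right)^2}{\sum_i w_i}, \qquad
    I^2 = \max\!\left(0,\ \frac{Q - \mathrm{DOF}}{Q}\right), \qquad
    \tau^2 = \max\!\left(0,\ \frac{Q - \mathrm{DOF}}{\sum_i w_i - \sum_i w_i^2 / \sum_i w_i}\right)

    \beta_{RE} = \frac{\sum_i \beta_i / (SE_i^2 + \tau^2)}{\sum_i 1 / (SE_i^2 + \tau^2)}, \qquad
    SE_{RE} = \sqrt{\frac{1}{\sum_i 1 / (SE_i^2 + \tau^2)}}

A hedged usage sketch (paths are placeholders; elements of the list may also be pandas DataFrames or gl.Sumstats objects, per get_sumstats() above):

    import gwaslab as gl

    meta = gl.meta_analyze(["study1_sumstats.tsv.gz", "study2_sumstats.tsv.gz"],
                           random_effects=True,
                           match_allele=True)
    print(meta[["CHR", "POS", "EA", "NEA", "BETA", "SE", "P", "P_HET", "I2_HET"]].head())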
gwaslab/util_in_snphwe.py
ADDED
@@ -0,0 +1,58 @@
+
+import numpy as np
+import pandas as pd
+
+def snphwe(obs_hets, obs_hom1, obs_hom2):
+    # Convert cpp code from (Jeremy McRae) to python
+    # https://github.com/jeremymcrae/snphwe/blob/master/src/snp_hwe.cpp
+    #/* (original comments)
+    #// This code implements an exact SNP test of Hardy-Weinberg Equilibrium as
+    #// described in Wigginton, JE, Cutler, DJ, and Abecasis, GR (2005) A Note on
+    #// Exact Tests of Hardy-Weinberg Equilibrium. AJHG 76: 887-893
+    #//
+    #// Written by Jan Wigginton
+    #*/
+
+    obs_homr = min(obs_hom1, obs_hom2)
+    obs_homc = max(obs_hom1, obs_hom2)
+
+    rare = 2 * obs_homr + obs_hets
+    genotypes = obs_hets + obs_homc + obs_homr
+
+    probs = np.array([0.0 for i in range(rare +1)])
+
+    mid = rare * (2 * genotypes - rare) // (2 * genotypes)
+
+    if mid % 2 != rare%2:
+        mid += 1
+
+    probs[mid] = 1.0
+
+    sum_p = 1 #probs[mid]
+    curr_homr = (rare - mid) // 2
+    curr_homc = genotypes - mid - curr_homr
+
+
+    for curr_hets in range(mid, 1, -2):
+        probs[curr_hets - 2] = probs[curr_hets] * curr_hets * (curr_hets - 1.0)/ (4.0 * (curr_homr + 1.0) * (curr_homc + 1.0))
+        sum_p+= probs[curr_hets - 2]
+        curr_homr += 1
+        curr_homc += 1
+
+    curr_homr = (rare - mid) // 2
+    curr_homc = genotypes - mid - curr_homr
+
+    for curr_hets in range(mid, rare-1, 2):
+        probs[curr_hets + 2] = probs[curr_hets] * 4.0 * curr_homr * curr_homc/ ((curr_hets + 2.0) * (curr_hets + 1.0))
+        sum_p += probs[curr_hets + 2]
+        curr_homr -= 1
+        curr_homc -= 1
+
+    target = probs[obs_hets]
+    p_hwe = 0.0
+
+    for p in probs:
+        if p <= target :
+            p_hwe += p / sum_p
+
+    return min(p_hwe,1)
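A hedged usage sketch of the new exact test (the counts below are arbitrary toy numbers): snphwe() returns the two-sided exact Hardy-Weinberg equilibrium p-value of Wigginton, Cutler & Abecasis (2005) given observed genotype counts for one biallelic SNP.

    from gwaslab.util_in_snphwe import snphwe

    # Toy counts: 57 heterozygotes, 14 minor-allele homozygotes, 50 major-allele homozygotes.
    p_hwe = snphwe(obs_hets=57, obs_hom1=14, obs_hom2=50)
    print(p_hwe)   # exact two-sided HWE p-value in (0, 1]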
gwaslab/viz_aux_chromatin.py
ADDED
@@ -0,0 +1,111 @@
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+from gwaslab.g_Log import Log
+
+#STATE NO. MNEMONIC DESCRIPTION COLOR NAME COLOR CODE
+#1 TssA Active TSS Red 255,0,0
+#2 TssAFlnk Flanking Active TSS Orange Red 255,69,0
+#3 TxFlnk Transcr. at gene 5' and 3' LimeGreen 50,205,50
+#4 Tx Strong transcription Green 0,128,0
+#5 TxWk Weak transcription DarkGreen 0,100,0
+#6 EnhG Genic enhancers GreenYellow 194,225,5
+#7 Enh Enhancers Yellow 255,255,0
+#8 ZNF/Rpts ZNF genes & repeats Medium Aquamarine 102,205,170
+#9 Het Heterochromatin PaleTurquoise 138,145,208
+#10 TssBiv Bivalent/Poised TSS IndianRed 205,92,92
+#11 BivFlnk Flanking Bivalent TSS/Enh DarkSalmon 233,150,122
+#12 EnhBiv Bivalent Enhancer DarkKhaki 189,183,107
+#13 ReprPC Repressed PolyComb Silver 128,128,128
+#14 ReprPCWk Weak Repressed PolyComb Gainsboro 192,192,192
+#15 Quies Quiescent/Low White 255,255,255
+
+color_dict={
+    "E1": np.array([255,0,0]),
+    "E2": np.array([255,69,0]),
+    "E3": np.array([50,205,50]),
+    "E4": np.array([0,128,0]),
+    "E5": np.array([0,100,0]),
+    "E6": np.array([194,225,5]),
+    "E7": np.array([255,255,0]),
+    "E8": np.array([102,205,170]),
+    "E9": np.array([138,145,208]),
+    "E10":np.array([205,92,92]),
+    "E11":np.array([233,150,122]),
+    "E12":np.array([189,183,107]),
+    "E13":np.array([128,128,128]),
+    "E14":np.array([192,192,192]),
+    "E15":np.array([255,255,255])
+}
+
+color_dict_i={
+    1: np.array([255,0,0]),
+    2: np.array([255,69,0]),
+    3: np.array([50,205,50]),
+    4: np.array([0,128,0]),
+    5: np.array([0,100,0]),
+    6: np.array([194,225,5]),
+    7: np.array([255,255,0]),
+    8: np.array([102,205,170]),
+    9: np.array([138,145,208]),
+    10:np.array([205,92,92]),
+    11:np.array([233,150,122]),
+    12:np.array([189,183,107]),
+    13:np.array([128,128,128]),
+    14:np.array([192,192,192]),
+    15:np.array([255,255,255])
+}
+
+
+def _plot_chromatin_state(region_chromatin_files,
+                          region_chromatin_labels,
+                          region,
+                          fig,
+                          ax,
+                          xlim_i,
+                          log=Log(),
+                          verbose=True):
+    '''
+    files : a list of numbers
+    '''
+    target_chr = region[0]
+    target_start = region[1]
+    target_end = region[2]
+
+    offset_i = xlim_i[0] - region[1]
+
+    ax.set_ylim([-0.05,0.1*len(region_chromatin_files)-0.05])
+    ax.set_xlim([offset_i+target_start,offset_i+target_end])
+
+    px_for_01 = ax.transData.transform([0,0])[1] - ax.transData.transform([0,0.1])[1]
+
+    point=fig.dpi/72
+    points_for_01 = px_for_01*72 / fig.dpi
+
+    # each tissue
+    for i,file in enumerate(region_chromatin_files):
+        log.write(" -Loading : {}".format(file), verbose=verbose)
+        enh = pd.read_csv(file,sep="\t",header=None)
+        enh.columns=["ID","START","END","STATE"]
+        enh["CHR"] = enh["ID"].str.extract(r"chr([0-9]+)").astype("float").astype("Int64")
+        enh["STATE_i"] = enh["STATE"].str.extract(r"([0-9]+)_*").astype("float").astype("Int64")
+        enh_in_region = (enh["CHR"] == target_chr) & ((enh["END"] > target_start) & (enh["START"]<target_end))
+        df =enh.loc[enh_in_region,["STATE_i","START","END"]].sort_values("STATE_i",ascending=False)
+        log.write(" -Number of records in specified region: {}".format(len(df)), verbose=verbose)
+        # each block
+        for index, row in df.iterrows():
+            color=color_dict_i[row["STATE_i"]]
+            ax.plot([offset_i + row["START"] ,offset_i + row["END"]],
+                    [i*0.1,i*0.1],
+                    c=color/255,linewidth=points_for_01,solid_capstyle="butt")
+
+    ## add stripe label
+    if len(region_chromatin_labels) == len(region_chromatin_files):
+        ax.set_yticks([i*0.1 for i in range(len(region_chromatin_labels))],
+                      region_chromatin_labels)
+    else:
+        ax.set_yticks(ticks=[])
+
+    ax.set_xticks(ticks=[])
+    ax.invert_yaxis()
+    return fig
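Judging from the loader above, each chromatin file is expected to be tab-separated with four unnamed columns (ID, START, END, STATE), where ID carries the chromosome label (e.g. chr6) and STATE is a Roadmap 15-state chromHMM mnemonic such as 7_Enh. A toy file could look like the sketch below (coordinates are hypothetical):

    chr6	29500000	29502400	1_TssA
    chr6	29502400	29510000	7_Enh
    chr6	29510000	29535800	15_Quies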
gwaslab/viz_plot_compare_effect.py
CHANGED
@@ -75,7 +75,10 @@ def compare_effect(path1,
     if scaled == True:
         scaled1 = True
         scaled2 = True
-
+    if is_q_mc=="fdr" or is_q_mc=="bon":
+        is_q = True
+    else:
+        raise ValueError("Please select either fdr or bon for is_q_mc.")
     if save_args is None:
         save_args = {"dpi":300,"facecolor":"white"}
     if reg_box is None:
gwaslab/viz_plot_mqqplot.py
CHANGED
@@ -133,6 +133,7 @@ def mqqplot(insumstats,
             anno_style="right",
             anno_fixed_arm_length=None,
             anno_source = "ensembl",
+            anno_gtf_path=None,
             anno_adjust=False,
             anno_max_iter=100,
             arm_offset=50,
@@ -816,6 +817,7 @@ def mqqplot(insumstats,
                               log=log,
                               build=build,
                               source=anno_source,
+                              gtf_path=anno_gtf_path,
                               verbose=verbose).rename(columns={"GENE":"Annotation"})
         log.write("Finished extracting variants for annotation...",verbose=verbose)
 
gwaslab/viz_plot_regionalplot.py
CHANGED
@@ -365,6 +365,10 @@ def _plot_recombination_rate(sumstats,pos, region, ax1, rr_path, rr_chr_dict, r
 
         rc = rc.loc[(rc["Position(bp)"]<region[2]) & (rc["Position(bp)"]>region[1]),:]
         ax4.plot(rc_track_offset+rc["Position(bp)"],rc["Rate(cM/Mb)"],color="#5858FF",zorder=1)
+
+        ax1.set_zorder(ax4.get_zorder()+1)
+        ax1.patch.set_visible(False)
+
     if rr_ylabel:
         ax4.set_ylabel("Recombination rate(cM/Mb)")
     if rr_lim!="max":
gwaslab/viz_plot_stackedregional.py
CHANGED
@@ -22,6 +22,7 @@ from gwaslab.bd_common_data import get_number_to_chr
 from gwaslab.bd_common_data import get_recombination_rate
 from gwaslab.bd_common_data import get_gtf
 from gwaslab.viz_aux_reposition_text import adjust_text_position
+from gwaslab.viz_aux_chromatin import _plot_chromatin_state
 from gwaslab.viz_aux_quickfix import _quick_fix
 from gwaslab.viz_aux_quickfix import _get_largenumber
 from gwaslab.viz_aux_quickfix import _quick_add_tchrpos
@@ -37,15 +38,20 @@ from gwaslab.io_to_pickle import load_data_from_pickle
 from gwaslab.g_Sumstats import Sumstats
 from gwaslab.viz_aux_save_figure import save_figure
 from gwaslab.viz_plot_mqqplot import mqqplot
+import matplotlib.patches as patches
 
 def plot_stacked_mqq(objects,
                      vcfs=None,
                      mode="r",
                      mqqratio=3,
                      region=None,
+                     region_chromatin_height=0.1,
+                     region_chromatin_files = None,
+                     region_chromatin_labels= None,
                      titles= None,
                      title_pos=None,
                      title_args=None,
+                     #title_box = None,
                      gtf=None,
                      gene_track_height=0.5,
                      fig_args=None,
@@ -72,11 +78,11 @@ def plot_stacked_mqq(objects,
         fig_args = {"dpi":200}
     if region_lead_grid_line is None:
         region_lead_grid_line = {"alpha":0.5,"linewidth" : 2,"linestyle":"--","color":"#FF0000"}
-    if
-
-
-
-
+    if region_chromatin_files is None:
+        region_chromatin_files = []
+    region_chromatin_height = len(region_chromatin_files) * region_chromatin_height
+    if region_chromatin_labels is None:
+        region_chromatin_labels = []
     # create figure and axes ##################################################################################################################
     if mode=="r":
         if len(vcfs)==1:
@@ -84,9 +90,15 @@ def plot_stacked_mqq(objects,
         n_plot = len(sumstats_list)
         n_plot_plus_gene_track = n_plot + 1
 
+        if len(region_chromatin_files)>0 and mode=="r":
+            height_ratios = [1 for i in range(n_plot_plus_gene_track-1)]+[region_chromatin_height]+[gene_track_height]
+            n_plot_plus_gene_track +=1
+        else:
+            height_ratios = [1 for i in range(n_plot_plus_gene_track-1)]+[gene_track_height]
+
         fig_args["figsize"] = [16,subplot_height*n_plot_plus_gene_track]
         fig, axes = plt.subplots(n_plot_plus_gene_track, 1, sharex=True,
-                                 gridspec_kw={'height_ratios':
+                                 gridspec_kw={'height_ratios': height_ratios},
                                  **fig_args)
         plt.subplots_adjust(hspace=region_hspace)
    elif mode=="m":
@@ -179,18 +191,58 @@ def plot_stacked_mqq(objects,
                                             **mqq_args_for_each_plot[index]
                                             )
         lead_variants_is[index] = (lead_i,lead_i2)
-
+
+    if len(region_chromatin_files)>0 and mode=="r":
+        xlim_i = axes[-1].get_xlim()
+        fig = _plot_chromatin_state( region_chromatin_files = region_chromatin_files,
+                                     region_chromatin_labels = region_chromatin_labels,
+                                     region = region,
+                                     fig = fig,
+                                     ax = axes[-2],
+                                     xlim_i=xlim_i,
+                                     log=log,
+                                     verbose=verbose)
     # adjust labels
     # drop labels for each plot
     # set a common laebl for all plots
 
-
-
+    #if title_box is None:
+    #    title_box = dict(boxstyle='square', facecolor='white', alpha=1.0, edgecolor="black")
+    #    title_box = {}
+
+    if title_args is None:
+        title_args = {}
+    if titles is not None and mode=="r":
+        if title_pos is None:
+            title_pos = [0.01,0.01]
        for index,title in enumerate(titles):
-
+
+            current_text = axes[index].text(title_pos[0], title_pos[1] , title, transform=axes[index].transAxes,ha="left", va='bottom',zorder=999999, **title_args)
+            r = fig.canvas.get_renderer()
+            bb = current_text.get_window_extent(renderer=r).transformed(axes[index].transAxes.inverted())
+            width = bb.width
+            height = bb.height
+
+            rect = patches.Rectangle((0.0,0.0),
+                                     height=height + 0.02*2,
+                                     width=width + 0.01*2,
+                                     transform=axes[index].transAxes,
+                                     linewidth=1,
+                                     edgecolor='black',
+                                     facecolor='white',
+                                     alpha=1.0,
+                                     zorder=99998)
+            axes[index].add_patch(rect)
+            rect.set(zorder=99998)
+    else:
+        if title_pos is None:
+            title_pos = [0.01,0.97]
+        for index,title in enumerate(titles):
+            axes[index].text(title_pos[0], title_pos[1] , title, transform=axes[index].transAxes,ha="left", va='top',zorder=999999, **title_args)
+
     ##########################################################################################################################################
     # draw the line for lead variants
-    _draw_grid_line_for_lead_variants(mode, lead_variants_is, n_plot, axes, region_lead_grid_line)
+    _draw_grid_line_for_lead_variants(mode, lead_variants_is, n_plot, axes, region_lead_grid_line,region_chromatin_files)
 
     ##########################################################################################################################################
     _drop_old_y_labels(axes, n_plot)
@@ -208,12 +260,16 @@ def _drop_old_y_labels(axes, n_plot):
     for index in range(n_plot):
         axes[index].set_ylabel("")
 
-def _draw_grid_line_for_lead_variants(mode, lead_variants_is, n_plot, axes, region_lead_grid_line):
+def _draw_grid_line_for_lead_variants(mode, lead_variants_is, n_plot, axes, region_lead_grid_line,region_chromatin_files):
+    if len(region_chromatin_files)>0:
+        n_plot_and_track = n_plot+2
+    else:
+        n_plot_and_track = n_plot+1
     if mode=="r":
         for index, sig_is in lead_variants_is.items():
             for sig_i in sig_is:
                 if sig_i is not None:
-                    for each_axis_index in range(
+                    for each_axis_index in range(n_plot_and_track):
                         axes[each_axis_index].axvline(x=sig_i, zorder=2,**region_lead_grid_line)
 
 def _add_new_y_label(mode, fig, gene_track_height,n_plot,subplot_height ):
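A hedged sketch of how the new chromatin-state track options appear to be wired into plot_stacked_mqq (assuming plot_stacked_mqq is exposed at package level as in the gwaslab docs; otherwise import it from gwaslab.viz_plot_stackedregional). File paths, labels, Sumstats objects and the region are placeholders, and only parameters visible in this diff are used; the return value is not shown in the diff, so it is not captured here.

    import gwaslab as gl

    # sumstats1 / sumstats2 are placeholder gl.Sumstats objects;
    # the VCF paths are placeholder LD reference files for mode="r".
    gl.plot_stacked_mqq(objects=[sumstats1, sumstats2],
                        vcfs=["ref.vcf.gz", "ref.vcf.gz"],
                        mode="r",
                        region=(6, 29500000, 33500000),
                        region_chromatin_files=["E062_15_coreMarks_mnemonics.bed.gz"],  # placeholder chromHMM file
                        region_chromatin_labels=["PBMC"],
                        region_chromatin_height=0.1)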
{gwaslab-3.4.44.dist-info → gwaslab-3.4.46.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: gwaslab
-Version: 3.4.
+Version: 3.4.46
 Summary: A collection of handy tools for GWAS SumStats
 Author-email: Yunye <yunye@gwaslab.com>
 Project-URL: Homepage, https://cloufield.github.io/gwaslab/
@@ -8,16 +8,16 @@ Project-URL: Github, https://github.com/Cloufield/gwaslab
 Classifier: Programming Language :: Python :: 3
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Operating System :: OS Independent
-Requires-Python:
+Requires-Python: <3.11,>=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
 License-File: LICENSE_before_v3.4.39
 Requires-Dist: pandas !=1.5,>=1.3
-Requires-Dist: numpy
-Requires-Dist: matplotlib !=3.7.2,>=3.5
+Requires-Dist: numpy <2,>=1.21.2
+Requires-Dist: matplotlib !=3.7.2,<3.9,>=3.5
 Requires-Dist: seaborn >=0.12
 Requires-Dist: scipy >=1.12
-Requires-Dist: pySAM
+Requires-Dist: pySAM ==0.22.1
 Requires-Dist: Biopython >=1.79
 Requires-Dist: adjustText <=0.8,>=0.7.3
 Requires-Dist: liftover >=1.1.13
@@ -51,7 +51,7 @@ Warning: Known issues of GWASLab are summarized in [https://cloufield.github.io/
 ### install via pip
 
 ```
-pip install gwaslab==3.4.
+pip install gwaslab==3.4.45
 ```
 
 ```python
@@ -90,7 +90,7 @@ Create a Python 3.9 environment and install gwaslab using pip:
 ```
 conda env create -n gwaslab_test -c conda-forge python=3.9
 conda activate gwaslab
-pip install gwaslab==3.4.
+pip install gwaslab==3.4.45
 ```
 
 or create a new environment using yml file [environment_3.4.40.yml](https://github.com/Cloufield/gwaslab/blob/main/environment_3.4.40.yml)
{gwaslab-3.4.44.dist-info → gwaslab-3.4.46.dist-info}/RECORD
CHANGED
@@ -1,22 +1,22 @@
-gwaslab/__init__.py,sha256=
-gwaslab/bd_common_data.py,sha256=
+gwaslab/__init__.py,sha256=7TKJaODdpeuQKibL7gIEa4MtyQ0pmrU-vIHQ-Et27lQ,2433
+gwaslab/bd_common_data.py,sha256=qr6OMbBaTH2Smfu8347SO9NmF410tn8dq8pRGF5-OpY,13751
 gwaslab/bd_config.py,sha256=TP-r-DPhJD3XnRYZbw9bQHXaDIkiRgK8bG9HCt-UaLc,580
 gwaslab/bd_download.py,sha256=cDDk2C5IvjeAzvPvVYGTkI4Ss33DUtEDjGo8eAbQRvY,15663
 gwaslab/bd_get_hapmap3.py,sha256=asNjQYeGfQi8u3jnfenRvDdKMs5ptql5wpcUzqMlwUI,3937
 gwaslab/cache_manager.py,sha256=HOTnSkCOyGEPLRl90WT8D_6pAdI8d8AzenMIDGuCeWc,28113
 gwaslab/g_Log.py,sha256=C3Zv-_6c3C9ms8bgQ-ytplz22sjk7euqXYkWr9zNeAs,1573
 gwaslab/g_Phenotypes.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-gwaslab/g_Sumstats.py,sha256=
-gwaslab/g_SumstatsPair.py,sha256=
+gwaslab/g_Sumstats.py,sha256=TUcFQFyODS_-FYMdXDvrBijG4Qtfi1igIWM-eEgb0nc,35352
+gwaslab/g_SumstatsPair.py,sha256=20snPb4SlI6ftMGVjgxAuyxsxYRQF-GzzlBSnoB-3Lo,8851
 gwaslab/g_SumstatsT.py,sha256=u_DighLMnMxwTLnqm-B58pA0G6WXRj6pudPyKMVKjSU,2133
 gwaslab/g_Sumstats_summary.py,sha256=FECvvFXJVKaCX5dggBvvk9YvJ6AbdbcLfjltysX7wEE,6380
 gwaslab/g_meta.py,sha256=htWlgURWclm9R6UqFcX1a93WN27xny7lGUeyJZOtszQ,2583
-gwaslab/g_vchange_status.py,sha256=
-gwaslab/g_version.py,sha256=
+gwaslab/g_vchange_status.py,sha256=jLoVzMJFhB5k_cJKzHuBNc2HZGBWydAunCNa0n_d54g,1923
+gwaslab/g_version.py,sha256=g2bR-qFeFvLADj57VXMT5dufwba2YGD86hfDHRYURfU,1818
 gwaslab/hm_casting.py,sha256=FqP4EQl83Q2OKLw004OgLIvUH795TVCGwziLk5jsHqY,11368
-gwaslab/hm_harmonize_sumstats.py,sha256=
+gwaslab/hm_harmonize_sumstats.py,sha256=1hjUdle2DSKHGBp2BktfFqf-QHU_q2xWl_mPhiYc_ZA,78616
 gwaslab/hm_rsid_to_chrpos.py,sha256=ODWREO0jPN0RAfNzL5fRzSRANfhiksOvUVPuEsFZQqA,6552
-gwaslab/io_preformat_input.py,sha256=
+gwaslab/io_preformat_input.py,sha256=AZ43WGqVTzbo3XtClWhjRjsj6pBR9stw6JBL_TZ461U,20673
 gwaslab/io_read_ldsc.py,sha256=8S9n4imgl4d0WPms_GYld-6uUM5z7iWGiCA-M814kzY,12123
 gwaslab/io_read_tabular.py,sha256=EG-C6KhCutt4J4LlOMgXnqzJvU-EZXzVhMvaDFnHrMM,2380
 gwaslab/io_to_formats.py,sha256=QuGWdvnAamaZAuhymj-0SuNBaKz1maTTyH396gvVaO8,29229
@@ -28,7 +28,7 @@ gwaslab/ldsc_parse.py,sha256=MBnfgcWlV4oHp9MoDRh1mpilaHhAR15Af77hMFn4-5k,10564
 gwaslab/ldsc_regressions.py,sha256=yzbGjgNV7u-SWXNPsh9S8y9mK97Bim_Nmad9G9V18ZU,30078
 gwaslab/ldsc_sumstats.py,sha256=O0olsDxKlh1MJ1gAuEN1t40rxhajOEwOQ20ak7xoDrI,26245
 gwaslab/qc_check_datatype.py,sha256=kW68uk4dTLOU2b1dHoVat6n0loundDysAjIqxsXW28Q,3379
-gwaslab/qc_fix_sumstats.py,sha256
+gwaslab/qc_fix_sumstats.py,sha256=-DQz5dPW6YXXVP-LV2Txa4lJrpZHhqAoKNny6IYAW18,93100
 gwaslab/run_script.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 gwaslab/util_ex_calculate_ldmatrix.py,sha256=LpE__LoYRHLgVKlCHo6lYWlz9LEUVUDqYPEAP-Svbm0,14598
 gwaslab/util_ex_calculate_prs.py,sha256=5l1eiZs8YwIpEgp7i3IurP8n5KwQM5awbG9fWSm4iT4,9053
@@ -47,23 +47,26 @@ gwaslab/util_in_calculate_power.py,sha256=JfHJFg3tNF0f4NHgWlzVW2mSxCiP07mAHIyEfV
 gwaslab/util_in_convert_h2.py,sha256=a8Cbudt3xn9WP2bPc-7ysuowB-LYub8j8GeDXl7Lk7Q,6483
 gwaslab/util_in_correct_winnerscurse.py,sha256=Gp--yAQ8MMzdkWIvXP9C1BHVjZc-YzqHfYWhAj19w9w,2110
 gwaslab/util_in_fill_data.py,sha256=gdTwYA6FvBMnrtxAeL0lEj_Z0aGIoRNPScWDlJvZWeQ,14021
-gwaslab/util_in_filter_value.py,sha256=
+gwaslab/util_in_filter_value.py,sha256=dY4X66N9A4MHCRHjPqLYFufMM91ggLRwUBf_nJYh8Lg,23605
 gwaslab/util_in_get_density.py,sha256=kpKXH69acMkeYVG5vs-VbJC3COhmuLBfYco-wuOxgjc,3934
-gwaslab/util_in_get_sig.py,sha256=
+gwaslab/util_in_get_sig.py,sha256=9kq1GXacknO2YnVmsTli1GlPA728ASweTZ3UKm3Wszo,38783
+gwaslab/util_in_meta.py,sha256=5K9lIZcIgUy0AERqHy1GvMN2X6dp45JUUgopuDLgt4o,11284
+gwaslab/util_in_snphwe.py,sha256=-KpIDx6vn_nah6H55IkV2OyjXQVXV13XyBL069WE1wM,1751
 gwaslab/viz_aux_annotate_plot.py,sha256=R-1GT89E4NEBAMNTYzNawdi9rjQV5LCnODgnYOOKsys,32184
+gwaslab/viz_aux_chromatin.py,sha256=NzbFFpbwAMH-39F8z0qJaExw-JcKYcAlzyzbMkvFo5M,4002
 gwaslab/viz_aux_quickfix.py,sha256=Z6ZNEAUFuWVDTzH-qGreNGxPxJLCmqhXtBrvDOgo4g8,18308
 gwaslab/viz_aux_reposition_text.py,sha256=iRIP-Rkltlei068HekJcVubiqPrunBqvAoSQ1eHk04M,4304
 gwaslab/viz_aux_save_figure.py,sha256=nL-aoE8Kg06h7FgleGRBIZjhI-6w5gpn3E1HWMwBig8,2664
 gwaslab/viz_plot_compare_af.py,sha256=qtXW45-Sq_ugK8ZfqBYMpmf58SKi3lB3YyHnzn_akcE,5344
-gwaslab/viz_plot_compare_effect.py,sha256=
+gwaslab/viz_plot_compare_effect.py,sha256=iA74jMzh-G65U6BeXyQro08tPlJWpNyvtrjFsYHLvFM,49505
 gwaslab/viz_plot_forestplot.py,sha256=xgOnefh737CgdQxu5naVyRNBX1NQXPFKzf51fbh6afs,6771
 gwaslab/viz_plot_miamiplot.py,sha256=rCFEp7VNuVqeBBG3WRkmFAtFklbF79BvIQQYiSY70VY,31238
 gwaslab/viz_plot_miamiplot2.py,sha256=SWv82D8UBbREKsk8EoKth-2w68l6FbXyVLsb_E1hh8o,15882
-gwaslab/viz_plot_mqqplot.py,sha256=
+gwaslab/viz_plot_mqqplot.py,sha256=oVFiLe6Xv_-ryY8I36tGjU9StjM_ust72YSSfnZgaUg,61828
 gwaslab/viz_plot_qqplot.py,sha256=psQgVpP29686CEZkzQz0iRbApzqy7aE3GGiBcazVvNw,7247
-gwaslab/viz_plot_regionalplot.py,sha256=
+gwaslab/viz_plot_regionalplot.py,sha256=8u-5-yfy-UaXhaxVVz3Y5k2kBAoqzczUw1hyyD450iI,37983
 gwaslab/viz_plot_rg_heatmap.py,sha256=PidUsgOiEVt6MfBPCF3_yDhOEytZ-I1q-ZD6_0pFrV4,13713
-gwaslab/viz_plot_stackedregional.py,sha256=
+gwaslab/viz_plot_stackedregional.py,sha256=yWxAJyqA3Kv9PUFTLzgbAu_hP7wi3_3bVVQlJECm8Gc,14687
 gwaslab/viz_plot_trumpetplot.py,sha256=ZHdc6WcVx0-oKoj88yglRkmB4bS9pOiEMcuwKW35Yvo,42672
 gwaslab/data/formatbook.json,sha256=N2nJs80HH98Rsu9FxaSvIQO9J5yIV97WEtAKjRqYwiY,38207
 gwaslab/data/reference.json,sha256=k8AvvgDsuLxzv-NCJHWvTUZ5q_DLAFxs1Th3jtL313k,11441
@@ -73,9 +76,9 @@ gwaslab/data/hapmap3_SNPs/hapmap3_db150_hg19.snplist.gz,sha256=qD9RsC5S2h6l-OdpW
 gwaslab/data/hapmap3_SNPs/hapmap3_db151_hg38.snplist.gz,sha256=Y8ZT2FIAhbhlgCJdE9qQVAiwnV_fcsPt72usBa7RSBM,10225828
 gwaslab/data/high_ld/high_ld_hla_hg19.bed.gz,sha256=R7IkssKu0L4WwkU9SrS84xCMdrkkKL0gnTNO_OKbG0Y,219
 gwaslab/data/high_ld/high_ld_hla_hg38.bed.gz,sha256=76CIU0pibDJ72Y6UY-TbIKE9gEPwTELAaIbCXyjm80Q,470
-gwaslab-3.4.
-gwaslab-3.4.
-gwaslab-3.4.
-gwaslab-3.4.
-gwaslab-3.4.
-gwaslab-3.4.
+gwaslab-3.4.46.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+gwaslab-3.4.46.dist-info/LICENSE_before_v3.4.39,sha256=GhLOU_1UDEKeOacYhsRN_m9u-eIuVTazSndZPeNcTZA,1066
+gwaslab-3.4.46.dist-info/METADATA,sha256=aw5qahaxh7TAYCLNPdBO1FmHCWQk3mQcOlZohaGqorw,7765
+gwaslab-3.4.46.dist-info/WHEEL,sha256=cpQTJ5IWu9CdaPViMhC9YzF8gZuS5-vlfoFihTBC86A,91
+gwaslab-3.4.46.dist-info/top_level.txt,sha256=PyY6hWtrALpv2MAN3kjkIAzJNmmBTH5a2risz9KwH08,8
+gwaslab-3.4.46.dist-info/RECORD,,
{gwaslab-3.4.44.dist-info → gwaslab-3.4.46.dist-info}/LICENSE
File without changes
{gwaslab-3.4.44.dist-info → gwaslab-3.4.46.dist-info}/LICENSE_before_v3.4.39
File without changes
{gwaslab-3.4.44.dist-info → gwaslab-3.4.46.dist-info}/top_level.txt
File without changes