PyPI - gwaslab - Versions diffs - 3.4.38__py3-none-any.whl → 3.4.39__py3-none-any.whl - Mend

gwaslab 3.4.38__py3-none-any.whl → 3.4.39__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of gwaslab might be problematic. Click here for more details.

Files changed (51)

gwaslab/bd_common_data.py +6 -3
gwaslab/bd_download.py +9 -9
gwaslab/bd_get_hapmap3.py +43 -9
gwaslab/g_Log.py +14 -5
gwaslab/g_Sumstats.py +86 -18
gwaslab/g_SumstatsPair.py +70 -23
gwaslab/g_SumstatsT.py +2 -2
gwaslab/g_version.py +10 -10
gwaslab/hm_casting.py +9 -4
gwaslab/hm_harmonize_sumstats.py +88 -83
gwaslab/io_preformat_input.py +14 -14
gwaslab/io_read_ldsc.py +49 -1
gwaslab/ldsc_irwls.py +198 -0
gwaslab/ldsc_jackknife.py +514 -0
gwaslab/ldsc_ldscore.py +417 -0
gwaslab/ldsc_parse.py +294 -0
gwaslab/ldsc_regressions.py +747 -0
gwaslab/ldsc_sumstats.py +629 -0
gwaslab/qc_check_datatype.py +1 -1
gwaslab/qc_fix_sumstats.py +163 -161
gwaslab/util_ex_calculate_ldmatrix.py +2 -2
gwaslab/util_ex_gwascatalog.py +24 -24
gwaslab/util_ex_ldproxyfinder.py +9 -9
gwaslab/util_ex_ldsc.py +189 -0
gwaslab/util_in_calculate_gc.py +6 -6
gwaslab/util_in_calculate_power.py +42 -43
gwaslab/util_in_convert_h2.py +8 -8
gwaslab/util_in_fill_data.py +28 -28
gwaslab/util_in_filter_value.py +91 -52
gwaslab/util_in_get_density.py +8 -8
gwaslab/util_in_get_sig.py +407 -65
gwaslab/viz_aux_annotate_plot.py +12 -12
gwaslab/viz_aux_quickfix.py +18 -18
gwaslab/viz_aux_reposition_text.py +3 -3
gwaslab/viz_aux_save_figure.py +14 -5
gwaslab/viz_plot_compare_af.py +29 -30
gwaslab/viz_plot_compare_effect.py +63 -71
gwaslab/viz_plot_miamiplot2.py +6 -6
gwaslab/viz_plot_mqqplot.py +17 -3
gwaslab/viz_plot_qqplot.py +1 -1
gwaslab/viz_plot_regionalplot.py +33 -32
gwaslab/viz_plot_rg_heatmap.py +28 -26
gwaslab/viz_plot_stackedregional.py +40 -21
gwaslab/viz_plot_trumpetplot.py +50 -55
gwaslab-3.4.39.dist-info/LICENSE +674 -0
{gwaslab-3.4.38.dist-info → gwaslab-3.4.39.dist-info}/METADATA +4 -3
gwaslab-3.4.39.dist-info/RECORD +80 -0
gwaslab-3.4.38.dist-info/RECORD +0 -72
/gwaslab-3.4.38.dist-info/LICENSE → /gwaslab-3.4.39.dist-info/LICENSE_before_v3.4.39 +0 -0
{gwaslab-3.4.38.dist-info → gwaslab-3.4.39.dist-info}/WHEEL +0 -0
{gwaslab-3.4.38.dist-info → gwaslab-3.4.39.dist-info}/top_level.txt +0 -0

gwaslab/util_ex_calculate_ldmatrix.py CHANGED Viewed

@@ -199,7 +199,7 @@ def _align_sumstats_with_bim(row, locus_sumstats, ref_bim, log=Log(),suffixes=No
     log.write("   -#Total Variants matched:{}".format(sum(allele_match)))
     if row["SNPID"] not in combined_df.loc[perfect_match,"SNPID"].values:
-        log.write("   -Warning: Lead variant was not available in reference!!!!!!!!!!!!!!!")
+        log.warning("Lead variant was not available in reference!")
     # adjust statistics
     output_columns=["SNPID","CHR","POS","EA_bim","NEA_bim"]
@@ -256,4 +256,4 @@ def _check_snpid_order(snplist_path, matched_sumstats_snpid,log):
     if list(matched_sumstats_snpid) == list(snpid_list):
         log.write(" -Sumstats SNPID order and LD matrix SNPID order are matched.")
     else:
-        log.write(" -Warning: Sumstats SNPID order and LD matrix SNPID order are not matched...")
+        log.warning("Sumstats SNPID order and LD matrix SNPID order are not matched!")

gwaslab/util_ex_gwascatalog.py CHANGED Viewed

@@ -9,26 +9,26 @@ def gwascatalog_trait(efo,source="NCBI",sig_level=5e-8,verbose=True,log=Log()):
     #https://www.ebi.ac.uk/gwas/rest/docs/api
     base_url = "https://www.ebi.ac.uk/gwas/rest/api/efoTraits/"+efo
-    if verbose: log.write("Start to retrieve data from GWASCatalog...")
-    if verbose: log.write(" -Please make sure your sumstats is based on GRCh38...")
-    if verbose: log.write(" -Requesting (GET) trait information through the GWASCatalog API...")
-    if verbose: log.write(" -EFO trait api: "+ base_url)
+    log.write("Start to retrieve data from GWASCatalog...", verbose=verbose)
+    log.write(" -Please make sure your sumstats is based on GRCh38...", verbose=verbose)
+    log.write(" -Requesting (GET) trait information through the GWASCatalog API...", verbose=verbose)
+    log.write(" -EFO trait api: "+ base_url, verbose=verbose)
     text = requests.get(base_url)
-    if verbose:
-        log.write(" -Status code: {}".format(text.status_code))
-        if text.status_code!=200:
-            log.write(" -Status code is not 200. Access failed. Please check your internet or the GWAS Catalog sever status.")
-            log.write(" -Message:{}".format(text.text))
-            return 0
+    log.write(" -Status code: {}".format(text.status_code), verbose=verbose)
+    if text.status_code!=200:
+        log.write(" -Status code is not 200. Access failed. Please check your internet or the GWAS Catalog sever status.", verbose=verbose)
+        log.write(" -Message:{}".format(text.text), verbose=verbose)
+        return 0
     api_response = json.loads(text.text)
-    if verbose: log.write(" -Trait Name:",api_response["trait"])
-    if verbose: log.write(" -Trait URL:",api_response["uri"])
+    log.write(" -Trait Name:",api_response["trait"], verbose=verbose)
+    log.write(" -Trait URL:",api_response["uri"], verbose=verbose)
     base_url = "https://www.ebi.ac.uk/gwas/rest/api/efoTraits/"+efo+"/associations?projection=associationByEfoTrait"
-    if verbose: log.write(" -Requesting (GET) GWAS associations through the GWASCatalog API...")
-    if verbose: log.write(" -associationsByTraitSummary API: "+ base_url)
-    if verbose: log.write(" -Note: this step might take a while...")
+    log.write(" -Requesting (GET) GWAS associations through the GWASCatalog API...", verbose=verbose)
+    log.write(" -associationsByTraitSummary API: "+ base_url, verbose=verbose)
+    log.write(" -Note: this step might take a while...", verbose=verbose)
     # get request and check status code of response
     raw_data = requests.get(base_url)
@@ -37,13 +37,13 @@ def gwascatalog_trait(efo,source="NCBI",sig_level=5e-8,verbose=True,log=Log()):
     is_proceed = check_request_status_code(raw_data.status_code,verbose=verbose,log=log)
     if is_proceed is False: return False
-    if verbose: log.write(" -Loading json ...")
+    log.write(" -Loading json ...", verbose=verbose)
     # Transform API response from JSON into Python dictionary
     api_response = json.loads(raw_data.text)
-    if verbose: log.write(" -Parsing json ...")
+    log.write(" -Parsing json ...", verbose=verbose)
     # An
     records=list()
-    if verbose: log.write(" -Number of reported associations for "+ efo +" in GWASCatalog:",len( api_response["_embedded"]["associations"]))
+    log.write(" -Number of reported associations for "+ efo +" in GWASCatalog:",len( api_response["_embedded"]["associations"]), verbose=verbose)
     for association in api_response["_embedded"]["associations"]:
         #association statistics:
@@ -126,12 +126,12 @@ def gwascatalog_trait(efo,source="NCBI",sig_level=5e-8,verbose=True,log=Log()):
                                 records.append(row)
             #rsid locations
     gwascatalog_lead_snps = pd.DataFrame(records,columns=["SNPID","CHR","POS","REPORT_GENENAME","CLOSEST_GENENAMES","FUNCTION_CLASS","OR","BETA","SE","P","TRAIT","STUDY","PUBMEDID","AUTHOR"])
-    if verbose: log.write(" -Loading retrieved data into gwaslab Sumstats object ...")
+    log.write(" -Loading retrieved data into gwaslab Sumstats object ...", verbose=verbose)
     sigs = gl.Sumstats(gwascatalog_lead_snps.copy(),fmt="gwaslab",other=['REPORT_GENENAME', 'CLOSEST_GENENAMES','TRAIT', 'STUDY', 'PUBMEDID','AUTHOR'],verbose=False)
     sigs.fix_pos(verbose=False)
     sigs.fix_chr(verbose=False)
     sigs.sort_coordinate(verbose=False)
-    if verbose: log.write("Finished retrieving data from GWASCatalog...")
+    log.write("Finished retrieving data from GWASCatalog...", verbose=verbose)
     #return gwaslab Sumstats object
     return sigs
@@ -142,14 +142,14 @@ def check_request_status_code(request_code,verbose=True,log=Log()):
     is_proceed=False
     if request_code == 200:
-        if verbose: log.write(" -Status code 200 OK: Retrieved data from GWASCatalog successffully ...")
+        log.write(" -Status code 200 OK: Retrieved data from GWASCatalog successffully ...", verbose=verbose)
         is_proceed=True
     elif request_code == 404:
-        if verbose: log.write(" -Status code 404 Not Found: The requested resource did not exist ...")
+        log.write(" -Status code 404 Not Found: The requested resource did not exist ...", verbose=verbose)
     elif request_code == 301:
-        if verbose: log.write(" -Status code 301 Moved Permanently: The requested resource did not exist ...")
+        log.write(" -Status code 301 Moved Permanently: The requested resource did not exist ...", verbose=verbose)
     elif request_code == 400:
-        if verbose: log.write(" -Status code 400 Bad Request: The requested resource did not exist ...")
+        log.write(" -Status code 400 Bad Request: The requested resource did not exist ...", verbose=verbose)
     return is_proceed

gwaslab/util_ex_ldproxyfinder.py CHANGED Viewed

@@ -49,8 +49,8 @@ def _extract_with_ld_proxy(  snplist=None,
                             ld_threshold=0.8
                             ):
     ### Load vcf#######################################################################################
-    if verbose: log.write("Start to load reference genotype...")
-    if verbose: log.write(" -reference vcf path : "+ vcf_path)
+    log.write("Start to load reference genotype...", verbose=verbose)
+    log.write(" -reference vcf path : "+ vcf_path, verbose=verbose)
     if tabix is None:
         tabix = which("tabix")
     vcf_chr_dict = auto_check_vcf_chr_dict(vcf_path=vcf_path, vcf_chr_dict=vcf_chr_dict, verbose=verbose, log=log)
@@ -122,7 +122,7 @@ def _extract_with_ld_proxy(  snplist=None,
     extracted_sumstats = pd.concat([extracted_sumstats, ld_proxies],ignore_index=True)
-    if verbose: log.write("Finished loading reference genotype successfully!")
+    log.write("Finished loading reference genotype successfully!", verbose=verbose)
     return extracted_sumstats
@@ -139,13 +139,13 @@ def _get_rsq(    row,
         ref_genotype = read_vcf(vcf_path,region=vcf_chr_dict[region[0]]+":"+str(region[1])+"-"+str(region[2]),tabix=tabix)
         if ref_genotype is None:
-            if verbose: log.write("  -Warning: no data was retrieved. Skipping ...")
+            log.warning("No data was retrieved. Skipping ...", verbose=verbose)
             ref_genotype=dict()
             ref_genotype["variants/POS"]=np.array([],dtype="int64")
             return None
-        if verbose: log.write("  -Retrieving index...")
-        if verbose: log.write("  -Ref variants in the region: {}".format(len(ref_genotype["variants/POS"])))
+        log.write("  -Retrieving index...", verbose=verbose)
+        log.write("  -Ref variants in the region: {}".format(len(ref_genotype["variants/POS"])), verbose=verbose)
         #  match sumstats pos and ref pos:
         # get ref index for its first appearance of sumstats pos
         #######################################################################################
@@ -170,7 +170,7 @@ def _get_rsq(    row,
             else:
                 # no position match
                 return None
-        if verbose: log.write("  -Matching variants using POS, NEA, EA ...")
+        log.write("  -Matching variants using POS, NEA, EA ...", verbose=verbose)
         sumstats["REFINDEX"] = sumstats.loc[:,["POS","NEA","EA"]].apply(lambda x: match_varaint(x), axis=1)
         log.write("  -Matched variants in sumstats and vcf:{} ".format(sum(~sumstats["REFINDEX"].isna())))
@@ -190,7 +190,7 @@ def _get_rsq(    row,
             lead_snp_genotype = GenotypeArray([ref_genotype["calldata/GT"][lead_snp_ref_index]]).to_n_alt()
             other_snp_genotype = GenotypeArray(ref_genotype["calldata/GT"][other_snps_ref_index]).to_n_alt()
-            if verbose: log.write("  -Calculating Rsq...")
+            log.write("  -Calculating Rsq...", verbose=verbose)
             if len(other_snp_genotype)>1:
                 valid_r2= np.power(rogers_huff_r_between(lead_snp_genotype,other_snp_genotype)[0],2)
@@ -198,7 +198,7 @@ def _get_rsq(    row,
                 valid_r2= np.power(rogers_huff_r_between(lead_snp_genotype,other_snp_genotype),2)
             sumstats.loc[~sumstats["REFINDEX"].isna(),"RSQ"] = valid_r2
         else:
-            if verbose: log.write("  -Lead SNP not found in reference...")
+            log.write("  -Lead SNP not found in reference...", verbose=verbose)
             sumstats["RSQ"]=None
         sumstats["RSQ"] = sumstats["RSQ"].astype("float")

gwaslab/util_ex_ldsc.py ADDED Viewed

@@ -0,0 +1,189 @@
+from gwaslab.ldsc_sumstats import estimate_h2
+from gwaslab.ldsc_sumstats import estimate_rg
+from gwaslab.g_Log import Log
+from gwaslab.qc_fix_sumstats import start_to
+from gwaslab.qc_fix_sumstats import finished
+from gwaslab.qc_fix_sumstats import skipped
+from gwaslab.io_read_ldsc import parse_ldsc_summary
+class ARGS():
+    def __init__(self, **args):
+        self.out = "ldsc"
+        self.bfile = None
+        self.l2 = False
+        self.extract = None
+        self.keep = None
+        self.ld_wind_snps = None
+        self.ld_wind_kb = None
+        self.ld_wind_cm = None
+        self.print_snps = None
+        self.annot =None
+        self.thin_annot = False
+        self.cts_bin = None
+        self.cts_breaks = None
+        self.cts_names = None
+        self.per_allele = False
+        self.pq_exp =None
+        self.no_print_annot = False
+        if "h2" in args.keys():
+            self.h2 = args["h2"]
+        else:
+            self.h2 = None
+        self.h2_cts = None
+        if "rg" in args.keys():
+            self.rg = args["rg"]
+        else:
+            self.rg = None
+        if "ref_ld" in args.keys():
+            self.ref_ld = args["ref_ld"]
+        else:
+            self.ref_ld = None
+        if "ref_ld_chr" in args.keys():
+            self.ref_ld_chr = args["ref_ld_chr"]
+        else:
+            self.ref_ld_chr = None
+        if "w_ld" in args.keys():
+            self.w_ld = args["w_ld"]
+        else:
+            self.w_ld = None
+        if "w_ld_chr" in args.keys():
+            self.w_ld_chr = args["w_ld_chr"]
+        else:
+            self.w_ld_chr = None
+        self.overlap_annot = False
+        self.print_coefficients = "ldsc"
+        self.frqfile = None
+        self.frqfile_chr = None
+        self.no_intercept = None
+        self.intercept_h2 = None
+        self.intercept_gencov = None
+        self.M = None
+        self.two_step = None
+        self.chisq_max = None
+        self.ref_ld_chr_cts = None
+        self.print_cov = None
+        self.print_delete_vals = False
+        self.chunk_size = 50
+        self.pickle = False
+        self.yes_really = False
+        self.invert_anyway = False
+        self.n_blocks = 200
+        self.not_M_5_50 = False
+        self.no_check_alleles = False
+        self.return_silly_things = False
+        if "samp_prev" in args.keys():
+            self.samp_prev = args["samp_prev"]
+        else:
+            self.samp_prev = None
+        if "pop_prev" in args.keys():
+            self.pop_prev = args["pop_prev"]
+        else:
+            self.pop_prev = None
+def _estimate_h2_by_ldsc(insumstats, log, verbose=True, **args):
+    sumstats = insumstats.copy()
+    ##start function with col checking##########################################################
+    _start_line = "run LD score regression"
+    _end_line = "running LD score regression"
+    _start_cols =[]
+    _start_function = ".estimate_h2_by_ldsc()"
+    _must_args ={}
+    is_enough_info = start_to(sumstats=sumstats,
+                            log=log,
+                            verbose=verbose,
+                            start_line=_start_line,
+                            end_line=_end_line,
+                            start_cols=_start_cols,
+                            start_function=_start_function,
+                            **_must_args)
+    if is_enough_info == False: return None
+    ############################################################################################
+    log.write(" -Run single variate LD score regression:", verbose=verbose)
+    log.write("  -Adopted from LDSC source code: https://github.com/bulik/ldsc", verbose=verbose)
+    log.write("  -Please cite LDSC: Bulik-Sullivan, et al. LD Score Regression Distinguishes Confounding from Polygenicity in Genome-Wide Association Studies. Nature Genetics, 2015.", verbose=verbose)
+    log.write(" -Arguments:", verbose=verbose)
+    for key, value in args.items():
+        log.write("  -{}:{}".format(key, value), verbose=verbose)
+    default_args = ARGS(**args)
+    if "Z" not in sumstats.columns:
+        sumstats["Z"] = sumstats["BETA"]/sumstats["SE"]
+    sumstats = sumstats.rename(columns={"EA":"A1","NEA":"A2","rsID":"SNP"})
+    log.write(" -LDSC log:", verbose=verbose)
+    summary = estimate_h2(sumstats, default_args, log)
+    log.write(" -Results have been stored in .ldsc_h2", verbose=verbose)
+    finished(log=log,verbose=verbose,end_line=_end_line)
+    return parse_ldsc_summary(summary)
+def _estimate_rg_by_ldsc(insumstats, other_traits ,log, verbose=True, **args):
+    sumstats = insumstats.copy()
+    ##start function with col checking##########################################################
+    _start_line = "run LD score regression for genetic correlation"
+    _end_line = "running LD score regression for genetic correlation"
+    _start_cols =[]
+    _start_function = ".estimate_rg_by_ldsc()"
+    _must_args ={}
+    is_enough_info = start_to(sumstats=sumstats,
+                            log=log,
+                            verbose=verbose,
+                            start_line=_start_line,
+                            end_line=_end_line,
+                            start_cols=_start_cols,
+                            start_function=_start_function,
+                            **_must_args)
+    if is_enough_info == False: return None
+    ############################################################################################
+    log.write(" -Run cross-trait LD score regression:", verbose=verbose)
+    log.write("  -Adopted from LDSC source code: https://github.com/bulik/ldsc", verbose=verbose)
+    log.write("  -Please cite LDSC: Bulik-Sullivan, B., et al. An Atlas of Genetic Correlations across Human Diseases and Traits. Nature Genetics, 2015.", verbose=verbose)
+    log.write(" -Arguments:", verbose=verbose)
+    for key, value in args.items():
+        log.write("  -{}:{}".format(key, value), verbose=verbose)
+    default_args = ARGS(**args)
+    if "Z" not in sumstats.columns:
+        sumstats["Z"] = sumstats["BETA"]/sumstats["SE"]
+    sumstats = sumstats.rename(columns={"EA":"A1","NEA":"A2","rsID":"SNP"})
+    other_traits_to_use = []
+    alias = default_args.rg.split(",")[1:]
+    for index, each_other_sumstats in enumerate(other_traits):
+        log.write(" -Processing sumstats with alias {} ({})".format(alias[index], each_other_sumstats.meta["gwaslab"]["study_name"]))
+        if "rsID" not in each_other_sumstats.data.columns:
+            to_append = each_other_sumstats.filter_hapmap3(verbose=False).data.rename(columns={"EA":"A1","NEA":"A2","rsID":"SNP"})
+        else:
+            to_append = each_other_sumstats.data.rename(columns={"EA":"A1","NEA":"A2","rsID":"SNP"})
+        if "Z" not in to_append.columns:
+            to_append["Z"] = to_append["BETA"]/to_append["SE"]
+        other_traits_to_use.append(to_append[["SNP","A1","A2","Z","N"]])
+    log.write(" -LDSC log:", verbose=verbose)
+    summary = estimate_rg(sumstats[["SNP","A1","A2","Z","N"]], other_traits_to_use, default_args, log)[1]
+    log.write(" -Results have been stored in .ldsc_rg", verbose=verbose)
+    finished(log=log,verbose=verbose,end_line=_end_line)
+    return summary

gwaslab/util_in_calculate_gc.py CHANGED Viewed

@@ -12,34 +12,34 @@ def lambdaGC(insumstats,include_chrXYMT=True, x=23 ,y=24, mt=25, mode="P",level=
     sumstats=insumstats.loc[:,["CHR",mode]]
     if include_chrXYMT is False:
-        if verbose: log.write(" -Excluding chrX, chrY, chrMT from lambda GC calculation.")
+        log.write(" -Excluding chrX, chrY, chrMT from lambda GC calculation.", verbose=verbose)
         xymt= [x,y,mt,"chrx","chry","chrmt","chrX","chrY","chrMT","chrM","M","x","y","mt","X","Y","MT"]
         sumstats = sumstats.loc[~sumstats["CHR"].isin(xymt),:]
     indata = sumstats[mode].values
     if len(indata) == 0:
-        if verbose: log.write("  -No available variants to use for calculation.")
+        log.write("  -No available variants to use for calculation.", verbose=verbose)
         return np.nan
     if mode=="p" or mode=="P":
         observedMedianChi2 = sp.stats.chi2.isf(np.nanmedian(indata),1)
         expectedMedianChi2 = sp.stats.chi2.ppf(level,1)
         lambdagc=observedMedianChi2/expectedMedianChi2
-        if verbose: log.write(" -Lambda GC (P mode) at "+ str(1 - level)+ " is"," ","{:.5f}".format(lambdagc))
+        log.write(" -Lambda GC (P mode) at "+ str(1 - level)+ " is"," ","{:.5f}".format(lambdagc), verbose=verbose)
     elif mode=="mlog10p" or mode=="MLOG10P":
         observedMedianChi2 = sp.stats.chi2.isf( np.nanmedian(np.power(10,-indata)) ,1)
         expectedMedianChi2 = sp.stats.chi2.ppf(level,1)
         lambdagc=observedMedianChi2/expectedMedianChi2
-        if verbose: log.write(" -Lambda GC (MLOG10P mode) at "+ str(1- level)+ " is"," ","{:.5f}".format(lambdagc))
+        log.write(" -Lambda GC (MLOG10P mode) at "+ str(1- level)+ " is"," ","{:.5f}".format(lambdagc), verbose=verbose)
     elif mode=="z" or mode=="Z":
         observedMedianChi2 = np.median((indata)**2)
         expectedMedianChi2 = sp.stats.chi2.ppf(level,1)
         lambdagc=observedMedianChi2/expectedMedianChi2
-        if verbose:log.write(" -Lambda GC (Z mode) at "+ str(1- level)+ " is"," ","{:.5f}".format(lambdagc))
+        if verbose:log.write(" -Lambda GC (Z mode) at "+ str(1- level)+ " is"," ","{:.5f}".format(lambdagc), verbose=verbose)
     elif mode=="chi2" or mode=="CHISQ":
         observedMedianChi2 = np.median(indata)
         expectedMedianChi2 = sp.stats.chi2.ppf(level,1)
         lambdagc=observedMedianChi2/expectedMedianChi2
-        if verbose:log.write(" -Lambda GC (CHISQ mode) at "+ str(1- level)+ " is"," ","{:.5f}".format(lambdagc))
+        log.write(" -Lambda GC (CHISQ mode) at "+ str(1- level)+ " is"," ","{:.5f}".format(lambdagc), verbose=verbose)
     else:
         return np.nan
     return lambdagc

gwaslab/util_in_calculate_power.py CHANGED Viewed

@@ -21,24 +21,23 @@ def get_power(
               log=Log(),
               verbose=True
              ):
-    if verbose: log.write(" Start to calculate statistical power...")
+    log.write(" Start to calculate statistical power...", verbose=verbose)
     if mode=="b":
-        if verbose:
-            log.write(" -Input settings (b mode):")
-            log.write("  -Number of cases:{}".format(ncase))
-            log.write("  -Number of controls:{}".format(ncontrol))
-            if genotype_rr is not None:
-                log.write("  -Risk allele RR:{:.3f}".format(genotype_rr))
-            elif genotype_or is not None:
-                log.write("  -Risk allele OR:{:.3f}".format(genotype_or))
-            elif beta is not None:
-                log.write("  -Risk allele beta:{:.3f}".format(beta))
-            else:
-                genotype_rr = 0.1
-                log.write("  -Risk allele RR:{:.3f}".format(genotype_rr))
-            log.write("  -Disease prevalence:{:.3f}".format(prevalence))
-            log.write("  -Risk allele frequency: {:.3f}".format(daf))
-            log.write("  -Significance level: {:.3e}".format(sig_level))
+        log.write(" -Input settings (b mode):", verbose=verbose)
+        log.write("  -Number of cases:{}".format(ncase), verbose=verbose)
+        log.write("  -Number of controls:{}".format(ncontrol), verbose=verbose)
+        if genotype_rr is not None:
+            log.write("  -Risk allele RR:{:.3f}".format(genotype_rr), verbose=verbose)
+        elif genotype_or is not None:
+            log.write("  -Risk allele OR:{:.3f}".format(genotype_or), verbose=verbose)
+        elif beta is not None:
+            log.write("  -Risk allele beta:{:.3f}".format(beta), verbose=verbose)
+        else:
+            genotype_rr = 0.1
+            log.write("  -Risk allele RR:{:.3f}".format(genotype_rr), verbose=verbose)
+        log.write("  -Disease prevalence:{:.3f}".format(prevalence), verbose=verbose)
+        log.write("  -Risk allele frequency: {:.3f}".format(daf), verbose=verbose)
+        log.write("  -Significance level: {:.3e}".format(sig_level), verbose=verbose)
         # Skol, A. D., Scott, L. J., Abecasis, G. R., & Boehnke, M. (2006). Joint analysis is more efficient than replication-based analysis for two-stage genome-wide association studies. Nature genetics, 38(2), 209-213.
         aaf = daf**2
         abf = 2 * (daf) * (1 - daf)
@@ -56,11 +55,11 @@ def get_power(
                 # https://jamanetwork.com/journals/jama/fullarticle/188182
             if or_to_rr ==False:
-                if verbose: log.write(" -Alogorithm: Skol, Andrew D., et al. Nature genetics 38.2 (2006): 209-213....")
-                if verbose: log.write(" -GRR is approximated using OR. For prevalence < 10%, GRR is very similar to OR....")
+                log.write(" -Alogorithm: Skol, Andrew D., et al. Nature genetics 38.2 (2006): 209-213....", verbose=verbose)
+                log.write(" -GRR is approximated using OR. For prevalence < 10%, GRR is very similar to OR....", verbose=verbose)
             else:
-                if verbose: log.write(" -OR is converted to GRR using base prevalence: {}".format(prevalence))
-                if verbose: log.write(" -Alogorithm: Zhang, J., & Kai, F. Y. (1998). What's the relative risk?: A method of correcting the odds ratio in cohort studies of common outcomes. Jama, 280(19), 1690-1691.....")
+                log.write(" -OR is converted to GRR using base prevalence: {}".format(prevalence), verbose=verbose)
+                log.write(" -Alogorithm: Zhang, J., & Kai, F. Y. (1998). What's the relative risk?: A method of correcting the odds ratio in cohort studies of common outcomes. Jama, 280(19), 1690-1691.....", verbose=verbose)
         # additive
         x = [ 2*genotype_rr-1, genotype_rr, 1 ]
@@ -68,19 +67,19 @@ def get_power(
         aap= x[0] * prevalence / (x[0]*aaf + x[1]*abf + x[2]*bbf)
         abp= x[1] * prevalence / (x[0]*aaf + x[1]*abf + x[2]*bbf)
         bbp= x[2] * prevalence / (x[0]*aaf + x[1]*abf + x[2]*bbf)
-        if verbose: log.write("Probability of disease :")
-        if verbose: log.write(" - Individuals with AA genotype: {:.3f}".format(aap))
-        if verbose: log.write(" - Individuals with AB genotype: {:.3f}".format(abp))
-        if verbose: log.write(" - Individuals with BB genotype: {:.3f}".format(bbp))
+        log.write("Probability of disease :", verbose=verbose)
+        log.write(" - Individuals with AA genotype: {:.3f}".format(aap), verbose=verbose)
+        log.write(" - Individuals with AB genotype: {:.3f}".format(abp), verbose=verbose)
+        log.write(" - Individuals with BB genotype: {:.3f}".format(bbp), verbose=verbose)
         pcase= (aap * aaf + abp * abf*0.5) / prevalence
         pcontrol=((1-aap )* aaf + (1-abp )* abf*0.5) / (1 - prevalence)
         vcase = pcase *(1-pcase)
         vcontrol =pcontrol *(1-pcontrol)
-        if verbose: log.write("Expected risk allele frequency:")
-        if verbose: log.write(" - In cases: {:.3f}".format(pcase))
-        if verbose: log.write(" - In controls: {:.3f}".format(pcontrol))
+        log.write("Expected risk allele frequency:", verbose=verbose)
+        log.write(" - In cases: {:.3f}".format(pcase), verbose=verbose)
+        log.write(" - In controls: {:.3f}".format(pcontrol), verbose=verbose)
         num= (pcase - pcontrol)
         den= np.sqrt( (vcase/ncase +  vcontrol/ncontrol)*0.5 )
@@ -88,22 +87,22 @@ def get_power(
         c = ss.norm.isf(sig_level/2)
         power = 1 - ss.norm.cdf(c-u) + ss.norm.cdf(-c-u)
-        if verbose: log.write("Expected power: {:.3f}".format(power))
+        log.write("Expected power: {:.3f}".format(power), verbose=verbose)
     elif mode=="q":
         if beta is None:
             beta = 0.1
-        if verbose:
-            log.write(" -Input settings (q mode):")
-            log.write("  -Significance level: {}".format(sig_level))
-            log.write("  -EAF: {}".format(eaf))
-            log.write("  -BETA: {}".format(beta))
-            log.write("  -N: {}".format(n))
-            log.write("  -SNPR2: {}".format(2*eaf*(1-eaf)*(beta**2)))
+        log.write(" -Input settings (q mode):", verbose=verbose)
+        log.write("  -Significance level: {}".format(sig_level), verbose=verbose)
+        log.write("  -EAF: {}".format(eaf), verbose=verbose)
+        log.write("  -BETA: {}".format(beta), verbose=verbose)
+        log.write("  -N: {}".format(n), verbose=verbose)
+        log.write("  -SNPR2: {}".format(2*eaf*(1-eaf)*(beta**2)), verbose=verbose)
         c = ss.chi2.isf(sig_level,df=1)
         NCP = n * 2*eaf*(1-eaf)*(beta**2)/vary
         power = 1 - ss.ncx2.cdf(c, df=1, nc=NCP)
-    if verbose: log.write("Finished calculating statistical power.")
+    log.write("Finished calculating statistical power.", verbose=verbose)
     return power
 def get_beta(
@@ -137,11 +136,11 @@ def get_beta(
             eafs = np.linspace(eaf_range[1],eaf_range[0],n_matrix)
             betas =  np.linspace(beta_range[0],beta_range[1],n_matrix)
-            if verbose: log.write(" -Updating eaf-beta matrix...")
+            log.write(" -Updating eaf-beta matrix...", verbose=verbose)
             for i in range(n_matrix):
                     eaf_beta_matrix[i,] = calculate_power_single(beta=betas,eaf=eafs[i],n=n,sig_level=sig_level,vary=vary)
-            if verbose: log.write(" -Extracting eaf-beta combinations with power = {}...".format(t))
+            log.write(" -Extracting eaf-beta combinations with power = {}...".format(t), verbose=verbose)
             i,j=1,1
             eaf_beta = []
             while i<n_matrix-1 and j<n_matrix-1:
@@ -207,11 +206,11 @@ def get_beta_binary(
         eafs = np.linspace(eaf_range[1],eaf_range[0],n_matrix)
         betas =  np.linspace(beta_range[0],beta_range[1],n_matrix)
-        if verbose: log.write(" -Updating eaf-beta matrix...")
+        log.write(" -Updating eaf-beta matrix...", verbose=verbose)
         if or_to_rr ==False:
-            if verbose: log.write(" -GRR is approximated using OR. For prevalence < 10%, GRR is very similar to OR....")
+            log.write(" -GRR is approximated using OR. For prevalence < 10%, GRR is very similar to OR....", verbose=verbose)
         else:
-            if verbose: log.write(" -OR is converted to GRR using base prevalence: {}".format(prevalence))
+            log.write(" -OR is converted to GRR using base prevalence: {}".format(prevalence), verbose=verbose)
         for i in range(n_matrix):
                 eaf_beta_matrix[i,] = calculate_power_single(beta=betas,
@@ -222,7 +221,7 @@ def get_beta_binary(
                                                                 sig_level=sig_level,
                                                                 or_to_rr=or_to_rr)
-        if verbose: log.write(" -Extracting eaf-beta combinations with power = {}...".format(t))
+        log.write(" -Extracting eaf-beta combinations with power = {}...".format(t), verbose=verbose)
         i,j=1,1
         eaf_beta = []
         while i<n_matrix-1 and j<n_matrix-1:

gwaslab/util_in_convert_h2.py CHANGED Viewed

@@ -65,7 +65,7 @@ def _get_per_snp_r2(sumstats,
            adjuested=False,
            verbose=True):
     # Pierce, B. L., Ahsan, H., & VanderWeele, T. J. (2011). Power and instrument strength requirements for Mendelian randomization studies using multiple genetic variants. International journal of epidemiology, 40(3), 740-752.
-    if verbose: log.write("Start to calculate per-SNP heritibility...")
+    log.write("Start to calculate per-SNP heritibility...", verbose=verbose)
     if type(k) is int or type(k) is float:
        pass
     elif k =="all":
@@ -81,18 +81,18 @@ def _get_per_snp_r2(sumstats,
             # Var(e) = betase**2 * 2 * N * MAF * (1-MAF)
             # r2 = Var(beta * X) / Var(y)
-            if verbose: log.write(" -Calculating per-SNP rsq by 2 * (BETA**2) * AF * (1-AF) / Var(y)...")
+            log.write(" -Calculating per-SNP rsq by 2 * (BETA**2) * AF * (1-AF) / Var(y)...", verbose=verbose)
             sumstats["_VAR(BETAX)"] = 2*(sumstats[beta]**2)*sumstats[af]*(1-sumstats[af])
             if type(vary) is int or type(vary) is float:
-                if verbose: log.write(" -Var(y) is provided: {}...".format(vary))
+                log.write(" -Var(y) is provided: {}...".format(vary), verbose=verbose)
                 sumstats["SNPR2"] = sumstats["_VAR(BETAX)"] / vary
             elif vary=="se":
-                if verbose: log.write(" -Var(y) is estimated from VAR(BETA * X), N, MAF, SE: {}...".format(vary))
+                log.write(" -Var(y) is estimated from VAR(BETA * X), N, MAF, SE: {}...".format(vary), verbose=verbose)
                 sumstats["_SIGMA2"] = sumstats[se]**2 * 2*(sumstats[n])*sumstats[af]*(1-sumstats[af])
                 sumstats["SNPR2"] = sumstats["_VAR(BETAX)"] / (sumstats["_SIGMA2"] + sumstats["_VAR(BETAX)"])
         else:
-            if verbose: log.write(" -Warning: Not enough informationfor calculation.")
+            log.warning("Not enough information for calculation.")
     if mode=="b":
         if ncase not in sumstats.columns:
@@ -117,11 +117,11 @@ def _get_per_snp_r2(sumstats,
     else:
         snpr2 = "SNPR2"
     if n in sumstats.columns:
-        if verbose: log.write(" -Calculating F-statistic: F = [(N-k-1)/k] * (r2/1-r2)... where k = {}".format(k))
-        if verbose: log.write(" -For r2, {} is used.".format(snpr2))
+        log.write(" -Calculating F-statistic: F = [(N-k-1)/k] * (r2/1-r2)... where k = {}".format(k), verbose=verbose)
+        log.write(" -For r2, {} is used.".format(snpr2), verbose=verbose)
         sumstats["F"] = sumstats[snpr2]*(sumstats[n]-1 -k)/((1-sumstats[snpr2]) * k)
-    if verbose: log.write("Finished calculating per-SNP heritability!")
+    log.write("Finished calculating per-SNP heritability!", verbose=verbose)
     return sumstats
 #
 def get_population_allele_frequency(af, prop, odds_ratio, prevalence,eps=1e-15):

gwaslab 3.4.38__py3-none-any.whl → 3.4.39__py3-none-any.whl

Potentially problematic release.

gwaslab 3.4.38__py3-none-any.whl → 3.4.39__py3-none-any.whl