PyPI - gwaslab - Versions diffs - 3.4.37__py3-none-any.whl → 3.4.39__py3-none-any.whl - Mend

gwaslab 3.4.37__py3-none-any.whl → 3.4.39__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of gwaslab might be problematic. Click here for more details.

Files changed (57) hide show

gwaslab/bd_common_data.py +6 -3
gwaslab/bd_download.py +9 -9
gwaslab/bd_get_hapmap3.py +43 -9
gwaslab/data/formatbook.json +722 -721
gwaslab/g_Log.py +22 -5
gwaslab/g_Sumstats.py +110 -163
gwaslab/g_SumstatsPair.py +76 -25
gwaslab/g_SumstatsT.py +2 -2
gwaslab/g_Sumstats_summary.py +3 -3
gwaslab/g_version.py +10 -10
gwaslab/hm_casting.py +36 -17
gwaslab/hm_harmonize_sumstats.py +354 -221
gwaslab/hm_rsid_to_chrpos.py +1 -1
gwaslab/io_preformat_input.py +49 -43
gwaslab/io_read_ldsc.py +49 -1
gwaslab/io_to_formats.py +428 -295
gwaslab/ldsc_irwls.py +198 -0
gwaslab/ldsc_jackknife.py +514 -0
gwaslab/ldsc_ldscore.py +417 -0
gwaslab/ldsc_parse.py +294 -0
gwaslab/ldsc_regressions.py +747 -0
gwaslab/ldsc_sumstats.py +629 -0
gwaslab/qc_check_datatype.py +3 -3
gwaslab/qc_fix_sumstats.py +891 -778
gwaslab/util_ex_calculate_ldmatrix.py +31 -13
gwaslab/util_ex_gwascatalog.py +25 -25
gwaslab/util_ex_ldproxyfinder.py +10 -10
gwaslab/util_ex_ldsc.py +189 -0
gwaslab/util_ex_process_ref.py +3 -3
gwaslab/util_ex_run_coloc.py +26 -4
gwaslab/util_in_calculate_gc.py +6 -6
gwaslab/util_in_calculate_power.py +42 -43
gwaslab/util_in_convert_h2.py +8 -8
gwaslab/util_in_fill_data.py +30 -30
gwaslab/util_in_filter_value.py +201 -74
gwaslab/util_in_get_density.py +10 -10
gwaslab/util_in_get_sig.py +445 -71
gwaslab/viz_aux_annotate_plot.py +12 -12
gwaslab/viz_aux_quickfix.py +42 -37
gwaslab/viz_aux_reposition_text.py +10 -7
gwaslab/viz_aux_save_figure.py +18 -8
gwaslab/viz_plot_compare_af.py +32 -33
gwaslab/viz_plot_compare_effect.py +63 -71
gwaslab/viz_plot_miamiplot2.py +34 -26
gwaslab/viz_plot_mqqplot.py +126 -75
gwaslab/viz_plot_qqplot.py +11 -8
gwaslab/viz_plot_regionalplot.py +36 -33
gwaslab/viz_plot_rg_heatmap.py +28 -26
gwaslab/viz_plot_stackedregional.py +40 -21
gwaslab/viz_plot_trumpetplot.py +65 -61
gwaslab-3.4.39.dist-info/LICENSE +674 -0
{gwaslab-3.4.37.dist-info → gwaslab-3.4.39.dist-info}/METADATA +5 -4
gwaslab-3.4.39.dist-info/RECORD +80 -0
gwaslab-3.4.37.dist-info/RECORD +0 -72
/gwaslab-3.4.37.dist-info/LICENSE → /gwaslab-3.4.39.dist-info/LICENSE_before_v3.4.39 +0 -0
{gwaslab-3.4.37.dist-info → gwaslab-3.4.39.dist-info}/WHEEL +0 -0
{gwaslab-3.4.37.dist-info → gwaslab-3.4.39.dist-info}/top_level.txt +0 -0

gwaslab/bd_common_data.py CHANGED Viewed

@@ -280,17 +280,20 @@ def gtf_to_protein_coding(gtfpath,log=Log(),verbose=True):
     protein_coding_path = gtfpath[:-6]+"protein_coding.gtf.gz"
     # if not existing, extract protein coding records and output to a new file
     if not path.isfile(protein_coding_path):
         # get gene list
-        if verbose: log.write(" - Extracting protein_coding genes from {}".format(gtfpath))
+        log.write(" - Extracting protein_coding genes from {}".format(gtfpath),verbose=verbose)
         gtf = read_gtf(gtfpath,usecols=["feature","gene_biotype","gene_id","gene_name"])
         gene_list = gtf.loc[(gtf["feature"]=="gene") & (gtf["gene_biotype"]=="protein_coding"),"gene_id"].values
-        if verbose: log.write(" - Loaded {} protein_coding genes.".format(len(gene_list)))
+        log.write(" - Loaded {} protein_coding genes.".format(len(gene_list)),verbose=verbose)
         # extract entry using csv
         gtf_raw = pd.read_csv(gtfpath,sep="\t",header=None,comment="#",dtype="string")
         gtf_raw["_gene_id"] = gtf_raw[8].str.extract(r'gene_id "([\w\.-]+)"')
         gtf_raw = gtf_raw.loc[ gtf_raw["_gene_id"].isin(gene_list) ,:]
         gtf_raw = gtf_raw.drop("_gene_id",axis=1)
-        if verbose: log.write(" - Extracted records are saved to : {} ".format(protein_coding_path))
+        log.write(" - Extracted records are saved to : {} ".format(protein_coding_path),verbose=verbose)
         gtf_raw.to_csv(protein_coding_path, header=None, index=None, sep="\t")
     return protein_coding_path

gwaslab/bd_download.py CHANGED Viewed

@@ -106,7 +106,7 @@ def check_available_ref(log=Log(),verbose=True):
     Check available reference files for gwaslab.
     Return a dictionary of available reference files.
     '''
-    if verbose : log.write("Start to check available reference files...")
+    log.write("Start to check available reference files...", verbose=verbose)
     #ref_path = path.dirname(__file__) + '/data/reference.json'
     ref_path = options.paths["reference"]
     if not path.exists(ref_path):
@@ -115,11 +115,11 @@ def check_available_ref(log=Log(),verbose=True):
     dicts = json.load(open(ref_path))
     if dicts is not None:
         for key,value in dicts.items():
-            if verbose :log.write(" -",key," : ",value)
+            log.write(" -",key," : ",value, verbose=verbose)
         return dicts
     else:
-        if verbose :log.write(" -No available reference files.")
-    if verbose :log.write("Finished checking available reference files...")
+        log.write(" -No available reference files.", verbose=verbose)
+    log.write("Finished checking available reference files...", verbose=verbose)
     return {}
 def update_available_ref(log=Log()):
@@ -167,8 +167,8 @@ def get_path(name,log=Log(),verbose=True):
     #config_path =  path.dirname(__file__) + '/data/config.json'
     config_path = options.paths["config"]
     if not path.exists(config_path):
-        if verbose : log.write("Config file not exists...")
-        if verbose : log.write("Created new config file...")
+        log.write("Config file not exists...", verbose=verbose)
+        log.write("Created new config file...", verbose=verbose)
         initiate_config()
     else:
         try:
@@ -176,9 +176,9 @@ def get_path(name,log=Log(),verbose=True):
             if path.exists(dicts[name]):
                 return dicts[name]
             else:
-                if verbose : log.write("File not exist.")
+                log.write("File not exist.", verbose=verbose)
         except:
-            if verbose : log.write("No records in config file. Please download first.")
+            log.write("No records in config file. Please download first.", verbose=verbose)
     return False
 ##################################################################################
@@ -277,7 +277,7 @@ def check_file_integrity(local_path, md5sum,log):
         log.write(" -MD5 verified.")
         return 1
     else:
-        log.write("WARNING: -MD5 VERIFICATION FAILED !")
+        log.warning("-MD5 VERIFICATION FAILED!")
         return 0
 def remove_file(name,log=Log()):

gwaslab/bd_get_hapmap3.py CHANGED Viewed

@@ -1,6 +1,10 @@
 import pandas as pd
 from os import path
 from gwaslab.g_Log import Log
+from gwaslab.qc_fix_sumstats import start_to
+from gwaslab.qc_fix_sumstats import skipped
+from gwaslab.qc_fix_sumstats import finished
 #A unique identifier (e.g., the rs number)
 #Allele 1 (effect allele)
 #Allele 2 (non-effect allele)
@@ -8,30 +12,60 @@ from gwaslab.g_Log import Log
 #A P-value
 #A signed summary statistic (beta, OR, log odds, Z-score, etc)
-def gethapmap3(sumstats,rsid="rsID",chrom="CHR", pos="POS", ea="EA", nea="NEA",build="19", verbose=True,log=Log()):
-    if verbose:log.write(" -Processing "+str(len(sumstats))+" raw variants...")
+def gethapmap3(sumstats,rsid="rsID",chrom="CHR", pos="POS", ea="EA", nea="NEA",build="19", verbose=True, match_allele= True, log=Log()):
+    ##start function with col checking##########################################################
+    _start_line = "extract HapMap3 SNPs"
+    _end_line = "extracting HapMap3 SNPs"
+    _start_cols =[]
+    _start_function = ".gethapmap3"
+    _must_args ={}
+    is_enough_info = start_to(sumstats=sumstats,
+                            log=log,
+                            verbose=verbose,
+                            start_line=_start_line,
+                            end_line=_end_line,
+                            start_cols=_start_cols,
+                            start_function=_start_function,
+                            **_must_args)
+    if is_enough_info == False: return None
+    ############################################################################################
     if build=="19":
         data_path =  path.dirname(__file__) + '/data/hapmap3_SNPs/hapmap3_db150_hg19.snplist.gz'
     elif build=="38":
         data_path =  path.dirname(__file__) + '/data/hapmap3_SNPs/hapmap3_db151_hg38.snplist.gz'
-    if verbose:log.write(" -Loading Hapmap3 variants data...")
-    hapmap3_ref = pd.read_csv(data_path,sep="\s+",usecols=["#CHROM","POS","rsid"],dtype={"#CHROM":"string","POS":"string"})
+    log.write(" -Loading Hapmap3 variants from built-in datasets...", verbose=verbose)
+    if match_allele:
+        additional_cols= ["A1","A2"]
+    else:
+        additional_cols=[]
+    hapmap3_ref = pd.read_csv(data_path,sep="\s+",usecols=["#CHROM","POS","rsid"]+additional_cols, dtype={"#CHROM":"string","POS":"string"})
     #rsid    A1      A2      #CHROM  POS
     #rs3094315       G       A       1       752566
     if rsid in sumstats.columns:
         output = sumstats.loc[sumstats[rsid].isin(hapmap3_ref["rsid"].values),:].copy()
         return output
     elif chrom in sumstats.columns and pos in sumstats.columns:
-        if verbose: log.write(" -Since rsID not in sumstats, chr:pos( build "+build+") will be used for matching...")
+        log.write(" -Since rsID not in sumstats, CHR:POS( build "+build+") will be used for matching...", verbose=verbose)
         sumstats   ["chr:pos"] = sumstats[chrom].astype("string")+":"+sumstats[pos].astype("string")
         hapmap3_ref["chr:pos"] = hapmap3_ref["#CHROM"]+":"+hapmap3_ref["POS"]
         hapmap3_ref = hapmap3_ref.rename(columns={"rsid":"rsID"})
-        output = pd.merge(sumstats,hapmap3_ref.loc[:,["chr:pos","rsID"]],left_on="chr:pos",right_on="chr:pos",how="inner",suffixes=('', '_hapmap3')).copy()
-        output = output.drop(columns="chr:pos")
-        if verbose: log.write(" -Raw input contains "+str(len(output))+" hapmaps variants based on chr:pos...")
+        output = pd.merge(sumstats,hapmap3_ref.loc[:,["chr:pos","rsID"]+additional_cols],left_on="chr:pos",right_on="chr:pos",how="inner",suffixes=('', '_hapmap3')).copy()
+        if match_allele:
+            log.write(" -Checking if alleles are same...")
+            is_matched = ((output[ea].astype("string") == output["A1"]) & (output[nea].astype("string") == output["A2"])) \
+                            | ((output[ea].astype("string") == output["A2"]) & (output[nea].astype("string") == output["A1"]))
+            log.write(" -Variants with macthed alleles: {}".format(sum(is_matched)))
+            output = output.loc[is_matched,:]
+        output = output.drop(columns=["chr:pos"]+additional_cols)
+        log.write(" -Raw input contains "+str(len(output))+" Hapmap3 variants based on CHR:POS...", verbose=verbose)
+        finished(log=log,verbose=verbose,end_line=_end_line)
         return output
     else:
         raise ValueError("Not enough information to match SNPs. Please check your sumstats...")

gwaslab 3.4.37__py3-none-any.whl → 3.4.39__py3-none-any.whl

Potentially problematic release.

gwaslab 3.4.37__py3-none-any.whl → 3.4.39__py3-none-any.whl