PyPI - gwaslab - Versions diffs - 3.4.37__py3-none-any.whl → 3.4.39__py3-none-any.whl - Mend

gwaslab 3.4.37__py3-none-any.whl → 3.4.39__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of gwaslab might be problematic. Click here for more details.

Files changed (57) hide show

gwaslab/bd_common_data.py +6 -3
gwaslab/bd_download.py +9 -9
gwaslab/bd_get_hapmap3.py +43 -9
gwaslab/data/formatbook.json +722 -721
gwaslab/g_Log.py +22 -5
gwaslab/g_Sumstats.py +110 -163
gwaslab/g_SumstatsPair.py +76 -25
gwaslab/g_SumstatsT.py +2 -2
gwaslab/g_Sumstats_summary.py +3 -3
gwaslab/g_version.py +10 -10
gwaslab/hm_casting.py +36 -17
gwaslab/hm_harmonize_sumstats.py +354 -221
gwaslab/hm_rsid_to_chrpos.py +1 -1
gwaslab/io_preformat_input.py +49 -43
gwaslab/io_read_ldsc.py +49 -1
gwaslab/io_to_formats.py +428 -295
gwaslab/ldsc_irwls.py +198 -0
gwaslab/ldsc_jackknife.py +514 -0
gwaslab/ldsc_ldscore.py +417 -0
gwaslab/ldsc_parse.py +294 -0
gwaslab/ldsc_regressions.py +747 -0
gwaslab/ldsc_sumstats.py +629 -0
gwaslab/qc_check_datatype.py +3 -3
gwaslab/qc_fix_sumstats.py +891 -778
gwaslab/util_ex_calculate_ldmatrix.py +31 -13
gwaslab/util_ex_gwascatalog.py +25 -25
gwaslab/util_ex_ldproxyfinder.py +10 -10
gwaslab/util_ex_ldsc.py +189 -0
gwaslab/util_ex_process_ref.py +3 -3
gwaslab/util_ex_run_coloc.py +26 -4
gwaslab/util_in_calculate_gc.py +6 -6
gwaslab/util_in_calculate_power.py +42 -43
gwaslab/util_in_convert_h2.py +8 -8
gwaslab/util_in_fill_data.py +30 -30
gwaslab/util_in_filter_value.py +201 -74
gwaslab/util_in_get_density.py +10 -10
gwaslab/util_in_get_sig.py +445 -71
gwaslab/viz_aux_annotate_plot.py +12 -12
gwaslab/viz_aux_quickfix.py +42 -37
gwaslab/viz_aux_reposition_text.py +10 -7
gwaslab/viz_aux_save_figure.py +18 -8
gwaslab/viz_plot_compare_af.py +32 -33
gwaslab/viz_plot_compare_effect.py +63 -71
gwaslab/viz_plot_miamiplot2.py +34 -26
gwaslab/viz_plot_mqqplot.py +126 -75
gwaslab/viz_plot_qqplot.py +11 -8
gwaslab/viz_plot_regionalplot.py +36 -33
gwaslab/viz_plot_rg_heatmap.py +28 -26
gwaslab/viz_plot_stackedregional.py +40 -21
gwaslab/viz_plot_trumpetplot.py +65 -61
gwaslab-3.4.39.dist-info/LICENSE +674 -0
{gwaslab-3.4.37.dist-info → gwaslab-3.4.39.dist-info}/METADATA +5 -4
gwaslab-3.4.39.dist-info/RECORD +80 -0
gwaslab-3.4.37.dist-info/RECORD +0 -72
/gwaslab-3.4.37.dist-info/LICENSE → /gwaslab-3.4.39.dist-info/LICENSE_before_v3.4.39 +0 -0
{gwaslab-3.4.37.dist-info → gwaslab-3.4.39.dist-info}/WHEEL +0 -0
{gwaslab-3.4.37.dist-info → gwaslab-3.4.39.dist-info}/top_level.txt +0 -0

gwaslab/g_Sumstats_summary.py CHANGED Viewed

@@ -15,7 +15,7 @@ def summarize(insumstats,
     for i in [snpid,rsid,eaf,p,n,status]:
         if i in insumstats.columns:
             cols.append(i)
-    sumstats= insumstats.loc[:,cols].copy()
+    sumstats= insumstats[cols].copy()
     ###############################################################################
     numeric_cols=[]
     output = {}
@@ -68,7 +68,7 @@ def summarize(insumstats,
         sumstats.drop(columns='uniq_index',inplace=True)
         status_dic = {}
         for index,row in status_summary.iterrows():
-            status_dic[str(index)]=row[0]
+            status_dic[str(index)]=row.iloc[0]
         output["STATUS"]=status_dic
         numeric_cols.append("STATUS")
     df = pd.DataFrame.from_dict({(i,j): output[i][j]
@@ -84,7 +84,7 @@ def summarize(insumstats,
     return df
 def sum_status(id_to_use, sumstats):
-        results = sumstats.groupby("STATUS").count()
+        results = sumstats.groupby("STATUS",observed=True).count()
         results = results.loc[results[id_to_use]>0,:].sort_values(id_to_use,ascending=False)
         return results

gwaslab/g_version.py CHANGED Viewed

@@ -3,10 +3,10 @@ import subprocess
 import os
 import numpy as np
-def _show_version(log=Log()):
+def _show_version(log=Log(), verbose=True):
     # show version when loading sumstats
-    log.write("GWASLab v{} https://cloufield.github.io/gwaslab/".format(gwaslab_info()["version"]))
-    log.write("(C) 2022-2024, Yunye He, Kamatani Lab, MIT License, gwaslab@gmail.com")
+    log.write("GWASLab v{} https://cloufield.github.io/gwaslab/".format(gwaslab_info()["version"]),verbose=verbose)
+    log.write("(C) 2022-2024, Yunye He, Kamatani Lab, MIT License, gwaslab@gmail.com",verbose=verbose)
 def _get_version():
     # return short version string like v3.4.33
@@ -15,12 +15,12 @@ def _get_version():
 def gwaslab_info():
     # version meta information
     dic={
-   "version":"3.4.37",
-   "release_date":"20240129"
+   "version":"3.4.39",
+   "release_date":"20240210"
     }
     return dic
-def _checking_plink_version(v=2,log=Log()):
+def _checking_plink_version(v=2,log=Log(), verbose=True):
     if v==1:
         which_plink_script = "plink --version"
     elif v==2:
@@ -29,19 +29,19 @@ def _checking_plink_version(v=2,log=Log()):
     log.write("   -PLINK version: {}".format(output.strip()))
     return log
-def _checking_r_version(r, log):
+def _checking_r_version(r, log=Log(), verbose=True):
     which_r_script = "{} --version".format(r)
     output = subprocess.check_output(which_r_script, stderr=subprocess.STDOUT, shell=True,text=True)
-    log.write(" -R version: {}".format(output.strip()))
+    log.write(" -R version: {}".format(output.strip()),verbose=verbose)
     return log
-def _check_susie_version(r,log):
+def _check_susie_version(r,log=Log(), verbose=True):
     rscript = 'print(packageVersion("susieR"))'
     temp_r = "_gwaslab_susie_temp_check_version_{}.R".format(np.random.randint(1, 99999999))
     with open(temp_r,"w") as file:
         file.write(rscript)
     which_susie_script = "{} {}".format(r, temp_r)
     output = subprocess.check_output(which_susie_script, stderr=subprocess.STDOUT, shell=True,text=True)
-    log.write(" -SuSieR version: {}".format(output.strip()))
+    log.write(" -SuSieR version: {}".format(output.strip()),verbose=verbose)
     os.remove(temp_r)
     return log

gwaslab/hm_casting.py CHANGED Viewed

@@ -5,18 +5,24 @@ from pandas.api.types import CategoricalDtype
 from gwaslab.g_vchange_status import copy_status
 from gwaslab.g_vchange_status import vchange_status
 from gwaslab.qc_fix_sumstats import flipallelestats
+from gwaslab.qc_check_datatype import check_datatype
+from gwaslab.qc_fix_sumstats import start_to
 from gwaslab.util_in_fill_data import filldata
 from Bio import SeqIO
 from itertools import combinations
-def _merge_mold_with_sumstats(mold, sumstats, ref_path=None, windowsizeb=10, log=Log(),suffixes=("_MOLD",""),verbose=True,return_not_matched_mold =False):
+def _merge_mold_with_sumstats_by_chrpos(mold, sumstats, ref_path=None, windowsizeb=10, log=Log(),suffixes=("_MOLD",""),verbose=True,return_not_matched_mold =False):
     cols_to_drop = []
     for i in sumstats.columns:
         if i in ["SNPID","rsID"]:
             cols_to_drop.append(i)
+    log.write("Start to merge sumstats...", verbose=verbose)
     if len(cols_to_drop)>0:
-        log.write("Dropping old IDs:{}".format(cols_to_drop))
+        log.write(" -Dropping old IDs:{}".format(cols_to_drop), verbose=verbose)
         sumstats = sumstats.drop(columns=cols_to_drop)
     if ref_path is not None :
@@ -29,18 +35,20 @@ def _merge_mold_with_sumstats(mold, sumstats, ref_path=None, windowsizeb=10, log
     if return_not_matched_mold:
         mold["_IDENTIFIER_FOR_VARIANT"] = range(len(mold))
+    # mold sumffix + mold
     mold_sumstats = pd.merge(mold, sumstats, on=["CHR","POS"], how="inner",suffixes=suffixes)
-    log.write("After merging by CHR and POS:{}".format(len(mold_sumstats)))
+    log.write(" -After merging by CHR and POS:{}".format(len(mold_sumstats)), verbose=verbose)
     mold_sumstats = _keep_variants_with_same_allele_set(mold_sumstats,suffixes=suffixes)
-    log.write("Matched variants:{}".format(len(mold_sumstats)))
+    log.write(" -Matched variants:{}".format(len(mold_sumstats)), verbose=verbose)
-    if ref_path is not None:
-        # match removed sumstats
-        mold_removed = mold.loc[~mold[index1].isin(mold_sumstats[index1]),:]
-        iron_removed = sumstats.loc[~sumstats[index2].isin(mold_sumstats[index2]),:]
-        _match_two_sumstats(mold_removed,iron_removed,ref_path,windowsizeb=windowsizeb)
-        mold_sumstats.drop(columns=["_INDEX",""])
+    #if ref_path is not None:
+    #    # match removed sumstats
+    #    mold_removed = mold.loc[~mold[index1].isin(mold_sumstats[index1]),:]
+    #    iron_removed = sumstats.loc[~sumstats[index2].isin(mold_sumstats[index2]),:]
+    #    _match_two_sumstats(mold_removed,iron_removed,ref_path,windowsizeb=windowsizeb)
+    #    mold_sumstats.drop(columns=["_INDEX",""])
     if return_not_matched_mold == True:
         sumstats1 = mold.loc[~mold["_IDENTIFIER_FOR_VARIANT"].isin(mold_sumstats["_IDENTIFIER_FOR_VARIANT"]),:]
@@ -59,14 +67,17 @@ def _keep_variants_with_same_allele_set(sumstats, log=Log(),verbose=True,suffixe
     all_alleles = set(list(sumstats[ea1].unique())+list(sumstats[nea1].unique())+list(sumstats[ea2].unique())+list(sumstats[nea2].unique()))
     allele_type = CategoricalDtype(categories=all_alleles, ordered=False)
-    sumstats.loc[:, [nea1,ea1,nea2,ea2]] = sumstats.loc[:, [nea1,ea1,nea2,ea2]].astype(allele_type)
+    sumstats[[nea1,ea1,nea2,ea2]] = sumstats[[nea1,ea1,nea2,ea2]].astype(allele_type)
     is_perfect_match = (sumstats[ea2] == sumstats[ea1]) & (sumstats[nea2] == sumstats[nea1])
     is_flipped_match = (sumstats[ea2] == sumstats[nea1]) & (sumstats[nea2] == sumstats[ea1])
     is_allele_set_match = is_flipped_match | is_perfect_match
-    sumstats.loc[~is_allele_set_match,:]
+    log.write(" -Matching alleles and keeping only variants with same allele set: ", verbose=verbose)
+    log.write("  -Perfect match: {}".format(sum(is_perfect_match)), verbose=verbose)
+    log.write("  -Flipped match: {}".format(sum(is_flipped_match)), verbose=verbose)
+    log.write("  -Unmatched : {}".format(sum(~is_allele_set_match)), verbose=verbose)
     return sumstats.loc[is_allele_set_match,:]
 def _align_with_mold(sumstats, log=Log(),verbose=True, suffixes=("_MOLD","")):
@@ -77,10 +88,18 @@ def _align_with_mold(sumstats, log=Log(),verbose=True, suffixes=("_MOLD","")):
     nea2="NEA"+suffixes[1]
     status1="STATUS"+suffixes[0]
     status2="STATUS"+suffixes[1]
     is_perfect_match = (sumstats[ea2] == sumstats[ea1]) & (sumstats[nea2] == sumstats[nea1])
     is_flipped_match = (sumstats[ea2] == sumstats[nea1]) & (sumstats[nea2] == sumstats[ea1])
+    log.write(" -Aligning alleles with reference: ", verbose=verbose)
+    log.write("  -Perfect match: {}".format(sum(is_perfect_match)), verbose=verbose)
+    log.write("  -Flipped match: {}".format(sum(is_flipped_match)), verbose=verbose)
+    log.write("  -For perfect match: copy STATUS from reference...", verbose=verbose)
     sumstats.loc[is_perfect_match,status2] = copy_status(sumstats.loc[is_perfect_match,status1], sumstats.loc[is_perfect_match,status2],6)
+    log.write("  -For Flipped match: convert STATUS xxxxx[456789]x to xxxxx3x...", verbose=verbose)
     sumstats.loc[is_flipped_match,status2] = vchange_status(sumstats.loc[is_flipped_match,status2],6,"456789","333333")
     return sumstats
@@ -119,9 +138,9 @@ def _sort_pair_cols(molded_sumstats, verbose=True, log=Log(), order=None, stats_
         if i not in order:
             output_columns.append(i)
-    if verbose: log.write(" -Reordering columns to    :", ",".join(output_columns))
-    molded_sumstats = molded_sumstats.loc[:, output_columns]
-    if verbose: log.write("Finished sorting columns successfully!")
+    log.write(" -Reordering columns to    :", ",".join(output_columns), verbose=verbose)
+    molded_sumstats = molded_sumstats[ output_columns]
+    log.write("Finished sorting columns successfully!", verbose=verbose)
     return molded_sumstats
@@ -154,7 +173,7 @@ def _match_two_sumstats(mold,sumstats,ref_path,windowsizeb=25,verbose=True,log=L
             record_chr = int(str(record.id).strip("chrCHR").upper())
             if record_chr in chromlist:
-                if verbose:  log.write(record_chr," ", end="",show_time=False)
+                log.write(record_chr," ", end="",show_time=False,verbose=verbose)
                 chromlist.remove(record_chr)
             else:
                 continue

gwaslab 3.4.37__py3-none-any.whl → 3.4.39__py3-none-any.whl

Potentially problematic release.

gwaslab 3.4.37__py3-none-any.whl → 3.4.39__py3-none-any.whl