PyPI - gwaslab - Versions diffs - 3.4.37__py3-none-any.whl → 3.4.39__py3-none-any.whl - Mend

gwaslab 3.4.37__py3-none-any.whl → 3.4.39__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of gwaslab might be problematic. Click here for more details.

Files changed (57)

gwaslab/bd_common_data.py +6 -3
gwaslab/bd_download.py +9 -9
gwaslab/bd_get_hapmap3.py +43 -9
gwaslab/data/formatbook.json +722 -721
gwaslab/g_Log.py +22 -5
gwaslab/g_Sumstats.py +110 -163
gwaslab/g_SumstatsPair.py +76 -25
gwaslab/g_SumstatsT.py +2 -2
gwaslab/g_Sumstats_summary.py +3 -3
gwaslab/g_version.py +10 -10
gwaslab/hm_casting.py +36 -17
gwaslab/hm_harmonize_sumstats.py +354 -221
gwaslab/hm_rsid_to_chrpos.py +1 -1
gwaslab/io_preformat_input.py +49 -43
gwaslab/io_read_ldsc.py +49 -1
gwaslab/io_to_formats.py +428 -295
gwaslab/ldsc_irwls.py +198 -0
gwaslab/ldsc_jackknife.py +514 -0
gwaslab/ldsc_ldscore.py +417 -0
gwaslab/ldsc_parse.py +294 -0
gwaslab/ldsc_regressions.py +747 -0
gwaslab/ldsc_sumstats.py +629 -0
gwaslab/qc_check_datatype.py +3 -3
gwaslab/qc_fix_sumstats.py +891 -778
gwaslab/util_ex_calculate_ldmatrix.py +31 -13
gwaslab/util_ex_gwascatalog.py +25 -25
gwaslab/util_ex_ldproxyfinder.py +10 -10
gwaslab/util_ex_ldsc.py +189 -0
gwaslab/util_ex_process_ref.py +3 -3
gwaslab/util_ex_run_coloc.py +26 -4
gwaslab/util_in_calculate_gc.py +6 -6
gwaslab/util_in_calculate_power.py +42 -43
gwaslab/util_in_convert_h2.py +8 -8
gwaslab/util_in_fill_data.py +30 -30
gwaslab/util_in_filter_value.py +201 -74
gwaslab/util_in_get_density.py +10 -10
gwaslab/util_in_get_sig.py +445 -71
gwaslab/viz_aux_annotate_plot.py +12 -12
gwaslab/viz_aux_quickfix.py +42 -37
gwaslab/viz_aux_reposition_text.py +10 -7
gwaslab/viz_aux_save_figure.py +18 -8
gwaslab/viz_plot_compare_af.py +32 -33
gwaslab/viz_plot_compare_effect.py +63 -71
gwaslab/viz_plot_miamiplot2.py +34 -26
gwaslab/viz_plot_mqqplot.py +126 -75
gwaslab/viz_plot_qqplot.py +11 -8
gwaslab/viz_plot_regionalplot.py +36 -33
gwaslab/viz_plot_rg_heatmap.py +28 -26
gwaslab/viz_plot_stackedregional.py +40 -21
gwaslab/viz_plot_trumpetplot.py +65 -61
gwaslab-3.4.39.dist-info/LICENSE +674 -0
{gwaslab-3.4.37.dist-info → gwaslab-3.4.39.dist-info}/METADATA +5 -4
gwaslab-3.4.39.dist-info/RECORD +80 -0
gwaslab-3.4.37.dist-info/RECORD +0 -72
/gwaslab-3.4.37.dist-info/LICENSE → /gwaslab-3.4.39.dist-info/LICENSE_before_v3.4.39 +0 -0
{gwaslab-3.4.37.dist-info → gwaslab-3.4.39.dist-info}/WHEEL +0 -0
{gwaslab-3.4.37.dist-info → gwaslab-3.4.39.dist-info}/top_level.txt +0 -0

gwaslab/viz_aux_annotate_plot.py CHANGED Viewed

@@ -58,7 +58,7 @@ def annotate_single(
                 annotation_col="CHR:POS"
         elif anno:
                 annotation_col=anno
-        if verbose: log.write(" -Annotating using column "+annotation_col+"...")
+        log.write(" -Annotating using column "+annotation_col+"...", verbose=verbose)
         ## calculate y span
         if region is not None:
@@ -66,7 +66,7 @@ def annotate_single(
         else:
             y_span = sumstats["i"].max()-sumstats["i"].min()
-        if verbose: log.write(" -Adjusting text positions with repel_force={}...".format(repel_force))
+        log.write(" -Adjusting text positions with repel_force={}...".format(repel_force), verbose=verbose)
         if anno_style == "expand" :
             to_annotate.loc[:, "ADJUSTED_i"] = adjust_text_position(to_annotate["i"].values.copy(), y_span, repel_force,max_iter=anno_max_iter,log=log,amode=amode,verbose=verbose)
         ##  iterate through variants to be annotated
@@ -214,7 +214,7 @@ def annotate_single(
             anno_count +=1
         #anno_adjust_keyargs = {"arrowprops":dict(arrowstyle='->', color='grey', linewidth=0.1,relpos=(0.5,0.5))}
         if anno_adjust==True:
-            if verbose: log.write(" -Auto-adjusting text positions...")
+            log.write(" -Auto-adjusting text positions...", verbose=verbose)
             adjust_text(texts = anno_to_adjust_list,
                         autoalign=False,
                         only_move={'points':'x', 'text':'x', 'objects':'x'},
@@ -232,7 +232,7 @@ def annotate_single(
                         )
     else:
-        if verbose: log.write(" -Skip annotating")
+        log.write(" -Skip annotating", verbose=verbose)
     return ax1
@@ -275,7 +275,7 @@ def annotate_pair(
         for index,ax,to_annotate_df,anno_d, anno_alias in [(0,ax1,to_annotate1,anno_d1,anno_alias1),(1,ax5,to_annotate5,anno_d2,anno_alias2)]:
             ###################### annotate() args
             if to_annotate_df.empty is True:
-                if verbose: log.write(" -Skipping annotation...")
+                log.write(" -Skipping annotation...", verbose=verbose)
                 continue
             fontweight = "normal"
@@ -313,7 +313,7 @@ def annotate_pair(
                             annotation_col=anno
                         else:
                             annotation_col=anno+"_"+str(index+1)
-                if verbose: log.write(" -Annotating using column "+annotation_col+"...")
+                log.write(" -Annotating using column "+annotation_col+"...", verbose=verbose)
                 ## calculate y span
                 if region is not None:
@@ -453,7 +453,7 @@ def annotate_pair(
                     anno_count +=1
             if anno_adjust==True:
-                if verbose: log.write(" -Auto-adjusting text positions for plot {}...".format(index))
+                log.write(" -Auto-adjusting text positions for plot {}...".format(index), verbose=verbose)
                 if index==0:
                     va="bottom"
                     ha='left'
@@ -476,7 +476,7 @@ def annotate_pair(
                             lim =anno_max_iter
                             )
     else:
-        if verbose: log.write(" -Skip annotating")
+        log.write(" -Skip annotating", verbose=verbose)
     return ax1,ax5
@@ -521,7 +521,7 @@ def annotate_subtype(
                 annotation_col="CHR:POS"
         elif anno:
                 annotation_col=anno
-        if verbose: log.write(" -Annotating using column "+annotation_col+"...")
+        log.write(" -Annotating using column "+annotation_col+"...", verbose=verbose)
         ## calculate y span
         if region is not None:
@@ -529,7 +529,7 @@ def annotate_subtype(
         else:
             y_span = sumstats["i"].max()-sumstats["i"].min()
-        if verbose: log.write(" -Adjusting text positions with repel_force={}...".format(repel_force))
+        log.write(" -Adjusting text positions with repel_force={}...".format(repel_force), verbose=verbose)
         if anno_style == "expand" :
             to_annotate.loc[:, "ADJUSTED_i"] = adjust_text_position(to_annotate["i"].values.copy(), y_span, repel_force,max_iter=anno_max_iter,log=log,verbose=verbose)
         ##  iterate through variants to be annotated
@@ -656,7 +656,7 @@ def annotate_subtype(
             anno_count +=1
         #anno_adjust_keyargs = {"arrowprops":dict(arrowstyle='->', color='grey', linewidth=0.1,relpos=(0.5,0.5))}
         if anno_adjust==True:
-            if verbose: log.write(" -Auto-adjusting text positions...")
+            log.write(" -Auto-adjusting text positions...", verbose=verbose)
             adjust_text(texts = anno_to_adjust_list,
                         autoalign=False,
                         only_move={'points':'x', 'text':'x', 'objects':'x'},
@@ -674,7 +674,7 @@ def annotate_subtype(
                         )
     else:
-        if verbose: log.write(" -Skip annotating")
+        log.write(" -Skip annotating", verbose=verbose)
     return ax1

gwaslab/viz_aux_quickfix.py CHANGED Viewed

@@ -5,7 +5,7 @@ from gwaslab.bd_common_data import get_chr_to_number
 from gwaslab.bd_common_data import get_number_to_chr
 from math import ceil
-def _quick_fix(sumstats, chr_dict=get_chr_to_number(), scaled=False, chrom="CHR", pos="POS", p="P", mlog10p="MLOG10P",verbose=True, log=Log()):
+def _quick_fix(sumstats, chr_dict=get_chr_to_number(), scaled=False, chrom="CHR", pos="POS", p="P", mlog10p="MLOG10P",log=Log(), verbose=True):
     '''
     quick sanity check for input sumstats
     '''
@@ -31,39 +31,40 @@ def _quick_fix_p_value(sumstats, p="P", mlog10p="MLOG10P", scaled=False,verbose=
     '''
     if scaled==True:
         # if scaled, add scaled P and P col
-        if verbose:log.write(" -P values are already scaled...")
-        if verbose:log.write(" -Sumstats -log10(P) values are being converted to P...")
+        log.write(" -P values are already scaled...", verbose=verbose)
+        log.write(" -Sumstats -log10(P) values are being converted to P...", verbose=verbose)
         sumstats["scaled_P"] = sumstats[mlog10p].copy()
         sumstats[p]= np.power(10,-sumstats[mlog10p].astype("float64"))
         return sumstats
     # bad p : na and outside (0,1]
     bad_p_value = (sumstats[p].isna()) | (sumstats[p] > 1) | (sumstats[p] <= 0)
-    if verbose:
-        log.write(" -Sanity check after conversion: " + str(sum(bad_p_value)) +
-                  " variants with P value outside of (0,1] will be removed...")
+    log.write(" -Sanity check after conversion: " + str(sum(bad_p_value)) +
+                  " variants with P value outside of (0,1] will be removed...", verbose=verbose)
     sumstats = sumstats.loc[~bad_p_value, :]
     return sumstats
-def _quick_fix_mlog10p(sumstats,p="P", mlog10p="MLOG10P", scaled=False, verbose=True, log=Log()):
+def _quick_fix_mlog10p(insumstats,p="P", mlog10p="MLOG10P", scaled=False, log=Log(), verbose=True):
     '''
     drop variants with bad -log10(P) values
     '''
+    sumstats = insumstats.copy()
     if scaled != True:
-        if verbose:log.write(" -Sumstats P values are being converted to -log10(P)...")
+        log.write(" -Sumstats P values are being converted to -log10(P)...", verbose=verbose)
         sumstats["scaled_P"] = -np.log10(sumstats[p].astype("float64"))
     #with pd.option_context('mode.use_inf_as_na', True):
     #    is_na = sumstats["scaled_P"].isna()
     if_inf_na = np.isinf(sumstats["scaled_P"]) | sumstats["scaled_P"].isna()
-    if verbose:
-        log.write(" -Sanity check: "+str(sum(if_inf_na)) +
-                  " na/inf/-inf variants will be removed...")
+    log.write(" -Sanity check: "+str(sum(if_inf_na)) +
+                  " na/inf/-inf variants will be removed...", verbose=verbose)
     sumstats = sumstats.loc[~if_inf_na, :]
     return sumstats
-def _quick_fix_eaf(seires, verbose=True, log=Log()):
+def _quick_fix_eaf(seires,log=Log(), verbose=True):
     '''
     conversion of eaf to maf
     '''
@@ -73,7 +74,7 @@ def _quick_fix_eaf(seires, verbose=True, log=Log()):
     return seires.copy()
-def _quick_fix_chr(seires, chr_dict, verbose=True, log=Log()):
+def _quick_fix_chr(seires, chr_dict,log=Log(), verbose=True):
     '''
     conversion and check for chr
     '''
@@ -84,7 +85,7 @@ def _quick_fix_chr(seires, chr_dict, verbose=True, log=Log()):
     return seires
-def _quick_fix_pos(seires, verbose=True, log=Log()):
+def _quick_fix_pos(seires,log=Log(), verbose=True):
     '''
     force conversion for pos
     '''
@@ -92,7 +93,7 @@ def _quick_fix_pos(seires, verbose=True, log=Log()):
     return seires
-def _get_largenumber(*args, log=Log()):
+def _get_largenumber(*args,log=Log(), verbose=True):
     '''
     get a helper large number, >> max(pos)
     '''
@@ -104,11 +105,11 @@ def _get_largenumber(*args, log=Log()):
         else:
             break
         if i == 7:
-            log.write(" -Warning: max POS is too large!")
+            log.warning("Max POS is too large!")
     return large_number
-def _quick_add_tchrpos(sumstats, chr="chr", pos="POS", large_number=10000000000, dropchrpos=False, verbose=True, log=Log()):
+def _quick_add_tchrpos(sumstats, chr="chr", pos="POS", large_number=10000000000, dropchrpos=False,log=Log(), verbose=True):
     sumstats["TCHR+POS"] = sumstats["CHR"]*large_number + sumstats["POS"]
     sumstats["TCHR+POS"] = sumstats["TCHR+POS"].astype('Int64')
     if dropchrpos == True:
@@ -117,7 +118,7 @@ def _quick_add_tchrpos(sumstats, chr="chr", pos="POS", large_number=10000000000,
     return sumstats
-def _quick_merge_sumstats(sumstats1, sumstats2):
+def _quick_merge_sumstats(sumstats1, sumstats2, log=Log(), verbose=True):
     merged_sumstats = pd.merge(sumstats1, sumstats2, on="TCHR+POS", how="outer", suffixes=('_1', '_2'))
     merged_sumstats["CHR"] = merged_sumstats["CHR_1"]
     merged_sumstats["POS"] = merged_sumstats["POS_1"]
@@ -126,7 +127,7 @@ def _quick_merge_sumstats(sumstats1, sumstats2):
     merged_sumstats = merged_sumstats.drop(labels=["CHR_1", "CHR_2", "POS_1", "POS_2"],axis=1)
     return merged_sumstats
-def _quick_assign_i(sumstats, chrom="CHR",pos="POS"):
+def _quick_assign_i(sumstats, chrom="CHR",pos="POS",log=Log(), verbose=True):
     # sort by CHR an POS
     sumstats = sumstats.sort_values([chrom,pos])
     # set new id
@@ -158,7 +159,7 @@ def _quick_assign_i(sumstats, chrom="CHR",pos="POS"):
     sumstats["i"] = np.floor(pd.to_numeric(sumstats["i"], errors='coerce')).astype('Int64')
     return sumstats, chrom_df
-def _quick_assign_i_with_rank(sumstats, chrpad, use_rank=False, chrom="CHR",pos="POS",drop_chr_start=False,_posdiccul=None):
+def _quick_assign_i_with_rank(sumstats, chrpad, use_rank=False, chrom="CHR",pos="POS",drop_chr_start=False,_posdiccul=None,log=Log(), verbose=True):
     # align all variants on a single axis (i)
     sumstats = sumstats.sort_values([chrom,pos])
     if use_rank is True:
@@ -218,7 +219,7 @@ def _quick_assign_i_with_rank(sumstats, chrpad, use_rank=False, chrom="CHR",pos=
     sumstats["i"] = np.floor(pd.to_numeric(sumstats["i"], errors='coerce')).astype('Int64')
     return sumstats, chrom_df
-def _quick_assign_marker_relative_size(series, sig_level = 5e-8, suggestive_sig_level=5e-6, lower_level=5e-4):
+def _quick_assign_marker_relative_size(series, sig_level = 5e-8, suggestive_sig_level=5e-6, lower_level=5e-4,log=Log(), verbose=True):
     size_series = series.copy()
     size_series[:] = 1
@@ -231,7 +232,7 @@ def _quick_assign_marker_relative_size(series, sig_level = 5e-8, suggestive_sig_
     size_series[is_sig_level] = 4
     return size_series
-def _quick_assign_highlight_hue(sumstats,highlight,highlight_windowkb, snpid="SNPID",chrom="CHR",pos="POS",verbose=True, log=Log()):
+def _quick_assign_highlight_hue(sumstats,highlight,highlight_windowkb, snpid="SNPID",chrom="CHR",pos="POS",log=Log(), verbose=True):
     to_highlight = sumstats.loc[sumstats[snpid].isin(highlight),:]
     #assign colors: 0 is hightlight color
     for i,row in to_highlight.iterrows():
@@ -243,7 +244,7 @@ def _quick_assign_highlight_hue(sumstats,highlight,highlight_windowkb, snpid="SN
         sumstats.loc[right_chr&up_pos&low_pos,"HUE"]="0"
     return sumstats
-def _quick_assign_highlight_hue_pair(sumstats, highlight1, highlight2, highlight_windowkb, chrom="CHR",pos="POS",verbose=True, log=Log()):
+def _quick_assign_highlight_hue_pair(sumstats, highlight1, highlight2, highlight_windowkb, chrom="CHR",pos="POS",log=Log(), verbose=True):
     #assign colors: 0 is hightlight color
     to_highlight1 = pd.DataFrame()
     to_highlight2 = pd.DataFrame()
@@ -271,34 +272,35 @@ def _quick_assign_highlight_hue_pair(sumstats, highlight1, highlight2, highlight
             sumstats.loc[right_chr&up_pos&low_pos,"HUE2"]="0"
     return sumstats, to_highlight1, to_highlight2
-def _quick_extract_snp_in_region(sumstats, region, chrom="CHR",pos="POS",verbose=True, log=Log()):
+def _quick_extract_snp_in_region(sumstats, region, chrom="CHR",pos="POS",log=Log(), verbose=True):
     region_chr = region[0]
     region_start = region[1]
     region_end = region[2]
-    if verbose:log.write(" -Extract SNPs in region : chr"+str(region_chr)+":"+str(region[1])+"-"+str(region[2])+ "...")
+    log.write(" -Extract SNPs in region : chr"+str(region_chr)+":"+str(region[1])+"-"+str(region[2])+ "...", verbose=verbose)
     is_in_region_snp = (sumstats[chrom]==region_chr) &(sumstats[pos]<region_end) &(sumstats[pos]>region_start)
-    if verbose:log.write(" -Extract SNPs in specified regions: "+str(sum(is_in_region_snp)))
+    log.write(" -Extract SNPs in specified regions: "+str(sum(is_in_region_snp)), verbose=verbose)
     sumstats = sumstats.loc[is_in_region_snp,:]
     return sumstats
-def _cut(series, mode,cutfactor,cut,skip, ylabels, cut_log, verbose,lines_to_plot, log):
+def _cut(series, mode,cutfactor,cut,skip, ylabels, cut_log, verbose, lines_to_plot, log):
+    log.write(" -Converting data above cut line...",verbose=verbose)
     if ylabels is not None:
         ylabels = pd.Series(ylabels)
     maxy = series.max()
     series = series.copy()
     if "b" not in mode:
-        if verbose: log.write(" -Maximum -log10(P) value is "+str(maxy) +" .")
+        log.write(" -Maximum -log10(P) value is "+str(maxy) +" .", verbose=verbose)
     elif "b" in mode:
-        if verbose: log.write(" -Maximum DENSITY value is "+str(maxy) +" .")
+        log.write(" -Maximum DENSITY value is "+str(maxy) +" .", verbose=verbose)
     maxticker=int(np.round(series.max(skipna=True)))
     if cut:
         # auto mode : determine curline and cut factor
         if cut==True:
-            if verbose: log.write(" -Cut Auto mode is activated...")
+            log.write(" -Cut Auto mode is activated...", verbose=verbose)
             if maxy<30:
-                if verbose: log.write(" - maxy <30 , no need to cut.")
+                log.write(" - maxy <30 , no need to cut.", verbose=verbose)
                 cut=0
             else:
                 cut = 20
@@ -325,9 +327,9 @@ def _cut(series, mode,cutfactor,cut,skip, ylabels, cut_log, verbose,lines_to_plo
             else:
                 # cut linear mode
                 if "b" not in mode:
-                    if verbose: log.write(" -Minus log10(P) values above " + str(cut)+" will be shrunk with a shrinkage factor of " + str(cutfactor)+"...")
+                    log.write(" -Minus log10(P) values above " + str(cut)+" will be shrunk with a shrinkage factor of " + str(cutfactor)+"...", verbose=verbose)
                 else:
-                    if verbose: log.write(" -Minus DENSITY values above " + str(cut)+" will be shrunk with a shrinkage factor of " + str(cutfactor)+"...")
+                    log.write(" -Minus DENSITY values above " + str(cut)+" will be shrunk with a shrinkage factor of " + str(cutfactor)+"...", verbose=verbose)
                 maxticker=int(np.round(series.max(skipna=True)))
@@ -340,7 +342,7 @@ def _cut(series, mode,cutfactor,cut,skip, ylabels, cut_log, verbose,lines_to_plo
                 #sumstats.loc[sumstats["scaled_P"]>cut,"scaled_P"] = (sumstats.loc[sumstats["scaled_P"]>cut,"scaled_P"]-cut)/cutfactor +  cut
                 maxy = (maxticker-cut)/cutfactor + cut
-    if verbose: log.write("Finished data conversion and sanity check.")
     return series, maxy, maxticker, cut, cutfactor,ylabels,lines_to_plot
 #def _cut_line(level, mode,cutfactor,cut,skip, ylabels, cut_log, verbose, log):
@@ -362,9 +364,11 @@ def _set_yticklabels(cut,
                      font_family,
                      ytick3,
                      ylabels,
-                     ylabels_converted
+                     ylabels_converted,
+                     log=Log(),
+                     verbose=True
                      ):
+    log.write(" -Processing Y tick lables...",verbose=verbose)
     # if no cut
     if cut == 0:
             ax1.set_ylim(skip, ceil(maxy*1.2) )
@@ -430,7 +434,8 @@ def _set_yticklabels(cut,
     return ax1
-def _jagged_y(cut,skip,ax1,mode,mqqratio,jagged_len,jagged_wid):
+def _jagged_y(cut,skip,ax1,mode,mqqratio,jagged_len,jagged_wid, log=Log(), verbose=True):
+    log.write(" -Processing jagged Y axis...",verbose=verbose)
     tycut = cut +0.3 #(cut - skip)/ (ax1.get_ylim()[1] - skip) + 0.002
     dy= jagged_len * (cut - skip)
     x0 =  0

gwaslab/viz_aux_reposition_text.py CHANGED Viewed

@@ -2,23 +2,25 @@ import pandas as pd
 import numpy as np
 from gwaslab.g_Log import Log
-def adjust_text_position(positions, yspan, repel_force=0.01, max_iter=100,amode="int",log=Log(),verbose=True):
+def adjust_text_position(positions, yspan, repel_force=0.01, max_iter=100,amode="int",log=Log(),verbose=True, min_factor=None):
     # check the number of variants to annotate
     #if repel_force>0:
     #    if 1/(repel_force*2 +0.01) < len(positions):
-    #        if verbose: log.write(" -Too many variants to annotate; maybe it is better to reduce the number of variants")
+    #        log.write(" -Too many variants to annotate; maybe it is better to reduce the number of variants")
     #else:
     if len(positions)>30:
-        if verbose: log.write(" -Too many variants to annotate; maybe it is better to reduce the number of variants")
+        log.write(" -Too many variants to annotate; maybe it is better to reduce the number of variants",verbose=verbose)
     # calculate the steps
     if amode=="int":
         step = int(yspan*repel_force)
     elif amode=="log":
-        min_factor = np.min(positions)
+        if min_factor is None:
+            min_factor = np.min(positions)
         #(1, max) -> (0, log(max)))
-        positions = np.log(positions/min_factor)
+        positions = np.log2(positions/min_factor)
         step = max(positions)*repel_force
     else:
         step = yspan*repel_force
@@ -33,7 +35,8 @@ def adjust_text_position(positions, yspan, repel_force=0.01, max_iter=100,amode=
             if amode=="int":
                 return  np.floor(pd.to_numeric(positions, errors='coerce')).astype('Int64').copy()
             elif amode=="log":
-                return  np.exp(pd.to_numeric(positions, errors='coerce')) * min_factor
+                return  np.power(2, pd.to_numeric(positions, errors='coerce'))* min_factor
             else:
                 return  pd.to_numeric(positions, errors='coerce')
         else:
@@ -44,7 +47,7 @@ def adjust_text_position(positions, yspan, repel_force=0.01, max_iter=100,amode=
                 move_position_from_center_float(positions, index, step)
     # when reaching maximum iteration, return anyway
-    if verbose: log.write(" -Reaching maximum iteration: {}; Skipping...".format(max_iter))
+    log.write(" -Reaching maximum iteration: {}; Skipping...".format(max_iter),verbose=verbose)
     if amode=="int":
         return np.floor(pd.to_numeric(positions, errors='coerce')).astype('Int64').copy()
     elif amode=="log":

gwaslab/viz_aux_save_figure.py CHANGED Viewed

@@ -4,23 +4,33 @@ import time
 import os.path
 def save_figure(fig, save, keyword, save_args=None, log = Log(), verbose=True):
+    log.write("Start to save figure..." ,verbose=verbose)
     if save_args is None:
         save_args = {}
     if save:
-        if verbose: log.write("Saving plot:")
         if save==True:
             default_path = get_default_path(keyword)
             fig.savefig(default_path, bbox_inches="tight",**save_args)
-            log.write(" -Saved to "+ default_path + " successfully!" )
+            log.write(" -Saved to "+ default_path + " successfully!" ,verbose=verbose)
         else:
-            if os.path.exists(save):
-                fig.savefig(save,bbox_inches="tight",**save_args)
-                log.write(" -Saved to "+ save + " successfully! (overwrite)" )
+            if save[-3:]=="pdf":
+                if os.path.exists(save):
+                    fig.savefig(save, **save_args)
+                    log.write(" -Saved to "+ save + " successfully! (pdf, overwrite)" ,verbose=verbose)
+                else:
+                    fig.savefig(save, **save_args)
+                    log.write(" -Saved to "+ save + " successfully! (pdf)" ,verbose=verbose)
             else:
-                fig.savefig(save,bbox_inches="tight",**save_args)
-                log.write(" -Saved to "+ save + " successfully!" )
+                if os.path.exists(save):
+                    fig.savefig(save,bbox_inches="tight",**save_args)
+                    log.write(" -Saved to "+ save + " successfully! (overwrite)" ,verbose=verbose)
+                else:
+                    fig.savefig(save,bbox_inches="tight",**save_args)
+                    log.write(" -Saved to "+ save + " successfully!" ,verbose=verbose)
     else:
-        log.write(" -Skip saving figures!" )
+        log.write(" -Skip saving figure!" ,verbose=verbose)
+    log.write("Finished saving figure..." ,verbose=verbose)
 def get_default_path(keyword,fmt="png"):
     path_dictionary = {

gwaslab/viz_plot_compare_af.py CHANGED Viewed

@@ -10,6 +10,7 @@ from gwaslab.viz_aux_save_figure import save_figure
 def plotdaf(sumstats,
              eaf="EAF",
              daf="DAF",
+             raf="RAF",
              threshold=0.16,
              xlabel="Alternative Allele Frequency in Reference Population (RAF)",
              ylabel="Effect Allele Frequency in Sumstats (EAF)",
@@ -43,7 +44,7 @@ def plotdaf(sumstats,
     if plt_args is None:
         plt_args={"figsize":(8,4),"dpi":300}
     if histplot_args is None:
-        histplot_args={"log_scale":(False,True)}
+        histplot_args={"log_scale":(False,False)}
     if reg_line_args is None:
         reg_line_args={"color":'#cccccc', "linestyle":'--'}
     if threshold_line_args is None:
@@ -55,8 +56,9 @@ def plotdaf(sumstats,
     if save_args is None:
         save_args =  {}
-    if verbose: log.write("Start to plot Reference frequency vs Effect allele frequency plot...")
-    if not ((eaf in sumstats.columns) and (daf in sumstats.columns)):
+    log.write("Start to plot allele frequency comparison plot...", verbose=verbose)
+    if not ((eaf in sumstats.columns) and ((daf in sumstats.columns)) or (raf in sumstats.columns)):
         raise ValueError("EAF and/or DAF columns were not detected.")
     if "SNPID" in sumstats.columns:
@@ -70,40 +72,42 @@ def plotdaf(sumstats,
     if "NEA" in sumstats.columns:
         alleles.append("NEA")
+    if daf not in sumstats.columns:
+        sumstats[daf] = sumstats[eaf] - sumstats[raf]
     sumstats = sumstats.loc[(~sumstats[eaf].isna())&(~sumstats[daf].isna()),[snpid,eaf,daf]+alleles].copy()
-    sumstats.loc[:,daf] = sumstats.loc[:,daf].astype("float")
-    sumstats.loc[:,eaf] = sumstats.loc[:,eaf].astype("float")
-    if verbose: log.write(" -Plotting valriants:" + str(len(sumstats)))
-    sumstats.loc[:,"RAF"]=sumstats[eaf] - sumstats[daf]
+    sumstats[daf] = sumstats[daf].astype("float")
+    sumstats[eaf] = sumstats[eaf].astype("float")
+    log.write(" -Plotting valriants:" + str(len(sumstats)), verbose=verbose)
+    if raf not in sumstats.columns:
+        sumstats[raf] = sumstats[eaf] - sumstats[daf]
     sns.set_style("ticks")
-    fig, (ax1, ax2) = plt.subplots(1, 2,**plt_args)
-    ax1.scatter(sumstats["RAF"],sumstats[eaf],label="Non-outlier", **scatter_args)
+    fig, [ax1, ax2] = plt.subplots(1, 2,**plt_args)
+    ax1.scatter(sumstats[raf],sumstats[eaf],label="Non-outlier", **scatter_args)
     if is_threshold is True:
         is_outliers = sumstats[daf].abs() > threshold
         if sum(is_outliers)>0:
-            ax1.scatter(sumstats.loc[is_outliers, "RAF"],sumstats.loc[is_outliers, eaf],label="Outlier", **scatter_args_outlier)
+            ax1.scatter(sumstats.loc[is_outliers, raf],sumstats.loc[is_outliers, eaf],label="Outlier", **scatter_args_outlier)
     if legend1 ==True:
         ax1.legend()
     if is_reg is True:
-        if verbose: log.write(" -Plotting regression line...")
-        reg = ss.linregress(sumstats["RAF"],sumstats[eaf])
-        if verbose:log.write(" -Beta = ", reg[0])
-        if verbose:log.write(" -Intercept = ", reg[1])
-        if verbose:log.write(" -R2 = ", reg[2])
+        log.write(" -Plotting regression line...", verbose=verbose)
+        reg = ss.linregress(sumstats[raf],sumstats[eaf])
+        log.write(" -Beta = ", reg[0], verbose=verbose)
+        log.write(" -Intercept = ", reg[1], verbose=verbose)
+        log.write(" -R2 = ", reg[2], verbose=verbose)
         ax1.axline(xy1=(0,reg[1]),slope=reg[0],zorder=1,**reg_line_args)
         if r2 is True:
             ax1.text(0.98,0.02, "$R^2 = {:.3f}$".format(reg[2]), transform=ax1.transAxes, **r2_args)
     if is_threshold is True:
-        if verbose: log.write(" -Threshold : " + str(threshold))
+        log.write(" -Threshold : " + str(threshold), verbose=verbose)
         num = sum(np.abs(sumstats[daf])>threshold )
-        if verbose: log.write(" -Variants with relatively large DAF : ",num )
-        if verbose: log.write(" -Percentage for variants with relatively large DAF : ",num/len(sumstats) )
+        log.write(" -Variants with relatively large DAF : ",num , verbose=verbose)
+        log.write(" -Percentage for variants with relatively large DAF : ",num/len(sumstats) , verbose=verbose)
         ax1.axline(xy1=(0,threshold),slope=1,zorder=1,**threshold_line_args)
         ax1.axline(xy1=(threshold,0),slope=1,zorder=1,**threshold_line_args)
@@ -119,25 +123,20 @@ def plotdaf(sumstats,
     ax1.set_ylim([0,1])
-    sumstats.loc[:,"ID"] = sumstats.index
+    sumstats["ID"] = sumstats.index
-    to_plot = pd.melt(sumstats,id_vars=['ID'], value_vars=['EAF',"RAF"], var_name='Types', value_name='Allele Frequency')
-    sns.histplot(data=to_plot, x="Allele Frequency", hue="Types", fill=True, ax=ax2, legend = legend2 ,**histplot_args)
-    ax2.set_xlabel("Allele Frequency",**font_args)
+    to_plot = pd.melt(sumstats,id_vars=['ID'], value_vars=[eaf,raf], var_name='Types', value_name='Allele Frequency').dropna()
+    sns.histplot(data=to_plot, x="Allele Frequency",
+                 hue="Types", fill=True,
+                 ax=ax2, legend = legend2,
+                 **histplot_args)
+    ax2.set_xlabel("Allele Frequency",**font_args)
     plt.tight_layout()
     save_figure(fig, save, keyword="afc",save_args=save_args, log=log, verbose=verbose)
-    #if save:
-    #    if verbose: log.write("Saving plot:")
-    #    if save==True:
-    #        fig.savefig("./allele_frequency_comparison.png",bbox_inches="tight",**save_args)
-    #        log.write(" -Saved to "+ "./allele_frequency_comparison.png" + " successfully!" )
-    #    else:
-    #        fig.savefig(save,bbox_inches="tight",**save_args)
-    #        log.write(" -Saved to "+ save + " successfully!" )
     sumstats = sumstats.drop(columns="ID")
     return fig, sumstats[is_outliers].copy()

gwaslab 3.4.37__py3-none-any.whl → 3.4.39__py3-none-any.whl

Potentially problematic release.

gwaslab 3.4.37__py3-none-any.whl → 3.4.39__py3-none-any.whl