PyPI - gwaslab - Versions diffs - 3.4.38__py3-none-any.whl → 3.4.39__py3-none-any.whl - Mend

gwaslab 3.4.38__py3-none-any.whl → 3.4.39__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of gwaslab might be problematic. Click here for more details.

Files changed (51)

gwaslab/bd_common_data.py +6 -3
gwaslab/bd_download.py +9 -9
gwaslab/bd_get_hapmap3.py +43 -9
gwaslab/g_Log.py +14 -5
gwaslab/g_Sumstats.py +86 -18
gwaslab/g_SumstatsPair.py +70 -23
gwaslab/g_SumstatsT.py +2 -2
gwaslab/g_version.py +10 -10
gwaslab/hm_casting.py +9 -4
gwaslab/hm_harmonize_sumstats.py +88 -83
gwaslab/io_preformat_input.py +14 -14
gwaslab/io_read_ldsc.py +49 -1
gwaslab/ldsc_irwls.py +198 -0
gwaslab/ldsc_jackknife.py +514 -0
gwaslab/ldsc_ldscore.py +417 -0
gwaslab/ldsc_parse.py +294 -0
gwaslab/ldsc_regressions.py +747 -0
gwaslab/ldsc_sumstats.py +629 -0
gwaslab/qc_check_datatype.py +1 -1
gwaslab/qc_fix_sumstats.py +163 -161
gwaslab/util_ex_calculate_ldmatrix.py +2 -2
gwaslab/util_ex_gwascatalog.py +24 -24
gwaslab/util_ex_ldproxyfinder.py +9 -9
gwaslab/util_ex_ldsc.py +189 -0
gwaslab/util_in_calculate_gc.py +6 -6
gwaslab/util_in_calculate_power.py +42 -43
gwaslab/util_in_convert_h2.py +8 -8
gwaslab/util_in_fill_data.py +28 -28
gwaslab/util_in_filter_value.py +91 -52
gwaslab/util_in_get_density.py +8 -8
gwaslab/util_in_get_sig.py +407 -65
gwaslab/viz_aux_annotate_plot.py +12 -12
gwaslab/viz_aux_quickfix.py +18 -18
gwaslab/viz_aux_reposition_text.py +3 -3
gwaslab/viz_aux_save_figure.py +14 -5
gwaslab/viz_plot_compare_af.py +29 -30
gwaslab/viz_plot_compare_effect.py +63 -71
gwaslab/viz_plot_miamiplot2.py +6 -6
gwaslab/viz_plot_mqqplot.py +17 -3
gwaslab/viz_plot_qqplot.py +1 -1
gwaslab/viz_plot_regionalplot.py +33 -32
gwaslab/viz_plot_rg_heatmap.py +28 -26
gwaslab/viz_plot_stackedregional.py +40 -21
gwaslab/viz_plot_trumpetplot.py +50 -55
gwaslab-3.4.39.dist-info/LICENSE +674 -0
{gwaslab-3.4.38.dist-info → gwaslab-3.4.39.dist-info}/METADATA +4 -3
gwaslab-3.4.39.dist-info/RECORD +80 -0
gwaslab-3.4.38.dist-info/RECORD +0 -72
/gwaslab-3.4.38.dist-info/LICENSE → /gwaslab-3.4.39.dist-info/LICENSE_before_v3.4.39 +0 -0
{gwaslab-3.4.38.dist-info → gwaslab-3.4.39.dist-info}/WHEEL +0 -0
{gwaslab-3.4.38.dist-info → gwaslab-3.4.39.dist-info}/top_level.txt +0 -0

gwaslab/viz_aux_annotate_plot.py CHANGED Viewed

@@ -58,7 +58,7 @@ def annotate_single(
                 annotation_col="CHR:POS"
         elif anno:
                 annotation_col=anno
-        if verbose: log.write(" -Annotating using column "+annotation_col+"...")
+        log.write(" -Annotating using column "+annotation_col+"...", verbose=verbose)
         ## calculate y span
         if region is not None:
@@ -66,7 +66,7 @@ def annotate_single(
         else:
             y_span = sumstats["i"].max()-sumstats["i"].min()
-        if verbose: log.write(" -Adjusting text positions with repel_force={}...".format(repel_force))
+        log.write(" -Adjusting text positions with repel_force={}...".format(repel_force), verbose=verbose)
         if anno_style == "expand" :
             to_annotate.loc[:, "ADJUSTED_i"] = adjust_text_position(to_annotate["i"].values.copy(), y_span, repel_force,max_iter=anno_max_iter,log=log,amode=amode,verbose=verbose)
         ##  iterate through variants to be annotated
@@ -214,7 +214,7 @@ def annotate_single(
             anno_count +=1
         #anno_adjust_keyargs = {"arrowprops":dict(arrowstyle='->', color='grey', linewidth=0.1,relpos=(0.5,0.5))}
         if anno_adjust==True:
-            if verbose: log.write(" -Auto-adjusting text positions...")
+            log.write(" -Auto-adjusting text positions...", verbose=verbose)
             adjust_text(texts = anno_to_adjust_list,
                         autoalign=False,
                         only_move={'points':'x', 'text':'x', 'objects':'x'},
@@ -232,7 +232,7 @@ def annotate_single(
                         )
     else:
-        if verbose: log.write(" -Skip annotating")
+        log.write(" -Skip annotating", verbose=verbose)
     return ax1
@@ -275,7 +275,7 @@ def annotate_pair(
         for index,ax,to_annotate_df,anno_d, anno_alias in [(0,ax1,to_annotate1,anno_d1,anno_alias1),(1,ax5,to_annotate5,anno_d2,anno_alias2)]:
             ###################### annotate() args
             if to_annotate_df.empty is True:
-                if verbose: log.write(" -Skipping annotation...")
+                log.write(" -Skipping annotation...", verbose=verbose)
                 continue
             fontweight = "normal"
@@ -313,7 +313,7 @@ def annotate_pair(
                             annotation_col=anno
                         else:
                             annotation_col=anno+"_"+str(index+1)
-                if verbose: log.write(" -Annotating using column "+annotation_col+"...")
+                log.write(" -Annotating using column "+annotation_col+"...", verbose=verbose)
                 ## calculate y span
                 if region is not None:
@@ -453,7 +453,7 @@ def annotate_pair(
                     anno_count +=1
             if anno_adjust==True:
-                if verbose: log.write(" -Auto-adjusting text positions for plot {}...".format(index))
+                log.write(" -Auto-adjusting text positions for plot {}...".format(index), verbose=verbose)
                 if index==0:
                     va="bottom"
                     ha='left'
@@ -476,7 +476,7 @@ def annotate_pair(
                             lim =anno_max_iter
                             )
     else:
-        if verbose: log.write(" -Skip annotating")
+        log.write(" -Skip annotating", verbose=verbose)
     return ax1,ax5
@@ -521,7 +521,7 @@ def annotate_subtype(
                 annotation_col="CHR:POS"
         elif anno:
                 annotation_col=anno
-        if verbose: log.write(" -Annotating using column "+annotation_col+"...")
+        log.write(" -Annotating using column "+annotation_col+"...", verbose=verbose)
         ## calculate y span
         if region is not None:
@@ -529,7 +529,7 @@ def annotate_subtype(
         else:
             y_span = sumstats["i"].max()-sumstats["i"].min()
-        if verbose: log.write(" -Adjusting text positions with repel_force={}...".format(repel_force))
+        log.write(" -Adjusting text positions with repel_force={}...".format(repel_force), verbose=verbose)
         if anno_style == "expand" :
             to_annotate.loc[:, "ADJUSTED_i"] = adjust_text_position(to_annotate["i"].values.copy(), y_span, repel_force,max_iter=anno_max_iter,log=log,verbose=verbose)
         ##  iterate through variants to be annotated
@@ -656,7 +656,7 @@ def annotate_subtype(
             anno_count +=1
         #anno_adjust_keyargs = {"arrowprops":dict(arrowstyle='->', color='grey', linewidth=0.1,relpos=(0.5,0.5))}
         if anno_adjust==True:
-            if verbose: log.write(" -Auto-adjusting text positions...")
+            log.write(" -Auto-adjusting text positions...", verbose=verbose)
             adjust_text(texts = anno_to_adjust_list,
                         autoalign=False,
                         only_move={'points':'x', 'text':'x', 'objects':'x'},
@@ -674,7 +674,7 @@ def annotate_subtype(
                         )
     else:
-        if verbose: log.write(" -Skip annotating")
+        log.write(" -Skip annotating", verbose=verbose)
     return ax1

gwaslab/viz_aux_quickfix.py CHANGED Viewed

@@ -31,16 +31,16 @@ def _quick_fix_p_value(sumstats, p="P", mlog10p="MLOG10P", scaled=False,verbose=
     '''
     if scaled==True:
         # if scaled, add scaled P and P col
-        if verbose:log.write(" -P values are already scaled...")
-        if verbose:log.write(" -Sumstats -log10(P) values are being converted to P...")
+        log.write(" -P values are already scaled...", verbose=verbose)
+        log.write(" -Sumstats -log10(P) values are being converted to P...", verbose=verbose)
         sumstats["scaled_P"] = sumstats[mlog10p].copy()
         sumstats[p]= np.power(10,-sumstats[mlog10p].astype("float64"))
         return sumstats
     # bad p : na and outside (0,1]
     bad_p_value = (sumstats[p].isna()) | (sumstats[p] > 1) | (sumstats[p] <= 0)
-    if verbose:
-        log.write(" -Sanity check after conversion: " + str(sum(bad_p_value)) +
-                  " variants with P value outside of (0,1] will be removed...")
+    log.write(" -Sanity check after conversion: " + str(sum(bad_p_value)) +
+                  " variants with P value outside of (0,1] will be removed...", verbose=verbose)
     sumstats = sumstats.loc[~bad_p_value, :]
     return sumstats
@@ -51,15 +51,15 @@ def _quick_fix_mlog10p(insumstats,p="P", mlog10p="MLOG10P", scaled=False, log=Lo
     '''
     sumstats = insumstats.copy()
     if scaled != True:
-        if verbose:log.write(" -Sumstats P values are being converted to -log10(P)...")
+        log.write(" -Sumstats P values are being converted to -log10(P)...", verbose=verbose)
         sumstats["scaled_P"] = -np.log10(sumstats[p].astype("float64"))
     #with pd.option_context('mode.use_inf_as_na', True):
     #    is_na = sumstats["scaled_P"].isna()
     if_inf_na = np.isinf(sumstats["scaled_P"]) | sumstats["scaled_P"].isna()
-    if verbose:
-        log.write(" -Sanity check: "+str(sum(if_inf_na)) +
-                  " na/inf/-inf variants will be removed...")
+    log.write(" -Sanity check: "+str(sum(if_inf_na)) +
+                  " na/inf/-inf variants will be removed...", verbose=verbose)
     sumstats = sumstats.loc[~if_inf_na, :]
     return sumstats
@@ -105,7 +105,7 @@ def _get_largenumber(*args,log=Log(), verbose=True):
         else:
             break
         if i == 7:
-            log.write(" -Warning: max POS is too large!")
+            log.warning("Max POS is too large!")
     return large_number
@@ -276,9 +276,9 @@ def _quick_extract_snp_in_region(sumstats, region, chrom="CHR",pos="POS",log=Log
     region_chr = region[0]
     region_start = region[1]
     region_end = region[2]
-    if verbose:log.write(" -Extract SNPs in region : chr"+str(region_chr)+":"+str(region[1])+"-"+str(region[2])+ "...")
+    log.write(" -Extract SNPs in region : chr"+str(region_chr)+":"+str(region[1])+"-"+str(region[2])+ "...", verbose=verbose)
     is_in_region_snp = (sumstats[chrom]==region_chr) &(sumstats[pos]<region_end) &(sumstats[pos]>region_start)
-    if verbose:log.write(" -Extract SNPs in specified regions: "+str(sum(is_in_region_snp)))
+    log.write(" -Extract SNPs in specified regions: "+str(sum(is_in_region_snp)), verbose=verbose)
     sumstats = sumstats.loc[is_in_region_snp,:]
     return sumstats
@@ -289,18 +289,18 @@ def _cut(series, mode,cutfactor,cut,skip, ylabels, cut_log, verbose, lines_to_pl
     maxy = series.max()
     series = series.copy()
     if "b" not in mode:
-        if verbose: log.write(" -Maximum -log10(P) value is "+str(maxy) +" .")
+        log.write(" -Maximum -log10(P) value is "+str(maxy) +" .", verbose=verbose)
     elif "b" in mode:
-        if verbose: log.write(" -Maximum DENSITY value is "+str(maxy) +" .")
+        log.write(" -Maximum DENSITY value is "+str(maxy) +" .", verbose=verbose)
     maxticker=int(np.round(series.max(skipna=True)))
     if cut:
         # auto mode : determine curline and cut factor
         if cut==True:
-            if verbose: log.write(" -Cut Auto mode is activated...")
+            log.write(" -Cut Auto mode is activated...", verbose=verbose)
             if maxy<30:
-                if verbose: log.write(" - maxy <30 , no need to cut.")
+                log.write(" - maxy <30 , no need to cut.", verbose=verbose)
                 cut=0
             else:
                 cut = 20
@@ -327,9 +327,9 @@ def _cut(series, mode,cutfactor,cut,skip, ylabels, cut_log, verbose, lines_to_pl
             else:
                 # cut linear mode
                 if "b" not in mode:
-                    if verbose: log.write(" -Minus log10(P) values above " + str(cut)+" will be shrunk with a shrinkage factor of " + str(cutfactor)+"...")
+                    log.write(" -Minus log10(P) values above " + str(cut)+" will be shrunk with a shrinkage factor of " + str(cutfactor)+"...", verbose=verbose)
                 else:
-                    if verbose: log.write(" -Minus DENSITY values above " + str(cut)+" will be shrunk with a shrinkage factor of " + str(cutfactor)+"...")
+                    log.write(" -Minus DENSITY values above " + str(cut)+" will be shrunk with a shrinkage factor of " + str(cutfactor)+"...", verbose=verbose)
                 maxticker=int(np.round(series.max(skipna=True)))

gwaslab/viz_aux_reposition_text.py CHANGED Viewed

@@ -6,10 +6,10 @@ def adjust_text_position(positions, yspan, repel_force=0.01, max_iter=100,amode=
     # check the number of variants to annotate
     #if repel_force>0:
     #    if 1/(repel_force*2 +0.01) < len(positions):
-    #        if verbose: log.write(" -Too many variants to annotate; maybe it is better to reduce the number of variants")
+    #        log.write(" -Too many variants to annotate; maybe it is better to reduce the number of variants")
     #else:
     if len(positions)>30:
-        if verbose: log.write(" -Too many variants to annotate; maybe it is better to reduce the number of variants")
+        log.write(" -Too many variants to annotate; maybe it is better to reduce the number of variants",verbose=verbose)
     # calculate the steps
     if amode=="int":
@@ -47,7 +47,7 @@ def adjust_text_position(positions, yspan, repel_force=0.01, max_iter=100,amode=
                 move_position_from_center_float(positions, index, step)
     # when reaching maximum iteration, return anyway
-    if verbose: log.write(" -Reaching maximum iteration: {}; Skipping...".format(max_iter))
+    log.write(" -Reaching maximum iteration: {}; Skipping...".format(max_iter),verbose=verbose)
     if amode=="int":
         return np.floor(pd.to_numeric(positions, errors='coerce')).astype('Int64').copy()
     elif amode=="log":

gwaslab/viz_aux_save_figure.py CHANGED Viewed

@@ -7,18 +7,27 @@ def save_figure(fig, save, keyword, save_args=None, log = Log(), verbose=True):
     log.write("Start to save figure..." ,verbose=verbose)
     if save_args is None:
         save_args = {}
     if save:
         if save==True:
             default_path = get_default_path(keyword)
             fig.savefig(default_path, bbox_inches="tight",**save_args)
             log.write(" -Saved to "+ default_path + " successfully!" ,verbose=verbose)
         else:
-            if os.path.exists(save):
-                fig.savefig(save,bbox_inches="tight",**save_args)
-                log.write(" -Saved to "+ save + " successfully! (overwrite)" ,verbose=verbose)
+            if save[-3:]=="pdf":
+                if os.path.exists(save):
+                    fig.savefig(save, **save_args)
+                    log.write(" -Saved to "+ save + " successfully! (pdf, overwrite)" ,verbose=verbose)
+                else:
+                    fig.savefig(save, **save_args)
+                    log.write(" -Saved to "+ save + " successfully! (pdf)" ,verbose=verbose)
             else:
-                fig.savefig(save,bbox_inches="tight",**save_args)
-                log.write(" -Saved to "+ save + " successfully!" ,verbose=verbose)
+                if os.path.exists(save):
+                    fig.savefig(save,bbox_inches="tight",**save_args)
+                    log.write(" -Saved to "+ save + " successfully! (overwrite)" ,verbose=verbose)
+                else:
+                    fig.savefig(save,bbox_inches="tight",**save_args)
+                    log.write(" -Saved to "+ save + " successfully!" ,verbose=verbose)
     else:
         log.write(" -Skip saving figure!" ,verbose=verbose)
     log.write("Finished saving figure..." ,verbose=verbose)

gwaslab/viz_plot_compare_af.py CHANGED Viewed

@@ -10,6 +10,7 @@ from gwaslab.viz_aux_save_figure import save_figure
 def plotdaf(sumstats,
              eaf="EAF",
              daf="DAF",
+             raf="RAF",
              threshold=0.16,
              xlabel="Alternative Allele Frequency in Reference Population (RAF)",
              ylabel="Effect Allele Frequency in Sumstats (EAF)",
@@ -43,7 +44,7 @@ def plotdaf(sumstats,
     if plt_args is None:
         plt_args={"figsize":(8,4),"dpi":300}
     if histplot_args is None:
-        histplot_args={"log_scale":(False,True)}
+        histplot_args={"log_scale":(False,False)}
     if reg_line_args is None:
         reg_line_args={"color":'#cccccc', "linestyle":'--'}
     if threshold_line_args is None:
@@ -55,8 +56,9 @@ def plotdaf(sumstats,
     if save_args is None:
         save_args =  {}
-    if verbose: log.write("Start to plot Reference frequency vs Effect allele frequency plot...")
-    if not ((eaf in sumstats.columns) and (daf in sumstats.columns)):
+    log.write("Start to plot allele frequency comparison plot...", verbose=verbose)
+    if not ((eaf in sumstats.columns) and ((daf in sumstats.columns)) or (raf in sumstats.columns)):
         raise ValueError("EAF and/or DAF columns were not detected.")
     if "SNPID" in sumstats.columns:
@@ -70,40 +72,42 @@ def plotdaf(sumstats,
     if "NEA" in sumstats.columns:
         alleles.append("NEA")
+    if daf not in sumstats.columns:
+        sumstats[daf] = sumstats[eaf] - sumstats[raf]
     sumstats = sumstats.loc[(~sumstats[eaf].isna())&(~sumstats[daf].isna()),[snpid,eaf,daf]+alleles].copy()
     sumstats[daf] = sumstats[daf].astype("float")
     sumstats[eaf] = sumstats[eaf].astype("float")
-    if verbose: log.write(" -Plotting valriants:" + str(len(sumstats)))
-    sumstats["RAF"]=sumstats[eaf] - sumstats[daf]
+    log.write(" -Plotting valriants:" + str(len(sumstats)), verbose=verbose)
+    if raf not in sumstats.columns:
+        sumstats[raf] = sumstats[eaf] - sumstats[daf]
     sns.set_style("ticks")
-    fig, (ax1, ax2) = plt.subplots(1, 2,**plt_args)
-    ax1.scatter(sumstats["RAF"],sumstats[eaf],label="Non-outlier", **scatter_args)
+    fig, [ax1, ax2] = plt.subplots(1, 2,**plt_args)
+    ax1.scatter(sumstats[raf],sumstats[eaf],label="Non-outlier", **scatter_args)
     if is_threshold is True:
         is_outliers = sumstats[daf].abs() > threshold
         if sum(is_outliers)>0:
-            ax1.scatter(sumstats.loc[is_outliers, "RAF"],sumstats.loc[is_outliers, eaf],label="Outlier", **scatter_args_outlier)
+            ax1.scatter(sumstats.loc[is_outliers, raf],sumstats.loc[is_outliers, eaf],label="Outlier", **scatter_args_outlier)
     if legend1 ==True:
         ax1.legend()
     if is_reg is True:
-        if verbose: log.write(" -Plotting regression line...")
-        reg = ss.linregress(sumstats["RAF"],sumstats[eaf])
-        if verbose:log.write(" -Beta = ", reg[0])
-        if verbose:log.write(" -Intercept = ", reg[1])
-        if verbose:log.write(" -R2 = ", reg[2])
+        log.write(" -Plotting regression line...", verbose=verbose)
+        reg = ss.linregress(sumstats[raf],sumstats[eaf])
+        log.write(" -Beta = ", reg[0], verbose=verbose)
+        log.write(" -Intercept = ", reg[1], verbose=verbose)
+        log.write(" -R2 = ", reg[2], verbose=verbose)
         ax1.axline(xy1=(0,reg[1]),slope=reg[0],zorder=1,**reg_line_args)
         if r2 is True:
             ax1.text(0.98,0.02, "$R^2 = {:.3f}$".format(reg[2]), transform=ax1.transAxes, **r2_args)
     if is_threshold is True:
-        if verbose: log.write(" -Threshold : " + str(threshold))
+        log.write(" -Threshold : " + str(threshold), verbose=verbose)
         num = sum(np.abs(sumstats[daf])>threshold )
-        if verbose: log.write(" -Variants with relatively large DAF : ",num )
-        if verbose: log.write(" -Percentage for variants with relatively large DAF : ",num/len(sumstats) )
+        log.write(" -Variants with relatively large DAF : ",num , verbose=verbose)
+        log.write(" -Percentage for variants with relatively large DAF : ",num/len(sumstats) , verbose=verbose)
         ax1.axline(xy1=(0,threshold),slope=1,zorder=1,**threshold_line_args)
         ax1.axline(xy1=(threshold,0),slope=1,zorder=1,**threshold_line_args)
@@ -121,23 +125,18 @@ def plotdaf(sumstats,
     sumstats["ID"] = sumstats.index
-    to_plot = pd.melt(sumstats,id_vars=['ID'], value_vars=['EAF',"RAF"], var_name='Types', value_name='Allele Frequency').dropna()
-    sns.histplot(data=to_plot, x="Allele Frequency", hue="Types", fill=True, ax=ax2, legend = legend2 ,**histplot_args)
-    ax2.set_xlabel("Allele Frequency",**font_args)
+    to_plot = pd.melt(sumstats,id_vars=['ID'], value_vars=[eaf,raf], var_name='Types', value_name='Allele Frequency').dropna()
+    sns.histplot(data=to_plot, x="Allele Frequency",
+                 hue="Types", fill=True,
+                 ax=ax2, legend = legend2,
+                 **histplot_args)
+    ax2.set_xlabel("Allele Frequency",**font_args)
     plt.tight_layout()
     save_figure(fig, save, keyword="afc",save_args=save_args, log=log, verbose=verbose)
-    #if save:
-    #    if verbose: log.write("Saving plot:")
-    #    if save==True:
-    #        fig.savefig("./allele_frequency_comparison.png",bbox_inches="tight",**save_args)
-    #        log.write(" -Saved to "+ "./allele_frequency_comparison.png" + " successfully!" )
-    #    else:
-    #        fig.savefig(save,bbox_inches="tight",**save_args)
-    #        log.write(" -Saved to "+ save + " successfully!" )
     sumstats = sumstats.drop(columns="ID")
     return fig, sumstats[is_outliers].copy()

gwaslab 3.4.38__py3-none-any.whl → 3.4.39__py3-none-any.whl

Potentially problematic release.

gwaslab 3.4.38__py3-none-any.whl → 3.4.39__py3-none-any.whl