gwaslab-3.4.38-py3-none-any.whl → gwaslab-3.4.39-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gwaslab might be problematic. Click here for more details.
- gwaslab/bd_common_data.py +6 -3
- gwaslab/bd_download.py +9 -9
- gwaslab/bd_get_hapmap3.py +43 -9
- gwaslab/g_Log.py +14 -5
- gwaslab/g_Sumstats.py +86 -18
- gwaslab/g_SumstatsPair.py +70 -23
- gwaslab/g_SumstatsT.py +2 -2
- gwaslab/g_version.py +10 -10
- gwaslab/hm_casting.py +9 -4
- gwaslab/hm_harmonize_sumstats.py +88 -83
- gwaslab/io_preformat_input.py +14 -14
- gwaslab/io_read_ldsc.py +49 -1
- gwaslab/ldsc_irwls.py +198 -0
- gwaslab/ldsc_jackknife.py +514 -0
- gwaslab/ldsc_ldscore.py +417 -0
- gwaslab/ldsc_parse.py +294 -0
- gwaslab/ldsc_regressions.py +747 -0
- gwaslab/ldsc_sumstats.py +629 -0
- gwaslab/qc_check_datatype.py +1 -1
- gwaslab/qc_fix_sumstats.py +163 -161
- gwaslab/util_ex_calculate_ldmatrix.py +2 -2
- gwaslab/util_ex_gwascatalog.py +24 -24
- gwaslab/util_ex_ldproxyfinder.py +9 -9
- gwaslab/util_ex_ldsc.py +189 -0
- gwaslab/util_in_calculate_gc.py +6 -6
- gwaslab/util_in_calculate_power.py +42 -43
- gwaslab/util_in_convert_h2.py +8 -8
- gwaslab/util_in_fill_data.py +28 -28
- gwaslab/util_in_filter_value.py +91 -52
- gwaslab/util_in_get_density.py +8 -8
- gwaslab/util_in_get_sig.py +407 -65
- gwaslab/viz_aux_annotate_plot.py +12 -12
- gwaslab/viz_aux_quickfix.py +18 -18
- gwaslab/viz_aux_reposition_text.py +3 -3
- gwaslab/viz_aux_save_figure.py +14 -5
- gwaslab/viz_plot_compare_af.py +29 -30
- gwaslab/viz_plot_compare_effect.py +63 -71
- gwaslab/viz_plot_miamiplot2.py +6 -6
- gwaslab/viz_plot_mqqplot.py +17 -3
- gwaslab/viz_plot_qqplot.py +1 -1
- gwaslab/viz_plot_regionalplot.py +33 -32
- gwaslab/viz_plot_rg_heatmap.py +28 -26
- gwaslab/viz_plot_stackedregional.py +40 -21
- gwaslab/viz_plot_trumpetplot.py +50 -55
- gwaslab-3.4.39.dist-info/LICENSE +674 -0
- {gwaslab-3.4.38.dist-info → gwaslab-3.4.39.dist-info}/METADATA +4 -3
- gwaslab-3.4.39.dist-info/RECORD +80 -0
- gwaslab-3.4.38.dist-info/RECORD +0 -72
- /gwaslab-3.4.38.dist-info/LICENSE → /gwaslab-3.4.39.dist-info/LICENSE_before_v3.4.39 +0 -0
- {gwaslab-3.4.38.dist-info → gwaslab-3.4.39.dist-info}/WHEEL +0 -0
- {gwaslab-3.4.38.dist-info → gwaslab-3.4.39.dist-info}/top_level.txt +0 -0
gwaslab/viz_aux_annotate_plot.py
CHANGED
|
@@ -58,7 +58,7 @@ def annotate_single(
|
|
|
58
58
|
annotation_col="CHR:POS"
|
|
59
59
|
elif anno:
|
|
60
60
|
annotation_col=anno
|
|
61
|
-
|
|
61
|
+
log.write(" -Annotating using column "+annotation_col+"...", verbose=verbose)
|
|
62
62
|
|
|
63
63
|
## calculate y span
|
|
64
64
|
if region is not None:
|
|
@@ -66,7 +66,7 @@ def annotate_single(
|
|
|
66
66
|
else:
|
|
67
67
|
y_span = sumstats["i"].max()-sumstats["i"].min()
|
|
68
68
|
|
|
69
|
-
|
|
69
|
+
log.write(" -Adjusting text positions with repel_force={}...".format(repel_force), verbose=verbose)
|
|
70
70
|
if anno_style == "expand" :
|
|
71
71
|
to_annotate.loc[:, "ADJUSTED_i"] = adjust_text_position(to_annotate["i"].values.copy(), y_span, repel_force,max_iter=anno_max_iter,log=log,amode=amode,verbose=verbose)
|
|
72
72
|
## iterate through variants to be annotated
|
|
@@ -214,7 +214,7 @@ def annotate_single(
|
|
|
214
214
|
anno_count +=1
|
|
215
215
|
#anno_adjust_keyargs = {"arrowprops":dict(arrowstyle='->', color='grey', linewidth=0.1,relpos=(0.5,0.5))}
|
|
216
216
|
if anno_adjust==True:
|
|
217
|
-
|
|
217
|
+
log.write(" -Auto-adjusting text positions...", verbose=verbose)
|
|
218
218
|
adjust_text(texts = anno_to_adjust_list,
|
|
219
219
|
autoalign=False,
|
|
220
220
|
only_move={'points':'x', 'text':'x', 'objects':'x'},
|
|
@@ -232,7 +232,7 @@ def annotate_single(
|
|
|
232
232
|
)
|
|
233
233
|
|
|
234
234
|
else:
|
|
235
|
-
|
|
235
|
+
log.write(" -Skip annotating", verbose=verbose)
|
|
236
236
|
|
|
237
237
|
return ax1
|
|
238
238
|
|
|
@@ -275,7 +275,7 @@ def annotate_pair(
|
|
|
275
275
|
for index,ax,to_annotate_df,anno_d, anno_alias in [(0,ax1,to_annotate1,anno_d1,anno_alias1),(1,ax5,to_annotate5,anno_d2,anno_alias2)]:
|
|
276
276
|
###################### annotate() args
|
|
277
277
|
if to_annotate_df.empty is True:
|
|
278
|
-
|
|
278
|
+
log.write(" -Skipping annotation...", verbose=verbose)
|
|
279
279
|
continue
|
|
280
280
|
|
|
281
281
|
fontweight = "normal"
|
|
@@ -313,7 +313,7 @@ def annotate_pair(
|
|
|
313
313
|
annotation_col=anno
|
|
314
314
|
else:
|
|
315
315
|
annotation_col=anno+"_"+str(index+1)
|
|
316
|
-
|
|
316
|
+
log.write(" -Annotating using column "+annotation_col+"...", verbose=verbose)
|
|
317
317
|
|
|
318
318
|
## calculate y span
|
|
319
319
|
if region is not None:
|
|
@@ -453,7 +453,7 @@ def annotate_pair(
|
|
|
453
453
|
anno_count +=1
|
|
454
454
|
|
|
455
455
|
if anno_adjust==True:
|
|
456
|
-
|
|
456
|
+
log.write(" -Auto-adjusting text positions for plot {}...".format(index), verbose=verbose)
|
|
457
457
|
if index==0:
|
|
458
458
|
va="bottom"
|
|
459
459
|
ha='left'
|
|
@@ -476,7 +476,7 @@ def annotate_pair(
|
|
|
476
476
|
lim =anno_max_iter
|
|
477
477
|
)
|
|
478
478
|
else:
|
|
479
|
-
|
|
479
|
+
log.write(" -Skip annotating", verbose=verbose)
|
|
480
480
|
return ax1,ax5
|
|
481
481
|
|
|
482
482
|
|
|
@@ -521,7 +521,7 @@ def annotate_subtype(
|
|
|
521
521
|
annotation_col="CHR:POS"
|
|
522
522
|
elif anno:
|
|
523
523
|
annotation_col=anno
|
|
524
|
-
|
|
524
|
+
log.write(" -Annotating using column "+annotation_col+"...", verbose=verbose)
|
|
525
525
|
|
|
526
526
|
## calculate y span
|
|
527
527
|
if region is not None:
|
|
@@ -529,7 +529,7 @@ def annotate_subtype(
|
|
|
529
529
|
else:
|
|
530
530
|
y_span = sumstats["i"].max()-sumstats["i"].min()
|
|
531
531
|
|
|
532
|
-
|
|
532
|
+
log.write(" -Adjusting text positions with repel_force={}...".format(repel_force), verbose=verbose)
|
|
533
533
|
if anno_style == "expand" :
|
|
534
534
|
to_annotate.loc[:, "ADJUSTED_i"] = adjust_text_position(to_annotate["i"].values.copy(), y_span, repel_force,max_iter=anno_max_iter,log=log,verbose=verbose)
|
|
535
535
|
## iterate through variants to be annotated
|
|
@@ -656,7 +656,7 @@ def annotate_subtype(
|
|
|
656
656
|
anno_count +=1
|
|
657
657
|
#anno_adjust_keyargs = {"arrowprops":dict(arrowstyle='->', color='grey', linewidth=0.1,relpos=(0.5,0.5))}
|
|
658
658
|
if anno_adjust==True:
|
|
659
|
-
|
|
659
|
+
log.write(" -Auto-adjusting text positions...", verbose=verbose)
|
|
660
660
|
adjust_text(texts = anno_to_adjust_list,
|
|
661
661
|
autoalign=False,
|
|
662
662
|
only_move={'points':'x', 'text':'x', 'objects':'x'},
|
|
@@ -674,7 +674,7 @@ def annotate_subtype(
|
|
|
674
674
|
)
|
|
675
675
|
|
|
676
676
|
else:
|
|
677
|
-
|
|
677
|
+
log.write(" -Skip annotating", verbose=verbose)
|
|
678
678
|
|
|
679
679
|
return ax1
|
|
680
680
|
|
gwaslab/viz_aux_quickfix.py
CHANGED
|
@@ -31,16 +31,16 @@ def _quick_fix_p_value(sumstats, p="P", mlog10p="MLOG10P", scaled=False,verbose=
|
|
|
31
31
|
'''
|
|
32
32
|
if scaled==True:
|
|
33
33
|
# if scaled, add scaled P and P col
|
|
34
|
-
|
|
35
|
-
|
|
34
|
+
log.write(" -P values are already scaled...", verbose=verbose)
|
|
35
|
+
log.write(" -Sumstats -log10(P) values are being converted to P...", verbose=verbose)
|
|
36
36
|
sumstats["scaled_P"] = sumstats[mlog10p].copy()
|
|
37
37
|
sumstats[p]= np.power(10,-sumstats[mlog10p].astype("float64"))
|
|
38
38
|
return sumstats
|
|
39
39
|
# bad p : na and outside (0,1]
|
|
40
40
|
bad_p_value = (sumstats[p].isna()) | (sumstats[p] > 1) | (sumstats[p] <= 0)
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
" variants with P value outside of (0,1] will be removed...")
|
|
41
|
+
|
|
42
|
+
log.write(" -Sanity check after conversion: " + str(sum(bad_p_value)) +
|
|
43
|
+
" variants with P value outside of (0,1] will be removed...", verbose=verbose)
|
|
44
44
|
sumstats = sumstats.loc[~bad_p_value, :]
|
|
45
45
|
return sumstats
|
|
46
46
|
|
|
@@ -51,15 +51,15 @@ def _quick_fix_mlog10p(insumstats,p="P", mlog10p="MLOG10P", scaled=False, log=Lo
|
|
|
51
51
|
'''
|
|
52
52
|
sumstats = insumstats.copy()
|
|
53
53
|
if scaled != True:
|
|
54
|
-
|
|
54
|
+
log.write(" -Sumstats P values are being converted to -log10(P)...", verbose=verbose)
|
|
55
55
|
sumstats["scaled_P"] = -np.log10(sumstats[p].astype("float64"))
|
|
56
56
|
|
|
57
57
|
#with pd.option_context('mode.use_inf_as_na', True):
|
|
58
58
|
# is_na = sumstats["scaled_P"].isna()
|
|
59
59
|
if_inf_na = np.isinf(sumstats["scaled_P"]) | sumstats["scaled_P"].isna()
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
" na/inf/-inf variants will be removed...")
|
|
60
|
+
|
|
61
|
+
log.write(" -Sanity check: "+str(sum(if_inf_na)) +
|
|
62
|
+
" na/inf/-inf variants will be removed...", verbose=verbose)
|
|
63
63
|
sumstats = sumstats.loc[~if_inf_na, :]
|
|
64
64
|
return sumstats
|
|
65
65
|
|
|
@@ -105,7 +105,7 @@ def _get_largenumber(*args,log=Log(), verbose=True):
|
|
|
105
105
|
else:
|
|
106
106
|
break
|
|
107
107
|
if i == 7:
|
|
108
|
-
log.
|
|
108
|
+
log.warning("Max POS is too large!")
|
|
109
109
|
return large_number
|
|
110
110
|
|
|
111
111
|
|
|
@@ -276,9 +276,9 @@ def _quick_extract_snp_in_region(sumstats, region, chrom="CHR",pos="POS",log=Log
|
|
|
276
276
|
region_chr = region[0]
|
|
277
277
|
region_start = region[1]
|
|
278
278
|
region_end = region[2]
|
|
279
|
-
|
|
279
|
+
log.write(" -Extract SNPs in region : chr"+str(region_chr)+":"+str(region[1])+"-"+str(region[2])+ "...", verbose=verbose)
|
|
280
280
|
is_in_region_snp = (sumstats[chrom]==region_chr) &(sumstats[pos]<region_end) &(sumstats[pos]>region_start)
|
|
281
|
-
|
|
281
|
+
log.write(" -Extract SNPs in specified regions: "+str(sum(is_in_region_snp)), verbose=verbose)
|
|
282
282
|
sumstats = sumstats.loc[is_in_region_snp,:]
|
|
283
283
|
return sumstats
|
|
284
284
|
|
|
@@ -289,18 +289,18 @@ def _cut(series, mode,cutfactor,cut,skip, ylabels, cut_log, verbose, lines_to_pl
|
|
|
289
289
|
maxy = series.max()
|
|
290
290
|
series = series.copy()
|
|
291
291
|
if "b" not in mode:
|
|
292
|
-
|
|
292
|
+
log.write(" -Maximum -log10(P) value is "+str(maxy) +" .", verbose=verbose)
|
|
293
293
|
elif "b" in mode:
|
|
294
|
-
|
|
294
|
+
log.write(" -Maximum DENSITY value is "+str(maxy) +" .", verbose=verbose)
|
|
295
295
|
|
|
296
296
|
maxticker=int(np.round(series.max(skipna=True)))
|
|
297
297
|
|
|
298
298
|
if cut:
|
|
299
299
|
# auto mode : determine curline and cut factor
|
|
300
300
|
if cut==True:
|
|
301
|
-
|
|
301
|
+
log.write(" -Cut Auto mode is activated...", verbose=verbose)
|
|
302
302
|
if maxy<30:
|
|
303
|
-
|
|
303
|
+
log.write(" - maxy <30 , no need to cut.", verbose=verbose)
|
|
304
304
|
cut=0
|
|
305
305
|
else:
|
|
306
306
|
cut = 20
|
|
@@ -327,9 +327,9 @@ def _cut(series, mode,cutfactor,cut,skip, ylabels, cut_log, verbose, lines_to_pl
|
|
|
327
327
|
else:
|
|
328
328
|
# cut linear mode
|
|
329
329
|
if "b" not in mode:
|
|
330
|
-
|
|
330
|
+
log.write(" -Minus log10(P) values above " + str(cut)+" will be shrunk with a shrinkage factor of " + str(cutfactor)+"...", verbose=verbose)
|
|
331
331
|
else:
|
|
332
|
-
|
|
332
|
+
log.write(" -Minus DENSITY values above " + str(cut)+" will be shrunk with a shrinkage factor of " + str(cutfactor)+"...", verbose=verbose)
|
|
333
333
|
|
|
334
334
|
maxticker=int(np.round(series.max(skipna=True)))
|
|
335
335
|
|
|
@@ -6,10 +6,10 @@ def adjust_text_position(positions, yspan, repel_force=0.01, max_iter=100,amode=
|
|
|
6
6
|
# check the number of variants to annotate
|
|
7
7
|
#if repel_force>0:
|
|
8
8
|
# if 1/(repel_force*2 +0.01) < len(positions):
|
|
9
|
-
#
|
|
9
|
+
# log.write(" -Too many variants to annotate; maybe it is better to reduce the number of variants")
|
|
10
10
|
#else:
|
|
11
11
|
if len(positions)>30:
|
|
12
|
-
|
|
12
|
+
log.write(" -Too many variants to annotate; maybe it is better to reduce the number of variants",verbose=verbose)
|
|
13
13
|
|
|
14
14
|
# calculate the steps
|
|
15
15
|
if amode=="int":
|
|
@@ -47,7 +47,7 @@ def adjust_text_position(positions, yspan, repel_force=0.01, max_iter=100,amode=
|
|
|
47
47
|
move_position_from_center_float(positions, index, step)
|
|
48
48
|
|
|
49
49
|
# when reaching maximum iteration, return anyway
|
|
50
|
-
|
|
50
|
+
log.write(" -Reaching maximum iteration: {}; Skipping...".format(max_iter),verbose=verbose)
|
|
51
51
|
if amode=="int":
|
|
52
52
|
return np.floor(pd.to_numeric(positions, errors='coerce')).astype('Int64').copy()
|
|
53
53
|
elif amode=="log":
|
gwaslab/viz_aux_save_figure.py
CHANGED
|
@@ -7,18 +7,27 @@ def save_figure(fig, save, keyword, save_args=None, log = Log(), verbose=True):
|
|
|
7
7
|
log.write("Start to save figure..." ,verbose=verbose)
|
|
8
8
|
if save_args is None:
|
|
9
9
|
save_args = {}
|
|
10
|
+
|
|
10
11
|
if save:
|
|
11
12
|
if save==True:
|
|
12
13
|
default_path = get_default_path(keyword)
|
|
13
14
|
fig.savefig(default_path, bbox_inches="tight",**save_args)
|
|
14
15
|
log.write(" -Saved to "+ default_path + " successfully!" ,verbose=verbose)
|
|
15
16
|
else:
|
|
16
|
-
if
|
|
17
|
-
|
|
18
|
-
|
|
17
|
+
if save[-3:]=="pdf":
|
|
18
|
+
if os.path.exists(save):
|
|
19
|
+
fig.savefig(save, **save_args)
|
|
20
|
+
log.write(" -Saved to "+ save + " successfully! (pdf, overwrite)" ,verbose=verbose)
|
|
21
|
+
else:
|
|
22
|
+
fig.savefig(save, **save_args)
|
|
23
|
+
log.write(" -Saved to "+ save + " successfully! (pdf)" ,verbose=verbose)
|
|
19
24
|
else:
|
|
20
|
-
|
|
21
|
-
|
|
25
|
+
if os.path.exists(save):
|
|
26
|
+
fig.savefig(save,bbox_inches="tight",**save_args)
|
|
27
|
+
log.write(" -Saved to "+ save + " successfully! (overwrite)" ,verbose=verbose)
|
|
28
|
+
else:
|
|
29
|
+
fig.savefig(save,bbox_inches="tight",**save_args)
|
|
30
|
+
log.write(" -Saved to "+ save + " successfully!" ,verbose=verbose)
|
|
22
31
|
else:
|
|
23
32
|
log.write(" -Skip saving figure!" ,verbose=verbose)
|
|
24
33
|
log.write("Finished saving figure..." ,verbose=verbose)
|
gwaslab/viz_plot_compare_af.py
CHANGED
|
@@ -10,6 +10,7 @@ from gwaslab.viz_aux_save_figure import save_figure
|
|
|
10
10
|
def plotdaf(sumstats,
|
|
11
11
|
eaf="EAF",
|
|
12
12
|
daf="DAF",
|
|
13
|
+
raf="RAF",
|
|
13
14
|
threshold=0.16,
|
|
14
15
|
xlabel="Alternative Allele Frequency in Reference Population (RAF)",
|
|
15
16
|
ylabel="Effect Allele Frequency in Sumstats (EAF)",
|
|
@@ -43,7 +44,7 @@ def plotdaf(sumstats,
|
|
|
43
44
|
if plt_args is None:
|
|
44
45
|
plt_args={"figsize":(8,4),"dpi":300}
|
|
45
46
|
if histplot_args is None:
|
|
46
|
-
histplot_args={"log_scale":(False,
|
|
47
|
+
histplot_args={"log_scale":(False,False)}
|
|
47
48
|
if reg_line_args is None:
|
|
48
49
|
reg_line_args={"color":'#cccccc', "linestyle":'--'}
|
|
49
50
|
if threshold_line_args is None:
|
|
@@ -55,8 +56,9 @@ def plotdaf(sumstats,
|
|
|
55
56
|
if save_args is None:
|
|
56
57
|
save_args = {}
|
|
57
58
|
|
|
58
|
-
|
|
59
|
-
|
|
59
|
+
log.write("Start to plot allele frequency comparison plot...", verbose=verbose)
|
|
60
|
+
|
|
61
|
+
if not ((eaf in sumstats.columns) and ((daf in sumstats.columns)) or (raf in sumstats.columns)):
|
|
60
62
|
raise ValueError("EAF and/or DAF columns were not detected.")
|
|
61
63
|
|
|
62
64
|
if "SNPID" in sumstats.columns:
|
|
@@ -70,40 +72,42 @@ def plotdaf(sumstats,
|
|
|
70
72
|
if "NEA" in sumstats.columns:
|
|
71
73
|
alleles.append("NEA")
|
|
72
74
|
|
|
75
|
+
if daf not in sumstats.columns:
|
|
76
|
+
sumstats[daf] = sumstats[eaf] - sumstats[raf]
|
|
73
77
|
|
|
74
78
|
sumstats = sumstats.loc[(~sumstats[eaf].isna())&(~sumstats[daf].isna()),[snpid,eaf,daf]+alleles].copy()
|
|
75
79
|
sumstats[daf] = sumstats[daf].astype("float")
|
|
76
80
|
sumstats[eaf] = sumstats[eaf].astype("float")
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
81
|
+
log.write(" -Plotting valriants:" + str(len(sumstats)), verbose=verbose)
|
|
82
|
+
if raf not in sumstats.columns:
|
|
83
|
+
sumstats[raf] = sumstats[eaf] - sumstats[daf]
|
|
80
84
|
sns.set_style("ticks")
|
|
81
|
-
fig,
|
|
82
|
-
ax1.scatter(sumstats[
|
|
85
|
+
fig, [ax1, ax2] = plt.subplots(1, 2,**plt_args)
|
|
86
|
+
ax1.scatter(sumstats[raf],sumstats[eaf],label="Non-outlier", **scatter_args)
|
|
83
87
|
|
|
84
88
|
if is_threshold is True:
|
|
85
89
|
is_outliers = sumstats[daf].abs() > threshold
|
|
86
90
|
if sum(is_outliers)>0:
|
|
87
|
-
ax1.scatter(sumstats.loc[is_outliers,
|
|
91
|
+
ax1.scatter(sumstats.loc[is_outliers, raf],sumstats.loc[is_outliers, eaf],label="Outlier", **scatter_args_outlier)
|
|
88
92
|
|
|
89
93
|
if legend1 ==True:
|
|
90
94
|
ax1.legend()
|
|
91
95
|
|
|
92
96
|
if is_reg is True:
|
|
93
|
-
|
|
94
|
-
reg = ss.linregress(sumstats[
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
97
|
+
log.write(" -Plotting regression line...", verbose=verbose)
|
|
98
|
+
reg = ss.linregress(sumstats[raf],sumstats[eaf])
|
|
99
|
+
log.write(" -Beta = ", reg[0], verbose=verbose)
|
|
100
|
+
log.write(" -Intercept = ", reg[1], verbose=verbose)
|
|
101
|
+
log.write(" -R2 = ", reg[2], verbose=verbose)
|
|
98
102
|
ax1.axline(xy1=(0,reg[1]),slope=reg[0],zorder=1,**reg_line_args)
|
|
99
103
|
if r2 is True:
|
|
100
104
|
ax1.text(0.98,0.02, "$R^2 = {:.3f}$".format(reg[2]), transform=ax1.transAxes, **r2_args)
|
|
101
105
|
|
|
102
106
|
if is_threshold is True:
|
|
103
|
-
|
|
107
|
+
log.write(" -Threshold : " + str(threshold), verbose=verbose)
|
|
104
108
|
num = sum(np.abs(sumstats[daf])>threshold )
|
|
105
|
-
|
|
106
|
-
|
|
109
|
+
log.write(" -Variants with relatively large DAF : ",num , verbose=verbose)
|
|
110
|
+
log.write(" -Percentage for variants with relatively large DAF : ",num/len(sumstats) , verbose=verbose)
|
|
107
111
|
ax1.axline(xy1=(0,threshold),slope=1,zorder=1,**threshold_line_args)
|
|
108
112
|
ax1.axline(xy1=(threshold,0),slope=1,zorder=1,**threshold_line_args)
|
|
109
113
|
|
|
@@ -121,23 +125,18 @@ def plotdaf(sumstats,
|
|
|
121
125
|
|
|
122
126
|
sumstats["ID"] = sumstats.index
|
|
123
127
|
|
|
124
|
-
to_plot = pd.melt(sumstats,id_vars=['ID'], value_vars=[
|
|
125
|
-
|
|
126
|
-
sns.histplot(data=to_plot, x="Allele Frequency", hue="Types", fill=True, ax=ax2, legend = legend2 ,**histplot_args)
|
|
127
|
-
ax2.set_xlabel("Allele Frequency",**font_args)
|
|
128
|
+
to_plot = pd.melt(sumstats,id_vars=['ID'], value_vars=[eaf,raf], var_name='Types', value_name='Allele Frequency').dropna()
|
|
128
129
|
|
|
130
|
+
sns.histplot(data=to_plot, x="Allele Frequency",
|
|
131
|
+
hue="Types", fill=True,
|
|
132
|
+
ax=ax2, legend = legend2,
|
|
133
|
+
**histplot_args)
|
|
134
|
+
|
|
135
|
+
ax2.set_xlabel("Allele Frequency",**font_args)
|
|
129
136
|
|
|
130
137
|
plt.tight_layout()
|
|
131
138
|
save_figure(fig, save, keyword="afc",save_args=save_args, log=log, verbose=verbose)
|
|
132
|
-
|
|
133
|
-
#if save:
|
|
134
|
-
# if verbose: log.write("Saving plot:")
|
|
135
|
-
# if save==True:
|
|
136
|
-
# fig.savefig("./allele_frequency_comparison.png",bbox_inches="tight",**save_args)
|
|
137
|
-
# log.write(" -Saved to "+ "./allele_frequency_comparison.png" + " successfully!" )
|
|
138
|
-
# else:
|
|
139
|
-
# fig.savefig(save,bbox_inches="tight",**save_args)
|
|
140
|
-
# log.write(" -Saved to "+ save + " successfully!" )
|
|
141
139
|
sumstats = sumstats.drop(columns="ID")
|
|
140
|
+
|
|
142
141
|
return fig, sumstats[is_outliers].copy()
|
|
143
142
|
|