gwaslab 3.4.37__py3-none-any.whl → 3.4.39__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gwaslab might be problematic. Click here for more details.
- gwaslab/bd_common_data.py +6 -3
- gwaslab/bd_download.py +9 -9
- gwaslab/bd_get_hapmap3.py +43 -9
- gwaslab/data/formatbook.json +722 -721
- gwaslab/g_Log.py +22 -5
- gwaslab/g_Sumstats.py +110 -163
- gwaslab/g_SumstatsPair.py +76 -25
- gwaslab/g_SumstatsT.py +2 -2
- gwaslab/g_Sumstats_summary.py +3 -3
- gwaslab/g_version.py +10 -10
- gwaslab/hm_casting.py +36 -17
- gwaslab/hm_harmonize_sumstats.py +354 -221
- gwaslab/hm_rsid_to_chrpos.py +1 -1
- gwaslab/io_preformat_input.py +49 -43
- gwaslab/io_read_ldsc.py +49 -1
- gwaslab/io_to_formats.py +428 -295
- gwaslab/ldsc_irwls.py +198 -0
- gwaslab/ldsc_jackknife.py +514 -0
- gwaslab/ldsc_ldscore.py +417 -0
- gwaslab/ldsc_parse.py +294 -0
- gwaslab/ldsc_regressions.py +747 -0
- gwaslab/ldsc_sumstats.py +629 -0
- gwaslab/qc_check_datatype.py +3 -3
- gwaslab/qc_fix_sumstats.py +891 -778
- gwaslab/util_ex_calculate_ldmatrix.py +31 -13
- gwaslab/util_ex_gwascatalog.py +25 -25
- gwaslab/util_ex_ldproxyfinder.py +10 -10
- gwaslab/util_ex_ldsc.py +189 -0
- gwaslab/util_ex_process_ref.py +3 -3
- gwaslab/util_ex_run_coloc.py +26 -4
- gwaslab/util_in_calculate_gc.py +6 -6
- gwaslab/util_in_calculate_power.py +42 -43
- gwaslab/util_in_convert_h2.py +8 -8
- gwaslab/util_in_fill_data.py +30 -30
- gwaslab/util_in_filter_value.py +201 -74
- gwaslab/util_in_get_density.py +10 -10
- gwaslab/util_in_get_sig.py +445 -71
- gwaslab/viz_aux_annotate_plot.py +12 -12
- gwaslab/viz_aux_quickfix.py +42 -37
- gwaslab/viz_aux_reposition_text.py +10 -7
- gwaslab/viz_aux_save_figure.py +18 -8
- gwaslab/viz_plot_compare_af.py +32 -33
- gwaslab/viz_plot_compare_effect.py +63 -71
- gwaslab/viz_plot_miamiplot2.py +34 -26
- gwaslab/viz_plot_mqqplot.py +126 -75
- gwaslab/viz_plot_qqplot.py +11 -8
- gwaslab/viz_plot_regionalplot.py +36 -33
- gwaslab/viz_plot_rg_heatmap.py +28 -26
- gwaslab/viz_plot_stackedregional.py +40 -21
- gwaslab/viz_plot_trumpetplot.py +65 -61
- gwaslab-3.4.39.dist-info/LICENSE +674 -0
- {gwaslab-3.4.37.dist-info → gwaslab-3.4.39.dist-info}/METADATA +5 -4
- gwaslab-3.4.39.dist-info/RECORD +80 -0
- gwaslab-3.4.37.dist-info/RECORD +0 -72
- /gwaslab-3.4.37.dist-info/LICENSE → /gwaslab-3.4.39.dist-info/LICENSE_before_v3.4.39 +0 -0
- {gwaslab-3.4.37.dist-info → gwaslab-3.4.39.dist-info}/WHEEL +0 -0
- {gwaslab-3.4.37.dist-info → gwaslab-3.4.39.dist-info}/top_level.txt +0 -0
gwaslab/viz_aux_annotate_plot.py
CHANGED
|
@@ -58,7 +58,7 @@ def annotate_single(
|
|
|
58
58
|
annotation_col="CHR:POS"
|
|
59
59
|
elif anno:
|
|
60
60
|
annotation_col=anno
|
|
61
|
-
|
|
61
|
+
log.write(" -Annotating using column "+annotation_col+"...", verbose=verbose)
|
|
62
62
|
|
|
63
63
|
## calculate y span
|
|
64
64
|
if region is not None:
|
|
@@ -66,7 +66,7 @@ def annotate_single(
|
|
|
66
66
|
else:
|
|
67
67
|
y_span = sumstats["i"].max()-sumstats["i"].min()
|
|
68
68
|
|
|
69
|
-
|
|
69
|
+
log.write(" -Adjusting text positions with repel_force={}...".format(repel_force), verbose=verbose)
|
|
70
70
|
if anno_style == "expand" :
|
|
71
71
|
to_annotate.loc[:, "ADJUSTED_i"] = adjust_text_position(to_annotate["i"].values.copy(), y_span, repel_force,max_iter=anno_max_iter,log=log,amode=amode,verbose=verbose)
|
|
72
72
|
## iterate through variants to be annotated
|
|
@@ -214,7 +214,7 @@ def annotate_single(
|
|
|
214
214
|
anno_count +=1
|
|
215
215
|
#anno_adjust_keyargs = {"arrowprops":dict(arrowstyle='->', color='grey', linewidth=0.1,relpos=(0.5,0.5))}
|
|
216
216
|
if anno_adjust==True:
|
|
217
|
-
|
|
217
|
+
log.write(" -Auto-adjusting text positions...", verbose=verbose)
|
|
218
218
|
adjust_text(texts = anno_to_adjust_list,
|
|
219
219
|
autoalign=False,
|
|
220
220
|
only_move={'points':'x', 'text':'x', 'objects':'x'},
|
|
@@ -232,7 +232,7 @@ def annotate_single(
|
|
|
232
232
|
)
|
|
233
233
|
|
|
234
234
|
else:
|
|
235
|
-
|
|
235
|
+
log.write(" -Skip annotating", verbose=verbose)
|
|
236
236
|
|
|
237
237
|
return ax1
|
|
238
238
|
|
|
@@ -275,7 +275,7 @@ def annotate_pair(
|
|
|
275
275
|
for index,ax,to_annotate_df,anno_d, anno_alias in [(0,ax1,to_annotate1,anno_d1,anno_alias1),(1,ax5,to_annotate5,anno_d2,anno_alias2)]:
|
|
276
276
|
###################### annotate() args
|
|
277
277
|
if to_annotate_df.empty is True:
|
|
278
|
-
|
|
278
|
+
log.write(" -Skipping annotation...", verbose=verbose)
|
|
279
279
|
continue
|
|
280
280
|
|
|
281
281
|
fontweight = "normal"
|
|
@@ -313,7 +313,7 @@ def annotate_pair(
|
|
|
313
313
|
annotation_col=anno
|
|
314
314
|
else:
|
|
315
315
|
annotation_col=anno+"_"+str(index+1)
|
|
316
|
-
|
|
316
|
+
log.write(" -Annotating using column "+annotation_col+"...", verbose=verbose)
|
|
317
317
|
|
|
318
318
|
## calculate y span
|
|
319
319
|
if region is not None:
|
|
@@ -453,7 +453,7 @@ def annotate_pair(
|
|
|
453
453
|
anno_count +=1
|
|
454
454
|
|
|
455
455
|
if anno_adjust==True:
|
|
456
|
-
|
|
456
|
+
log.write(" -Auto-adjusting text positions for plot {}...".format(index), verbose=verbose)
|
|
457
457
|
if index==0:
|
|
458
458
|
va="bottom"
|
|
459
459
|
ha='left'
|
|
@@ -476,7 +476,7 @@ def annotate_pair(
|
|
|
476
476
|
lim =anno_max_iter
|
|
477
477
|
)
|
|
478
478
|
else:
|
|
479
|
-
|
|
479
|
+
log.write(" -Skip annotating", verbose=verbose)
|
|
480
480
|
return ax1,ax5
|
|
481
481
|
|
|
482
482
|
|
|
@@ -521,7 +521,7 @@ def annotate_subtype(
|
|
|
521
521
|
annotation_col="CHR:POS"
|
|
522
522
|
elif anno:
|
|
523
523
|
annotation_col=anno
|
|
524
|
-
|
|
524
|
+
log.write(" -Annotating using column "+annotation_col+"...", verbose=verbose)
|
|
525
525
|
|
|
526
526
|
## calculate y span
|
|
527
527
|
if region is not None:
|
|
@@ -529,7 +529,7 @@ def annotate_subtype(
|
|
|
529
529
|
else:
|
|
530
530
|
y_span = sumstats["i"].max()-sumstats["i"].min()
|
|
531
531
|
|
|
532
|
-
|
|
532
|
+
log.write(" -Adjusting text positions with repel_force={}...".format(repel_force), verbose=verbose)
|
|
533
533
|
if anno_style == "expand" :
|
|
534
534
|
to_annotate.loc[:, "ADJUSTED_i"] = adjust_text_position(to_annotate["i"].values.copy(), y_span, repel_force,max_iter=anno_max_iter,log=log,verbose=verbose)
|
|
535
535
|
## iterate through variants to be annotated
|
|
@@ -656,7 +656,7 @@ def annotate_subtype(
|
|
|
656
656
|
anno_count +=1
|
|
657
657
|
#anno_adjust_keyargs = {"arrowprops":dict(arrowstyle='->', color='grey', linewidth=0.1,relpos=(0.5,0.5))}
|
|
658
658
|
if anno_adjust==True:
|
|
659
|
-
|
|
659
|
+
log.write(" -Auto-adjusting text positions...", verbose=verbose)
|
|
660
660
|
adjust_text(texts = anno_to_adjust_list,
|
|
661
661
|
autoalign=False,
|
|
662
662
|
only_move={'points':'x', 'text':'x', 'objects':'x'},
|
|
@@ -674,7 +674,7 @@ def annotate_subtype(
|
|
|
674
674
|
)
|
|
675
675
|
|
|
676
676
|
else:
|
|
677
|
-
|
|
677
|
+
log.write(" -Skip annotating", verbose=verbose)
|
|
678
678
|
|
|
679
679
|
return ax1
|
|
680
680
|
|
gwaslab/viz_aux_quickfix.py
CHANGED
|
@@ -5,7 +5,7 @@ from gwaslab.bd_common_data import get_chr_to_number
|
|
|
5
5
|
from gwaslab.bd_common_data import get_number_to_chr
|
|
6
6
|
from math import ceil
|
|
7
7
|
|
|
8
|
-
def _quick_fix(sumstats, chr_dict=get_chr_to_number(), scaled=False, chrom="CHR", pos="POS", p="P", mlog10p="MLOG10P",
|
|
8
|
+
def _quick_fix(sumstats, chr_dict=get_chr_to_number(), scaled=False, chrom="CHR", pos="POS", p="P", mlog10p="MLOG10P",log=Log(), verbose=True):
|
|
9
9
|
'''
|
|
10
10
|
quick sanity check for input sumstats
|
|
11
11
|
'''
|
|
@@ -31,39 +31,40 @@ def _quick_fix_p_value(sumstats, p="P", mlog10p="MLOG10P", scaled=False,verbose=
|
|
|
31
31
|
'''
|
|
32
32
|
if scaled==True:
|
|
33
33
|
# if scaled, add scaled P and P col
|
|
34
|
-
|
|
35
|
-
|
|
34
|
+
log.write(" -P values are already scaled...", verbose=verbose)
|
|
35
|
+
log.write(" -Sumstats -log10(P) values are being converted to P...", verbose=verbose)
|
|
36
36
|
sumstats["scaled_P"] = sumstats[mlog10p].copy()
|
|
37
37
|
sumstats[p]= np.power(10,-sumstats[mlog10p].astype("float64"))
|
|
38
38
|
return sumstats
|
|
39
39
|
# bad p : na and outside (0,1]
|
|
40
40
|
bad_p_value = (sumstats[p].isna()) | (sumstats[p] > 1) | (sumstats[p] <= 0)
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
" variants with P value outside of (0,1] will be removed...")
|
|
41
|
+
|
|
42
|
+
log.write(" -Sanity check after conversion: " + str(sum(bad_p_value)) +
|
|
43
|
+
" variants with P value outside of (0,1] will be removed...", verbose=verbose)
|
|
44
44
|
sumstats = sumstats.loc[~bad_p_value, :]
|
|
45
45
|
return sumstats
|
|
46
46
|
|
|
47
47
|
|
|
48
|
-
def _quick_fix_mlog10p(
|
|
48
|
+
def _quick_fix_mlog10p(insumstats,p="P", mlog10p="MLOG10P", scaled=False, log=Log(), verbose=True):
|
|
49
49
|
'''
|
|
50
50
|
drop variants with bad -log10(P) values
|
|
51
51
|
'''
|
|
52
|
+
sumstats = insumstats.copy()
|
|
52
53
|
if scaled != True:
|
|
53
|
-
|
|
54
|
+
log.write(" -Sumstats P values are being converted to -log10(P)...", verbose=verbose)
|
|
54
55
|
sumstats["scaled_P"] = -np.log10(sumstats[p].astype("float64"))
|
|
55
56
|
|
|
56
57
|
#with pd.option_context('mode.use_inf_as_na', True):
|
|
57
58
|
# is_na = sumstats["scaled_P"].isna()
|
|
58
59
|
if_inf_na = np.isinf(sumstats["scaled_P"]) | sumstats["scaled_P"].isna()
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
" na/inf/-inf variants will be removed...")
|
|
60
|
+
|
|
61
|
+
log.write(" -Sanity check: "+str(sum(if_inf_na)) +
|
|
62
|
+
" na/inf/-inf variants will be removed...", verbose=verbose)
|
|
62
63
|
sumstats = sumstats.loc[~if_inf_na, :]
|
|
63
64
|
return sumstats
|
|
64
65
|
|
|
65
66
|
|
|
66
|
-
def _quick_fix_eaf(seires,
|
|
67
|
+
def _quick_fix_eaf(seires,log=Log(), verbose=True):
|
|
67
68
|
'''
|
|
68
69
|
conversion of eaf to maf
|
|
69
70
|
'''
|
|
@@ -73,7 +74,7 @@ def _quick_fix_eaf(seires, verbose=True, log=Log()):
|
|
|
73
74
|
return seires.copy()
|
|
74
75
|
|
|
75
76
|
|
|
76
|
-
def _quick_fix_chr(seires, chr_dict,
|
|
77
|
+
def _quick_fix_chr(seires, chr_dict,log=Log(), verbose=True):
|
|
77
78
|
'''
|
|
78
79
|
conversion and check for chr
|
|
79
80
|
'''
|
|
@@ -84,7 +85,7 @@ def _quick_fix_chr(seires, chr_dict, verbose=True, log=Log()):
|
|
|
84
85
|
return seires
|
|
85
86
|
|
|
86
87
|
|
|
87
|
-
def _quick_fix_pos(seires,
|
|
88
|
+
def _quick_fix_pos(seires,log=Log(), verbose=True):
|
|
88
89
|
'''
|
|
89
90
|
force conversion for pos
|
|
90
91
|
'''
|
|
@@ -92,7 +93,7 @@ def _quick_fix_pos(seires, verbose=True, log=Log()):
|
|
|
92
93
|
return seires
|
|
93
94
|
|
|
94
95
|
|
|
95
|
-
def _get_largenumber(*args,
|
|
96
|
+
def _get_largenumber(*args,log=Log(), verbose=True):
|
|
96
97
|
'''
|
|
97
98
|
get a helper large number, >> max(pos)
|
|
98
99
|
'''
|
|
@@ -104,11 +105,11 @@ def _get_largenumber(*args, log=Log()):
|
|
|
104
105
|
else:
|
|
105
106
|
break
|
|
106
107
|
if i == 7:
|
|
107
|
-
log.
|
|
108
|
+
log.warning("Max POS is too large!")
|
|
108
109
|
return large_number
|
|
109
110
|
|
|
110
111
|
|
|
111
|
-
def _quick_add_tchrpos(sumstats, chr="chr", pos="POS", large_number=10000000000, dropchrpos=False,
|
|
112
|
+
def _quick_add_tchrpos(sumstats, chr="chr", pos="POS", large_number=10000000000, dropchrpos=False,log=Log(), verbose=True):
|
|
112
113
|
sumstats["TCHR+POS"] = sumstats["CHR"]*large_number + sumstats["POS"]
|
|
113
114
|
sumstats["TCHR+POS"] = sumstats["TCHR+POS"].astype('Int64')
|
|
114
115
|
if dropchrpos == True:
|
|
@@ -117,7 +118,7 @@ def _quick_add_tchrpos(sumstats, chr="chr", pos="POS", large_number=10000000000,
|
|
|
117
118
|
return sumstats
|
|
118
119
|
|
|
119
120
|
|
|
120
|
-
def _quick_merge_sumstats(sumstats1, sumstats2):
|
|
121
|
+
def _quick_merge_sumstats(sumstats1, sumstats2, log=Log(), verbose=True):
|
|
121
122
|
merged_sumstats = pd.merge(sumstats1, sumstats2, on="TCHR+POS", how="outer", suffixes=('_1', '_2'))
|
|
122
123
|
merged_sumstats["CHR"] = merged_sumstats["CHR_1"]
|
|
123
124
|
merged_sumstats["POS"] = merged_sumstats["POS_1"]
|
|
@@ -126,7 +127,7 @@ def _quick_merge_sumstats(sumstats1, sumstats2):
|
|
|
126
127
|
merged_sumstats = merged_sumstats.drop(labels=["CHR_1", "CHR_2", "POS_1", "POS_2"],axis=1)
|
|
127
128
|
return merged_sumstats
|
|
128
129
|
|
|
129
|
-
def _quick_assign_i(sumstats, chrom="CHR",pos="POS"):
|
|
130
|
+
def _quick_assign_i(sumstats, chrom="CHR",pos="POS",log=Log(), verbose=True):
|
|
130
131
|
# sort by CHR an POS
|
|
131
132
|
sumstats = sumstats.sort_values([chrom,pos])
|
|
132
133
|
# set new id
|
|
@@ -158,7 +159,7 @@ def _quick_assign_i(sumstats, chrom="CHR",pos="POS"):
|
|
|
158
159
|
sumstats["i"] = np.floor(pd.to_numeric(sumstats["i"], errors='coerce')).astype('Int64')
|
|
159
160
|
return sumstats, chrom_df
|
|
160
161
|
|
|
161
|
-
def _quick_assign_i_with_rank(sumstats, chrpad, use_rank=False, chrom="CHR",pos="POS",drop_chr_start=False,_posdiccul=None):
|
|
162
|
+
def _quick_assign_i_with_rank(sumstats, chrpad, use_rank=False, chrom="CHR",pos="POS",drop_chr_start=False,_posdiccul=None,log=Log(), verbose=True):
|
|
162
163
|
# align all variants on a single axis (i)
|
|
163
164
|
sumstats = sumstats.sort_values([chrom,pos])
|
|
164
165
|
if use_rank is True:
|
|
@@ -218,7 +219,7 @@ def _quick_assign_i_with_rank(sumstats, chrpad, use_rank=False, chrom="CHR",pos=
|
|
|
218
219
|
sumstats["i"] = np.floor(pd.to_numeric(sumstats["i"], errors='coerce')).astype('Int64')
|
|
219
220
|
return sumstats, chrom_df
|
|
220
221
|
|
|
221
|
-
def _quick_assign_marker_relative_size(series, sig_level = 5e-8, suggestive_sig_level=5e-6, lower_level=5e-4):
|
|
222
|
+
def _quick_assign_marker_relative_size(series, sig_level = 5e-8, suggestive_sig_level=5e-6, lower_level=5e-4,log=Log(), verbose=True):
|
|
222
223
|
size_series = series.copy()
|
|
223
224
|
size_series[:] = 1
|
|
224
225
|
|
|
@@ -231,7 +232,7 @@ def _quick_assign_marker_relative_size(series, sig_level = 5e-8, suggestive_sig_
|
|
|
231
232
|
size_series[is_sig_level] = 4
|
|
232
233
|
return size_series
|
|
233
234
|
|
|
234
|
-
def _quick_assign_highlight_hue(sumstats,highlight,highlight_windowkb, snpid="SNPID",chrom="CHR",pos="POS",
|
|
235
|
+
def _quick_assign_highlight_hue(sumstats,highlight,highlight_windowkb, snpid="SNPID",chrom="CHR",pos="POS",log=Log(), verbose=True):
|
|
235
236
|
to_highlight = sumstats.loc[sumstats[snpid].isin(highlight),:]
|
|
236
237
|
#assign colors: 0 is hightlight color
|
|
237
238
|
for i,row in to_highlight.iterrows():
|
|
@@ -243,7 +244,7 @@ def _quick_assign_highlight_hue(sumstats,highlight,highlight_windowkb, snpid="SN
|
|
|
243
244
|
sumstats.loc[right_chr&up_pos&low_pos,"HUE"]="0"
|
|
244
245
|
return sumstats
|
|
245
246
|
|
|
246
|
-
def _quick_assign_highlight_hue_pair(sumstats, highlight1, highlight2, highlight_windowkb, chrom="CHR",pos="POS",
|
|
247
|
+
def _quick_assign_highlight_hue_pair(sumstats, highlight1, highlight2, highlight_windowkb, chrom="CHR",pos="POS",log=Log(), verbose=True):
|
|
247
248
|
#assign colors: 0 is hightlight color
|
|
248
249
|
to_highlight1 = pd.DataFrame()
|
|
249
250
|
to_highlight2 = pd.DataFrame()
|
|
@@ -271,34 +272,35 @@ def _quick_assign_highlight_hue_pair(sumstats, highlight1, highlight2, highlight
|
|
|
271
272
|
sumstats.loc[right_chr&up_pos&low_pos,"HUE2"]="0"
|
|
272
273
|
return sumstats, to_highlight1, to_highlight2
|
|
273
274
|
|
|
274
|
-
def _quick_extract_snp_in_region(sumstats, region, chrom="CHR",pos="POS",
|
|
275
|
+
def _quick_extract_snp_in_region(sumstats, region, chrom="CHR",pos="POS",log=Log(), verbose=True):
|
|
275
276
|
region_chr = region[0]
|
|
276
277
|
region_start = region[1]
|
|
277
278
|
region_end = region[2]
|
|
278
|
-
|
|
279
|
+
log.write(" -Extract SNPs in region : chr"+str(region_chr)+":"+str(region[1])+"-"+str(region[2])+ "...", verbose=verbose)
|
|
279
280
|
is_in_region_snp = (sumstats[chrom]==region_chr) &(sumstats[pos]<region_end) &(sumstats[pos]>region_start)
|
|
280
|
-
|
|
281
|
+
log.write(" -Extract SNPs in specified regions: "+str(sum(is_in_region_snp)), verbose=verbose)
|
|
281
282
|
sumstats = sumstats.loc[is_in_region_snp,:]
|
|
282
283
|
return sumstats
|
|
283
284
|
|
|
284
|
-
def _cut(series, mode,cutfactor,cut,skip, ylabels, cut_log, verbose,lines_to_plot, log):
|
|
285
|
+
def _cut(series, mode,cutfactor,cut,skip, ylabels, cut_log, verbose, lines_to_plot, log):
|
|
286
|
+
log.write(" -Converting data above cut line...",verbose=verbose)
|
|
285
287
|
if ylabels is not None:
|
|
286
288
|
ylabels = pd.Series(ylabels)
|
|
287
289
|
maxy = series.max()
|
|
288
290
|
series = series.copy()
|
|
289
291
|
if "b" not in mode:
|
|
290
|
-
|
|
292
|
+
log.write(" -Maximum -log10(P) value is "+str(maxy) +" .", verbose=verbose)
|
|
291
293
|
elif "b" in mode:
|
|
292
|
-
|
|
294
|
+
log.write(" -Maximum DENSITY value is "+str(maxy) +" .", verbose=verbose)
|
|
293
295
|
|
|
294
296
|
maxticker=int(np.round(series.max(skipna=True)))
|
|
295
297
|
|
|
296
298
|
if cut:
|
|
297
299
|
# auto mode : determine curline and cut factor
|
|
298
300
|
if cut==True:
|
|
299
|
-
|
|
301
|
+
log.write(" -Cut Auto mode is activated...", verbose=verbose)
|
|
300
302
|
if maxy<30:
|
|
301
|
-
|
|
303
|
+
log.write(" - maxy <30 , no need to cut.", verbose=verbose)
|
|
302
304
|
cut=0
|
|
303
305
|
else:
|
|
304
306
|
cut = 20
|
|
@@ -325,9 +327,9 @@ def _cut(series, mode,cutfactor,cut,skip, ylabels, cut_log, verbose,lines_to_plo
|
|
|
325
327
|
else:
|
|
326
328
|
# cut linear mode
|
|
327
329
|
if "b" not in mode:
|
|
328
|
-
|
|
330
|
+
log.write(" -Minus log10(P) values above " + str(cut)+" will be shrunk with a shrinkage factor of " + str(cutfactor)+"...", verbose=verbose)
|
|
329
331
|
else:
|
|
330
|
-
|
|
332
|
+
log.write(" -Minus DENSITY values above " + str(cut)+" will be shrunk with a shrinkage factor of " + str(cutfactor)+"...", verbose=verbose)
|
|
331
333
|
|
|
332
334
|
maxticker=int(np.round(series.max(skipna=True)))
|
|
333
335
|
|
|
@@ -340,7 +342,7 @@ def _cut(series, mode,cutfactor,cut,skip, ylabels, cut_log, verbose,lines_to_plo
|
|
|
340
342
|
#sumstats.loc[sumstats["scaled_P"]>cut,"scaled_P"] = (sumstats.loc[sumstats["scaled_P"]>cut,"scaled_P"]-cut)/cutfactor + cut
|
|
341
343
|
|
|
342
344
|
maxy = (maxticker-cut)/cutfactor + cut
|
|
343
|
-
|
|
345
|
+
|
|
344
346
|
return series, maxy, maxticker, cut, cutfactor,ylabels,lines_to_plot
|
|
345
347
|
|
|
346
348
|
#def _cut_line(level, mode,cutfactor,cut,skip, ylabels, cut_log, verbose, log):
|
|
@@ -362,9 +364,11 @@ def _set_yticklabels(cut,
|
|
|
362
364
|
font_family,
|
|
363
365
|
ytick3,
|
|
364
366
|
ylabels,
|
|
365
|
-
ylabels_converted
|
|
367
|
+
ylabels_converted,
|
|
368
|
+
log=Log(),
|
|
369
|
+
verbose=True
|
|
366
370
|
):
|
|
367
|
-
|
|
371
|
+
log.write(" -Processing Y tick lables...",verbose=verbose)
|
|
368
372
|
# if no cut
|
|
369
373
|
if cut == 0:
|
|
370
374
|
ax1.set_ylim(skip, ceil(maxy*1.2) )
|
|
@@ -430,7 +434,8 @@ def _set_yticklabels(cut,
|
|
|
430
434
|
|
|
431
435
|
return ax1
|
|
432
436
|
|
|
433
|
-
def _jagged_y(cut,skip,ax1,mode,mqqratio,jagged_len,jagged_wid):
|
|
437
|
+
def _jagged_y(cut,skip,ax1,mode,mqqratio,jagged_len,jagged_wid, log=Log(), verbose=True):
|
|
438
|
+
log.write(" -Processing jagged Y axis...",verbose=verbose)
|
|
434
439
|
tycut = cut +0.3 #(cut - skip)/ (ax1.get_ylim()[1] - skip) + 0.002
|
|
435
440
|
dy= jagged_len * (cut - skip)
|
|
436
441
|
x0 = 0
|
|
@@ -2,23 +2,25 @@ import pandas as pd
|
|
|
2
2
|
import numpy as np
|
|
3
3
|
from gwaslab.g_Log import Log
|
|
4
4
|
|
|
5
|
-
def adjust_text_position(positions, yspan, repel_force=0.01, max_iter=100,amode="int",log=Log(),verbose=True):
|
|
5
|
+
def adjust_text_position(positions, yspan, repel_force=0.01, max_iter=100,amode="int",log=Log(),verbose=True, min_factor=None):
|
|
6
6
|
# check the number of variants to annotate
|
|
7
7
|
#if repel_force>0:
|
|
8
8
|
# if 1/(repel_force*2 +0.01) < len(positions):
|
|
9
|
-
#
|
|
9
|
+
# log.write(" -Too many variants to annotate; maybe it is better to reduce the number of variants")
|
|
10
10
|
#else:
|
|
11
11
|
if len(positions)>30:
|
|
12
|
-
|
|
12
|
+
log.write(" -Too many variants to annotate; maybe it is better to reduce the number of variants",verbose=verbose)
|
|
13
13
|
|
|
14
14
|
# calculate the steps
|
|
15
15
|
if amode=="int":
|
|
16
16
|
step = int(yspan*repel_force)
|
|
17
17
|
elif amode=="log":
|
|
18
|
-
min_factor
|
|
18
|
+
if min_factor is None:
|
|
19
|
+
min_factor = np.min(positions)
|
|
19
20
|
#(1, max) -> (0, log(max)))
|
|
20
|
-
positions = np.
|
|
21
|
+
positions = np.log2(positions/min_factor)
|
|
21
22
|
step = max(positions)*repel_force
|
|
23
|
+
|
|
22
24
|
else:
|
|
23
25
|
step = yspan*repel_force
|
|
24
26
|
|
|
@@ -33,7 +35,8 @@ def adjust_text_position(positions, yspan, repel_force=0.01, max_iter=100,amode=
|
|
|
33
35
|
if amode=="int":
|
|
34
36
|
return np.floor(pd.to_numeric(positions, errors='coerce')).astype('Int64').copy()
|
|
35
37
|
elif amode=="log":
|
|
36
|
-
|
|
38
|
+
|
|
39
|
+
return np.power(2, pd.to_numeric(positions, errors='coerce'))* min_factor
|
|
37
40
|
else:
|
|
38
41
|
return pd.to_numeric(positions, errors='coerce')
|
|
39
42
|
else:
|
|
@@ -44,7 +47,7 @@ def adjust_text_position(positions, yspan, repel_force=0.01, max_iter=100,amode=
|
|
|
44
47
|
move_position_from_center_float(positions, index, step)
|
|
45
48
|
|
|
46
49
|
# when reaching maximum iteration, return anyway
|
|
47
|
-
|
|
50
|
+
log.write(" -Reaching maximum iteration: {}; Skipping...".format(max_iter),verbose=verbose)
|
|
48
51
|
if amode=="int":
|
|
49
52
|
return np.floor(pd.to_numeric(positions, errors='coerce')).astype('Int64').copy()
|
|
50
53
|
elif amode=="log":
|
gwaslab/viz_aux_save_figure.py
CHANGED
|
@@ -4,23 +4,33 @@ import time
|
|
|
4
4
|
import os.path
|
|
5
5
|
|
|
6
6
|
def save_figure(fig, save, keyword, save_args=None, log = Log(), verbose=True):
|
|
7
|
+
log.write("Start to save figure..." ,verbose=verbose)
|
|
7
8
|
if save_args is None:
|
|
8
9
|
save_args = {}
|
|
10
|
+
|
|
9
11
|
if save:
|
|
10
|
-
if verbose: log.write("Saving plot:")
|
|
11
12
|
if save==True:
|
|
12
13
|
default_path = get_default_path(keyword)
|
|
13
14
|
fig.savefig(default_path, bbox_inches="tight",**save_args)
|
|
14
|
-
log.write(" -Saved to "+ default_path + " successfully!" )
|
|
15
|
+
log.write(" -Saved to "+ default_path + " successfully!" ,verbose=verbose)
|
|
15
16
|
else:
|
|
16
|
-
if
|
|
17
|
-
|
|
18
|
-
|
|
17
|
+
if save[-3:]=="pdf":
|
|
18
|
+
if os.path.exists(save):
|
|
19
|
+
fig.savefig(save, **save_args)
|
|
20
|
+
log.write(" -Saved to "+ save + " successfully! (pdf, overwrite)" ,verbose=verbose)
|
|
21
|
+
else:
|
|
22
|
+
fig.savefig(save, **save_args)
|
|
23
|
+
log.write(" -Saved to "+ save + " successfully! (pdf)" ,verbose=verbose)
|
|
19
24
|
else:
|
|
20
|
-
|
|
21
|
-
|
|
25
|
+
if os.path.exists(save):
|
|
26
|
+
fig.savefig(save,bbox_inches="tight",**save_args)
|
|
27
|
+
log.write(" -Saved to "+ save + " successfully! (overwrite)" ,verbose=verbose)
|
|
28
|
+
else:
|
|
29
|
+
fig.savefig(save,bbox_inches="tight",**save_args)
|
|
30
|
+
log.write(" -Saved to "+ save + " successfully!" ,verbose=verbose)
|
|
22
31
|
else:
|
|
23
|
-
log.write(" -Skip saving
|
|
32
|
+
log.write(" -Skip saving figure!" ,verbose=verbose)
|
|
33
|
+
log.write("Finished saving figure..." ,verbose=verbose)
|
|
24
34
|
|
|
25
35
|
def get_default_path(keyword,fmt="png"):
|
|
26
36
|
path_dictionary = {
|
gwaslab/viz_plot_compare_af.py
CHANGED
|
@@ -10,6 +10,7 @@ from gwaslab.viz_aux_save_figure import save_figure
|
|
|
10
10
|
def plotdaf(sumstats,
|
|
11
11
|
eaf="EAF",
|
|
12
12
|
daf="DAF",
|
|
13
|
+
raf="RAF",
|
|
13
14
|
threshold=0.16,
|
|
14
15
|
xlabel="Alternative Allele Frequency in Reference Population (RAF)",
|
|
15
16
|
ylabel="Effect Allele Frequency in Sumstats (EAF)",
|
|
@@ -43,7 +44,7 @@ def plotdaf(sumstats,
|
|
|
43
44
|
if plt_args is None:
|
|
44
45
|
plt_args={"figsize":(8,4),"dpi":300}
|
|
45
46
|
if histplot_args is None:
|
|
46
|
-
histplot_args={"log_scale":(False,
|
|
47
|
+
histplot_args={"log_scale":(False,False)}
|
|
47
48
|
if reg_line_args is None:
|
|
48
49
|
reg_line_args={"color":'#cccccc', "linestyle":'--'}
|
|
49
50
|
if threshold_line_args is None:
|
|
@@ -55,8 +56,9 @@ def plotdaf(sumstats,
|
|
|
55
56
|
if save_args is None:
|
|
56
57
|
save_args = {}
|
|
57
58
|
|
|
58
|
-
|
|
59
|
-
|
|
59
|
+
log.write("Start to plot allele frequency comparison plot...", verbose=verbose)
|
|
60
|
+
|
|
61
|
+
if not ((eaf in sumstats.columns) and ((daf in sumstats.columns)) or (raf in sumstats.columns)):
|
|
60
62
|
raise ValueError("EAF and/or DAF columns were not detected.")
|
|
61
63
|
|
|
62
64
|
if "SNPID" in sumstats.columns:
|
|
@@ -70,40 +72,42 @@ def plotdaf(sumstats,
|
|
|
70
72
|
if "NEA" in sumstats.columns:
|
|
71
73
|
alleles.append("NEA")
|
|
72
74
|
|
|
75
|
+
if daf not in sumstats.columns:
|
|
76
|
+
sumstats[daf] = sumstats[eaf] - sumstats[raf]
|
|
73
77
|
|
|
74
78
|
sumstats = sumstats.loc[(~sumstats[eaf].isna())&(~sumstats[daf].isna()),[snpid,eaf,daf]+alleles].copy()
|
|
75
|
-
sumstats
|
|
76
|
-
sumstats
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
79
|
+
sumstats[daf] = sumstats[daf].astype("float")
|
|
80
|
+
sumstats[eaf] = sumstats[eaf].astype("float")
|
|
81
|
+
log.write(" -Plotting valriants:" + str(len(sumstats)), verbose=verbose)
|
|
82
|
+
if raf not in sumstats.columns:
|
|
83
|
+
sumstats[raf] = sumstats[eaf] - sumstats[daf]
|
|
80
84
|
sns.set_style("ticks")
|
|
81
|
-
fig,
|
|
82
|
-
ax1.scatter(sumstats[
|
|
85
|
+
fig, [ax1, ax2] = plt.subplots(1, 2,**plt_args)
|
|
86
|
+
ax1.scatter(sumstats[raf],sumstats[eaf],label="Non-outlier", **scatter_args)
|
|
83
87
|
|
|
84
88
|
if is_threshold is True:
|
|
85
89
|
is_outliers = sumstats[daf].abs() > threshold
|
|
86
90
|
if sum(is_outliers)>0:
|
|
87
|
-
ax1.scatter(sumstats.loc[is_outliers,
|
|
91
|
+
ax1.scatter(sumstats.loc[is_outliers, raf],sumstats.loc[is_outliers, eaf],label="Outlier", **scatter_args_outlier)
|
|
88
92
|
|
|
89
93
|
if legend1 ==True:
|
|
90
94
|
ax1.legend()
|
|
91
95
|
|
|
92
96
|
if is_reg is True:
|
|
93
|
-
|
|
94
|
-
reg = ss.linregress(sumstats[
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
97
|
+
log.write(" -Plotting regression line...", verbose=verbose)
|
|
98
|
+
reg = ss.linregress(sumstats[raf],sumstats[eaf])
|
|
99
|
+
log.write(" -Beta = ", reg[0], verbose=verbose)
|
|
100
|
+
log.write(" -Intercept = ", reg[1], verbose=verbose)
|
|
101
|
+
log.write(" -R2 = ", reg[2], verbose=verbose)
|
|
98
102
|
ax1.axline(xy1=(0,reg[1]),slope=reg[0],zorder=1,**reg_line_args)
|
|
99
103
|
if r2 is True:
|
|
100
104
|
ax1.text(0.98,0.02, "$R^2 = {:.3f}$".format(reg[2]), transform=ax1.transAxes, **r2_args)
|
|
101
105
|
|
|
102
106
|
if is_threshold is True:
|
|
103
|
-
|
|
107
|
+
log.write(" -Threshold : " + str(threshold), verbose=verbose)
|
|
104
108
|
num = sum(np.abs(sumstats[daf])>threshold )
|
|
105
|
-
|
|
106
|
-
|
|
109
|
+
log.write(" -Variants with relatively large DAF : ",num , verbose=verbose)
|
|
110
|
+
log.write(" -Percentage for variants with relatively large DAF : ",num/len(sumstats) , verbose=verbose)
|
|
107
111
|
ax1.axline(xy1=(0,threshold),slope=1,zorder=1,**threshold_line_args)
|
|
108
112
|
ax1.axline(xy1=(threshold,0),slope=1,zorder=1,**threshold_line_args)
|
|
109
113
|
|
|
@@ -119,25 +123,20 @@ def plotdaf(sumstats,
|
|
|
119
123
|
ax1.set_ylim([0,1])
|
|
120
124
|
|
|
121
125
|
|
|
122
|
-
sumstats
|
|
126
|
+
sumstats["ID"] = sumstats.index
|
|
123
127
|
|
|
124
|
-
to_plot = pd.melt(sumstats,id_vars=['ID'], value_vars=[
|
|
125
|
-
|
|
126
|
-
sns.histplot(data=to_plot, x="Allele Frequency", hue="Types", fill=True, ax=ax2, legend = legend2 ,**histplot_args)
|
|
127
|
-
ax2.set_xlabel("Allele Frequency",**font_args)
|
|
128
|
+
to_plot = pd.melt(sumstats,id_vars=['ID'], value_vars=[eaf,raf], var_name='Types', value_name='Allele Frequency').dropna()
|
|
128
129
|
|
|
130
|
+
sns.histplot(data=to_plot, x="Allele Frequency",
|
|
131
|
+
hue="Types", fill=True,
|
|
132
|
+
ax=ax2, legend = legend2,
|
|
133
|
+
**histplot_args)
|
|
134
|
+
|
|
135
|
+
ax2.set_xlabel("Allele Frequency",**font_args)
|
|
129
136
|
|
|
130
137
|
plt.tight_layout()
|
|
131
138
|
save_figure(fig, save, keyword="afc",save_args=save_args, log=log, verbose=verbose)
|
|
132
|
-
|
|
133
|
-
#if save:
|
|
134
|
-
# if verbose: log.write("Saving plot:")
|
|
135
|
-
# if save==True:
|
|
136
|
-
# fig.savefig("./allele_frequency_comparison.png",bbox_inches="tight",**save_args)
|
|
137
|
-
# log.write(" -Saved to "+ "./allele_frequency_comparison.png" + " successfully!" )
|
|
138
|
-
# else:
|
|
139
|
-
# fig.savefig(save,bbox_inches="tight",**save_args)
|
|
140
|
-
# log.write(" -Saved to "+ save + " successfully!" )
|
|
141
139
|
sumstats = sumstats.drop(columns="ID")
|
|
140
|
+
|
|
142
141
|
return fig, sumstats[is_outliers].copy()
|
|
143
142
|
|