gwaslab 3.4.49__py3-none-any.whl → 3.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gwaslab might be problematic. Click here for more details.
- gwaslab/bd_get_hapmap3.py +3 -1
- gwaslab/g_Sumstats.py +22 -10
- gwaslab/g_version.py +2 -2
- gwaslab/io_process_args.py +23 -0
- gwaslab/qc_fix_sumstats.py +9 -5
- gwaslab/util_in_get_sig.py +18 -2
- gwaslab/vis_plot_credible sets.py +0 -0
- gwaslab/viz_aux_annotate_plot.py +83 -242
- gwaslab/viz_aux_property.py +2 -0
- gwaslab/viz_aux_quickfix.py +9 -2
- gwaslab/viz_aux_save_figure.py +2 -1
- gwaslab/viz_plot_compare_effect.py +870 -523
- gwaslab/viz_plot_mqqplot.py +74 -21
- gwaslab/viz_plot_phe_heatmap.py +260 -0
- gwaslab/viz_plot_stackedregional.py +1 -1
- {gwaslab-3.4.49.dist-info → gwaslab-3.5.1.dist-info}/METADATA +3 -3
- {gwaslab-3.4.49.dist-info → gwaslab-3.5.1.dist-info}/RECORD +21 -17
- {gwaslab-3.4.49.dist-info → gwaslab-3.5.1.dist-info}/WHEEL +1 -1
- {gwaslab-3.4.49.dist-info → gwaslab-3.5.1.dist-info}/LICENSE +0 -0
- {gwaslab-3.4.49.dist-info → gwaslab-3.5.1.dist-info}/LICENSE_before_v3.4.39 +0 -0
- {gwaslab-3.4.49.dist-info → gwaslab-3.5.1.dist-info}/top_level.txt +0 -0
|
@@ -4,16 +4,19 @@ import matplotlib.pyplot as plt
|
|
|
4
4
|
import scipy.stats as ss
|
|
5
5
|
import seaborn as sns
|
|
6
6
|
import gc
|
|
7
|
+
import math
|
|
7
8
|
import scipy.stats as ss
|
|
8
9
|
from matplotlib.patches import Rectangle
|
|
9
10
|
from adjustText import adjust_text
|
|
10
11
|
from gwaslab.viz_aux_save_figure import save_figure
|
|
11
12
|
from gwaslab.util_in_get_sig import getsig
|
|
13
|
+
from gwaslab.util_in_get_sig import annogene
|
|
12
14
|
from gwaslab.g_Log import Log
|
|
13
15
|
from gwaslab.util_in_correct_winnerscurse import wc_correct
|
|
14
16
|
from gwaslab.util_in_correct_winnerscurse import wc_correct_test
|
|
15
17
|
from gwaslab.g_Sumstats import Sumstats
|
|
16
|
-
|
|
18
|
+
from gwaslab.io_process_args import _merge_and_sync_dic
|
|
19
|
+
from gwaslab.io_process_args import _extract_kwargs
|
|
17
20
|
#20220422
|
|
18
21
|
def compare_effect(path1,
|
|
19
22
|
path2,
|
|
@@ -30,6 +33,7 @@ def compare_effect(path1,
|
|
|
30
33
|
anno_min1=0,
|
|
31
34
|
anno_min2=0,
|
|
32
35
|
anno_diff=0,
|
|
36
|
+
anno_args=None,
|
|
33
37
|
scaled=False,
|
|
34
38
|
scaled1=False,
|
|
35
39
|
scaled2=False,
|
|
@@ -58,29 +62,41 @@ def compare_effect(path1,
|
|
|
58
62
|
plt_args=None,
|
|
59
63
|
xylabel_prefix="Per-allele effect size in ",
|
|
60
64
|
helper_line_args=None,
|
|
65
|
+
adjust_text_kwargs = None,
|
|
66
|
+
adjust_text_kwargs_l = None,
|
|
67
|
+
adjust_text_kwargs_r = None,
|
|
68
|
+
font_args=None,
|
|
61
69
|
fontargs=None,
|
|
70
|
+
build="19",
|
|
62
71
|
r_or_r2="r",
|
|
63
|
-
#
|
|
64
72
|
errargs=None,
|
|
65
73
|
legend_args=None,
|
|
66
74
|
sep=["\t","\t"],
|
|
67
75
|
log = Log(),
|
|
68
76
|
save=False,
|
|
69
77
|
save_args=None,
|
|
70
|
-
verbose=False
|
|
71
|
-
|
|
78
|
+
verbose=False,
|
|
79
|
+
**kwargs):
|
|
80
|
+
|
|
72
81
|
#[snpid,p,ea,nea] ,[effect,se]
|
|
73
82
|
#[snpid,p,ea,nea,chr,pos],[effect,se]
|
|
74
83
|
#[snpid,p,ea,nea,chr,pos],[OR,OR_l,OR_h]
|
|
75
84
|
if scaled == True:
|
|
76
85
|
scaled1 = True
|
|
77
86
|
scaled2 = True
|
|
87
|
+
|
|
88
|
+
if legend_title== r'$ P < 5 x 10^{-8}$ in:' and sig_level!=5e-8:
|
|
89
|
+
|
|
90
|
+
exponent = math.floor(math.log10(sig_level))
|
|
91
|
+
mantissa = sig_level / 10**exponent
|
|
92
|
+
|
|
93
|
+
legend_title = '$ P < {} x 10^{{{}}}$ in:'.format(mantissa, exponent)
|
|
94
|
+
|
|
78
95
|
if is_q_mc=="fdr" or is_q_mc=="bon":
|
|
79
96
|
is_q = True
|
|
80
|
-
|
|
81
97
|
if is_q == True:
|
|
82
98
|
if is_q_mc not in [False,"fdr","bon","non"]:
|
|
83
|
-
raise ValueError(
|
|
99
|
+
raise ValueError('Please select either "fdr" or "bon" or "non"/False for is_q_mc.')
|
|
84
100
|
if save_args is None:
|
|
85
101
|
save_args = {"dpi":300,"facecolor":"white"}
|
|
86
102
|
if reg_box is None:
|
|
@@ -89,6 +105,10 @@ def compare_effect(path1,
|
|
|
89
105
|
sep = ["\t","\t"]
|
|
90
106
|
if get_lead_args is None:
|
|
91
107
|
get_lead_args = {}
|
|
108
|
+
if anno=="GENENAME":
|
|
109
|
+
get_lead_args["anno"]=True
|
|
110
|
+
if anno_args is None:
|
|
111
|
+
anno_args = {}
|
|
92
112
|
if errargs is None:
|
|
93
113
|
errargs={"ecolor":"#cccccc","elinewidth":1}
|
|
94
114
|
if fontargs is None:
|
|
@@ -103,101 +123,90 @@ def compare_effect(path1,
|
|
|
103
123
|
label = ["Sumstats_1","Sumstats_2","Both","None"]
|
|
104
124
|
if anno_het ==True:
|
|
105
125
|
is_q=True
|
|
106
|
-
|
|
107
|
-
log.write("Start to process the raw sumstats for plotting...")
|
|
108
|
-
|
|
109
|
-
######### 1 check the value used to plot
|
|
110
|
-
if mode not in ["Beta","beta","BETA","OR","or"]:
|
|
111
|
-
raise ValueError("Please input Beta or OR")
|
|
112
126
|
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
if cols_name_list_1 is None:
|
|
116
|
-
cols_name_list_1 = ["SNPID","P","EA","NEA","CHR","POS"]
|
|
117
|
-
if effect_cols_list_1 is None:
|
|
118
|
-
if mode=="beta":
|
|
119
|
-
effect_cols_list_1 = ["BETA","SE"]
|
|
120
|
-
else:
|
|
121
|
-
effect_cols_list_1 = ["OR","OR_95L","OR_95U"]
|
|
122
|
-
elif type(path1) is pd.DataFrame:
|
|
123
|
-
log.write("Path1 is pandas DataFrame object...")
|
|
127
|
+
adjust_text_kwargs_r_default = {"autoalign":False,"precision":0.001,"lim":1000,"ha":"left","va":"top","expand_text":(1,1.8),"expand_objects":(0.1,0.1),"expand_points":(1.8,1.8),"force_objects":(0.8,0.8),"arrowprops":dict(arrowstyle='-|>', color='grey')}
|
|
128
|
+
adjust_text_kwargs_l_default = {"autoalign":False,"precision":0.001,"lim":1000,"ha":"right","va":"bottom","expand_text":(1,1.8),"expand_objects":(0.1,0.1),"expand_points":(1.8,1.8),"force_objects":(0.8,0.8),"arrowprops":dict(arrowstyle='-|>', color='grey')}
|
|
124
129
|
|
|
125
|
-
if
|
|
126
|
-
|
|
127
|
-
if cols_name_list_2 is None:
|
|
128
|
-
cols_name_list_2 = ["SNPID","P","EA","NEA","CHR","POS"]
|
|
129
|
-
if effect_cols_list_2 is None:
|
|
130
|
-
if mode=="beta":
|
|
131
|
-
effect_cols_list_2 = ["BETA","SE"]
|
|
132
|
-
else:
|
|
133
|
-
effect_cols_list_2 = ["OR","OR_95L","OR_95U"]
|
|
134
|
-
elif type(path2) is pd.DataFrame:
|
|
135
|
-
log.write("Path2 is pandas DataFrame object...")
|
|
136
|
-
|
|
137
|
-
######### 2 extract snplist2
|
|
138
|
-
log.write(" -Loading "+label[1]+" SNP list in memory...")
|
|
139
|
-
|
|
140
|
-
if type(path2) is Sumstats:
|
|
141
|
-
sumstats = path2.data[[cols_name_list_2[0]]].copy()
|
|
142
|
-
elif type(path2) is pd.DataFrame:
|
|
143
|
-
sumstats = path2[[cols_name_list_2[0]]].copy()
|
|
130
|
+
if adjust_text_kwargs_l is None:
|
|
131
|
+
adjust_text_kwargs_l = adjust_text_kwargs_l_default
|
|
144
132
|
else:
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
cols_to_extract = [cols_name_list_1[0],cols_name_list_1[1]]
|
|
133
|
+
for key, value in adjust_text_kwargs_l_default.items():
|
|
134
|
+
if key not in adjust_text_kwargs_l:
|
|
135
|
+
adjust_text_kwargs_l[key] = value
|
|
136
|
+
|
|
137
|
+
if adjust_text_kwargs_r is None:
|
|
138
|
+
adjust_text_kwargs_r = adjust_text_kwargs_r_default
|
|
152
139
|
else:
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
log.write(" -Loading sumstats for "+label[0]+":",",".join(cols_to_extract))
|
|
140
|
+
for key, value in adjust_text_kwargs_r_default.items():
|
|
141
|
+
if key not in adjust_text_kwargs_r:
|
|
142
|
+
adjust_text_kwargs_r[key] = value
|
|
157
143
|
|
|
158
|
-
if
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
144
|
+
if adjust_text_kwargs is not None:
|
|
145
|
+
for key, value in adjust_text_kwargs.items():
|
|
146
|
+
adjust_text_kwargs_l[key] = value
|
|
147
|
+
adjust_text_kwargs_r[key] = value
|
|
162
148
|
else:
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
gc.collect()
|
|
149
|
+
adjust_text_kwargs = {}
|
|
166
150
|
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
151
|
+
|
|
152
|
+
save_kwargs = _extract_kwargs("save", save_args, locals())
|
|
153
|
+
anno_kwargs = _extract_kwargs("anno", anno_args, locals())
|
|
154
|
+
err_kwargs = _extract_kwargs("err", errargs, locals())
|
|
155
|
+
plt_kwargs = _extract_kwargs("plt", plt_args, locals())
|
|
156
|
+
scatter_kwargs = _extract_kwargs("scatter", scatterargs, locals())
|
|
157
|
+
font_kwargs = _extract_kwargs("font",fontargs, locals())
|
|
158
|
+
|
|
159
|
+
log.write("Start to process the raw sumstats for plotting...", verbose=verbose)
|
|
172
160
|
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
161
|
+
# configure headers
|
|
162
|
+
cols_name_list_1,cols_name_list_2, effect_cols_list_1, effect_cols_list_2 = configure_headers(mode,
|
|
163
|
+
path1,
|
|
164
|
+
path2,
|
|
165
|
+
cols_name_list_1,
|
|
166
|
+
cols_name_list_2,
|
|
167
|
+
effect_cols_list_1,
|
|
168
|
+
effect_cols_list_2,
|
|
169
|
+
scaled1,
|
|
170
|
+
scaled2,
|
|
171
|
+
log,
|
|
172
|
+
verbose)
|
|
177
173
|
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
174
|
+
# extract common variants / load sumstats 1
|
|
175
|
+
sumstats, common_snp_set = configure_common_snp_set(path1,path2,
|
|
176
|
+
snplist,
|
|
177
|
+
label,
|
|
178
|
+
cols_name_list_1,
|
|
179
|
+
cols_name_list_2,
|
|
180
|
+
sep,
|
|
181
|
+
scaled1,
|
|
182
|
+
scaled2,
|
|
183
|
+
log,verbose)
|
|
181
184
|
|
|
182
|
-
sumstats
|
|
185
|
+
# rename sumstats headers -> keywords in gwaslab
|
|
186
|
+
sumstats = rename_sumtats(sumstats=sumstats,
|
|
187
|
+
cols_name_list = cols_name_list_1,
|
|
188
|
+
scaled=scaled1,
|
|
189
|
+
snplist=snplist)
|
|
183
190
|
|
|
184
|
-
|
|
191
|
+
# exctract only available variants from sumstats1
|
|
185
192
|
sumstats = sumstats.loc[sumstats["SNPID"].isin(common_snp_set),:]
|
|
193
|
+
log.write(" -Using only variants available for both datasets...", verbose=verbose)
|
|
186
194
|
|
|
187
|
-
log.write(" -Using only variants available for both datasets...")
|
|
188
195
|
######### 8 extact SNPs for comparison
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
196
|
+
sig_list_1 = extract_snp_for_comparison(sumstats,
|
|
197
|
+
snplist,
|
|
198
|
+
label=label[0],
|
|
199
|
+
get_lead_args=get_lead_args,
|
|
200
|
+
build=build,
|
|
201
|
+
drop=drop,
|
|
202
|
+
anno=anno,
|
|
203
|
+
sig_level=sig_level,
|
|
204
|
+
scaled = scaled1,
|
|
205
|
+
log = log,
|
|
206
|
+
verbose = verbose)
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
######### load sumstats1
|
|
201
210
|
|
|
202
211
|
######### 9 extract snplist2
|
|
203
212
|
if snplist is not None:
|
|
@@ -205,158 +214,97 @@ def compare_effect(path1,
|
|
|
205
214
|
else:
|
|
206
215
|
cols_to_extract = [cols_name_list_2[0],cols_name_list_2[1],cols_name_list_2[4],cols_name_list_2[5]]
|
|
207
216
|
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
else:
|
|
215
|
-
sumstats = pd.read_table(path2,sep=sep[1],usecols=cols_to_extract)
|
|
216
|
-
|
|
217
|
+
sumstats = load_sumstats(path=path2,
|
|
218
|
+
usecols=cols_to_extract,
|
|
219
|
+
label=label[1],
|
|
220
|
+
log=log,
|
|
221
|
+
verbose= verbose,
|
|
222
|
+
sep=sep[1])
|
|
217
223
|
gc.collect()
|
|
218
224
|
|
|
219
|
-
if scaled2==True:
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
rename_dict[cols_name_list_2[4]]="CHR"
|
|
227
|
-
rename_dict[cols_name_list_2[5]]="POS"
|
|
228
|
-
sumstats.rename(columns=rename_dict,inplace=True)
|
|
229
|
-
|
|
225
|
+
#if scaled2==True:
|
|
226
|
+
# sumstats[cols_name_list_2[1]] = np.power(10,-sumstats[cols_name_list_2[1]])
|
|
227
|
+
|
|
228
|
+
sumstats = rename_sumtats(sumstats=sumstats,
|
|
229
|
+
cols_name_list = cols_name_list_2,
|
|
230
|
+
scaled=scaled2,
|
|
231
|
+
snplist=snplist)
|
|
230
232
|
######### 11 exctract only overlapping variants from sumstats2
|
|
231
233
|
sumstats = sumstats.loc[sumstats["SNPID"].isin(common_snp_set),:]
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
if drop==True:
|
|
244
|
-
sig_list_2 = drop_duplicate_and_na(sig_list_2, sort_by="P", log=log ,verbose=verbose)
|
|
234
|
+
sig_list_2 = extract_snp_for_comparison(sumstats,
|
|
235
|
+
snplist,
|
|
236
|
+
label=label[1],
|
|
237
|
+
get_lead_args=get_lead_args,
|
|
238
|
+
build=build,
|
|
239
|
+
drop=drop,
|
|
240
|
+
anno=anno,
|
|
241
|
+
sig_level=sig_level,
|
|
242
|
+
scaled = scaled2,
|
|
243
|
+
log = log,
|
|
244
|
+
verbose = verbose)
|
|
245
245
|
|
|
246
246
|
######### 13 Merge two list using SNPID
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
#1 rs6704312 0.652104 0.143750
|
|
247
|
+
sig_list_merged = merge_list(sig_list_1,
|
|
248
|
+
sig_list_2,
|
|
249
|
+
anno = anno,
|
|
250
|
+
labels=label,
|
|
251
|
+
log=log,
|
|
252
|
+
verbose=verbose)
|
|
254
253
|
|
|
255
254
|
###############################################################################
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
if len(eaf)>0: cols_to_extract.append(eaf[0])
|
|
268
|
-
log.write(" -Extract statistics of selected variants from "+label[0]+" : ",",".join(cols_to_extract) )
|
|
269
|
-
|
|
270
|
-
if type(path1) is Sumstats:
|
|
271
|
-
sumstats = path1.data[cols_to_extract].copy()
|
|
272
|
-
elif type(path1) is pd.DataFrame:
|
|
273
|
-
sumstats = path1[cols_to_extract].copy()
|
|
274
|
-
else:
|
|
275
|
-
sumstats = pd.read_table(path1,sep=sep[0],usecols=cols_to_extract)
|
|
276
|
-
|
|
277
|
-
if scaled1==True:
|
|
278
|
-
sumstats[cols_name_list_1[1]] = np.power(10,-sumstats[cols_name_list_1[1]])
|
|
279
|
-
|
|
280
|
-
if mode=="beta" or mode=="BETA" or mode=="Beta":
|
|
281
|
-
rename_dict = { cols_name_list_1[0]:"SNPID",
|
|
282
|
-
cols_name_list_1[1]:"P_1",
|
|
283
|
-
cols_name_list_1[2]:"EA_1",
|
|
284
|
-
cols_name_list_1[3]:"NEA_1",
|
|
285
|
-
effect_cols_list_1[0]:"EFFECT_1",
|
|
286
|
-
effect_cols_list_1[1]:"SE_1",
|
|
287
|
-
}
|
|
288
|
-
|
|
289
|
-
else:
|
|
290
|
-
# if or
|
|
291
|
-
rename_dict = { cols_name_list_1[0]:"SNPID",
|
|
292
|
-
cols_name_list_1[1]:"P_1",
|
|
293
|
-
cols_name_list_1[2]:"EA_1",
|
|
294
|
-
cols_name_list_1[3]:"NEA_1",
|
|
295
|
-
effect_cols_list_1[0]:"OR_1",
|
|
296
|
-
effect_cols_list_1[1]:"OR_L_1",
|
|
297
|
-
effect_cols_list_1[2]:"OR_H_1"
|
|
298
|
-
}
|
|
299
|
-
## check if eaf column is provided.
|
|
300
|
-
if len(eaf)>0: rename_dict[eaf[0]]="EAF_1"
|
|
301
|
-
sumstats.rename(columns=rename_dict, inplace=True)
|
|
255
|
+
cols_to_extract = configure_cols_to_extract(mode=mode,
|
|
256
|
+
cols_name_list = cols_name_list_1,
|
|
257
|
+
effect_cols_list= effect_cols_list_1,
|
|
258
|
+
eaf = eaf)
|
|
259
|
+
sumstats = load_sumstats(path=path1,
|
|
260
|
+
usecols=cols_to_extract,
|
|
261
|
+
label=label[0],
|
|
262
|
+
log=log,
|
|
263
|
+
verbose= verbose,
|
|
264
|
+
sep=sep[0])
|
|
302
265
|
|
|
303
|
-
#
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
266
|
+
#if scaled1==True:
|
|
267
|
+
# sumstats[cols_name_list_1[1]] = np.power(10,-sumstats[cols_name_list_1[1]])
|
|
268
|
+
sumstats = rename_sumstats_full(mode, sumstats,
|
|
269
|
+
index=1,
|
|
270
|
+
cols_name_list = cols_name_list_1,
|
|
271
|
+
effect_cols_list = effect_cols_list_1,
|
|
272
|
+
eaf = eaf,
|
|
273
|
+
drop = drop,
|
|
274
|
+
scaled=scaled1,
|
|
275
|
+
log=log, verbose=verbose)
|
|
307
276
|
|
|
308
277
|
log.write(" -Merging "+label[0]+" effect information...", verbose=verbose)
|
|
309
|
-
|
|
310
278
|
sig_list_merged = pd.merge(sig_list_merged,sumstats,
|
|
311
279
|
left_on="SNPID",right_on="SNPID",
|
|
312
280
|
how="left")
|
|
313
281
|
|
|
314
282
|
############ 15 merging sumstats2
|
|
283
|
+
cols_to_extract = configure_cols_to_extract(mode=mode,
|
|
284
|
+
cols_name_list = cols_name_list_2,
|
|
285
|
+
effect_cols_list= effect_cols_list_2,
|
|
286
|
+
eaf = eaf)
|
|
315
287
|
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
if len(eaf)>0: cols_to_extract.append(eaf[1])
|
|
323
|
-
|
|
324
|
-
log.write(" -Extract statistics of selected variants from "+label[1]+" : ",",".join(cols_to_extract), verbose=verbose )
|
|
325
|
-
if type(path2) is Sumstats:
|
|
326
|
-
sumstats = path2.data[cols_to_extract].copy()
|
|
327
|
-
elif type(path2) is pd.DataFrame:
|
|
328
|
-
sumstats = path2[cols_to_extract].copy()
|
|
329
|
-
else:
|
|
330
|
-
sumstats = pd.read_table(path2,sep=sep[1],usecols=cols_to_extract)
|
|
288
|
+
sumstats = load_sumstats(path=path2,
|
|
289
|
+
usecols=cols_to_extract,
|
|
290
|
+
label=label[1],
|
|
291
|
+
log=log,
|
|
292
|
+
verbose= verbose,
|
|
293
|
+
sep=sep[1])
|
|
331
294
|
|
|
332
|
-
if scaled2==True:
|
|
333
|
-
|
|
295
|
+
#if scaled2==True:
|
|
296
|
+
# sumstats[cols_name_list_2[1]] = np.power(10,-sumstats[cols_name_list_2[1]])
|
|
334
297
|
|
|
335
298
|
gc.collect()
|
|
336
299
|
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
else:
|
|
346
|
-
rename_dict = { cols_name_list_2[0]:"SNPID",
|
|
347
|
-
cols_name_list_2[1]:"P_2",
|
|
348
|
-
cols_name_list_2[2]:"EA_2",
|
|
349
|
-
cols_name_list_2[3]:"NEA_2",
|
|
350
|
-
effect_cols_list_2[0]:"OR_2",
|
|
351
|
-
effect_cols_list_2[1]:"OR_L_2",
|
|
352
|
-
effect_cols_list_2[2]:"OR_H_2"
|
|
353
|
-
}
|
|
354
|
-
if len(eaf)>0: rename_dict[eaf[1]]="EAF_2"
|
|
355
|
-
sumstats.rename(columns=rename_dict, inplace=True)
|
|
356
|
-
# drop na and duplicate
|
|
357
|
-
if drop==True:
|
|
358
|
-
sumstats = drop_duplicate_and_na(sumstats, sort_by="P_2", log=log, verbose=verbose)
|
|
359
|
-
sumstats.drop("P_2",axis=1,inplace=True)
|
|
300
|
+
sumstats = rename_sumstats_full(mode, sumstats,
|
|
301
|
+
index=2,
|
|
302
|
+
cols_name_list = cols_name_list_2,
|
|
303
|
+
effect_cols_list = effect_cols_list_2,
|
|
304
|
+
eaf = eaf,
|
|
305
|
+
drop = drop,
|
|
306
|
+
scaled=scaled2,
|
|
307
|
+
log=log, verbose=verbose)
|
|
360
308
|
|
|
361
309
|
log.write(" -Merging "+label[1]+" effect information...", verbose=verbose)
|
|
362
310
|
sig_list_merged = pd.merge(sig_list_merged,sumstats,
|
|
@@ -366,130 +314,49 @@ def compare_effect(path1,
|
|
|
366
314
|
sig_list_merged.set_index("SNPID",inplace=True)
|
|
367
315
|
|
|
368
316
|
################ 16 update sumstats1
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
},
|
|
382
|
-
inplace=True)
|
|
383
|
-
# drop na and duplicate
|
|
384
|
-
if drop==True:
|
|
385
|
-
sumstats = drop_duplicate_and_na(sumstats, sort_by="P_1", log=log, verbose=verbose)
|
|
386
|
-
|
|
387
|
-
sumstats.set_index("SNPID",inplace=True)
|
|
388
|
-
sig_list_merged.update(sumstats)
|
|
317
|
+
|
|
318
|
+
sig_list_merged = update_stats(sig_list_merged = sig_list_merged,
|
|
319
|
+
path = path1,
|
|
320
|
+
cols_name_list = cols_name_list_1,
|
|
321
|
+
index=1,
|
|
322
|
+
sep=sep[0],
|
|
323
|
+
snplist = snplist,
|
|
324
|
+
label=label[0],
|
|
325
|
+
drop = drop,
|
|
326
|
+
scaled=scaled1,
|
|
327
|
+
log=log,
|
|
328
|
+
verbose = verbose)
|
|
389
329
|
|
|
390
330
|
################# 17 update sumstats2
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
331
|
+
sig_list_merged = update_stats(sig_list_merged = sig_list_merged,
|
|
332
|
+
path = path2,
|
|
333
|
+
cols_name_list = cols_name_list_2,
|
|
334
|
+
index=2,
|
|
335
|
+
sep=sep[1],
|
|
336
|
+
snplist = snplist,
|
|
337
|
+
label=label[1],
|
|
338
|
+
drop = drop,
|
|
339
|
+
scaled=scaled2,
|
|
340
|
+
log=log,
|
|
341
|
+
verbose = verbose)
|
|
398
342
|
|
|
399
|
-
if
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
inplace=True)
|
|
406
|
-
# drop na and duplicate
|
|
407
|
-
if drop==True:
|
|
408
|
-
sumstats = drop_duplicate_and_na(sumstats, sort_by="P_2", log=log, verbose=verbose)
|
|
343
|
+
#if scaled1 ==True :
|
|
344
|
+
# log.write(" -Sumstats -log10(P) values are being converted to P...", verbose=verbose)
|
|
345
|
+
# sig_list_merged["P_1"] = np.power(10,-sig_list_merged["P_1"])
|
|
346
|
+
#if scaled2 ==True :
|
|
347
|
+
# log.write(" -Sumstats -log10(P) values are being converted to P...", verbose=verbose)
|
|
348
|
+
# sig_list_merged["P_2"] = np.power(10,-sig_list_merged["P_2"])
|
|
409
349
|
|
|
410
|
-
sumstats.set_index("SNPID",inplace=True)
|
|
411
|
-
sig_list_merged.update(sumstats)
|
|
412
|
-
|
|
413
|
-
if scaled1 ==True :
|
|
414
|
-
log.write(" -Sumstats -log10(P) values are being converted to P...", verbose=verbose)
|
|
415
|
-
sig_list_merged["P_1"] = np.power(10,-sig_list_merged["P_1"])
|
|
416
|
-
if scaled2 ==True :
|
|
417
|
-
log.write(" -Sumstats -log10(P) values are being converted to P...", verbose=verbose)
|
|
418
|
-
sig_list_merged["P_2"] = np.power(10,-sig_list_merged["P_2"])
|
|
419
|
-
####
|
|
420
350
|
#################################################################################
|
|
421
|
-
|
|
422
|
-
log.write(" -Assigning indicator ...", verbose=verbose)
|
|
423
|
-
# 0-> 0
|
|
424
|
-
# 1 -> sig in sumstats1
|
|
425
|
-
# 2 -> sig in sumsatts2
|
|
426
|
-
# 3-> sig in both sumstats1 + sumstats2
|
|
427
|
-
sig_list_merged["indicator"] = 0
|
|
428
|
-
sig_list_merged.loc[sig_list_merged["P_1"]<sig_level,"indicator"]=1+sig_list_merged.loc[sig_list_merged["P_1"]<sig_level,"indicator"]
|
|
429
|
-
sig_list_merged.loc[sig_list_merged["P_2"]<sig_level,"indicator"]=2+sig_list_merged.loc[sig_list_merged["P_2"]<sig_level,"indicator"]
|
|
430
|
-
|
|
431
|
-
if snplist is None:
|
|
432
|
-
sig_list_merged["CHR"]=np.max(sig_list_merged[["CHR_1","CHR_2"]], axis=1).astype(int)
|
|
433
|
-
sig_list_merged["POS"]=np.max(sig_list_merged[["POS_1","POS_2"]], axis=1).astype(int)
|
|
434
|
-
sig_list_merged.drop(labels=['CHR_1', 'CHR_2','POS_1', 'POS_2'], axis=1,inplace=True)
|
|
351
|
+
sig_list_merged = assign_indicator(sig_list_merged, snplist, sig_level, scaled1, scaled2, log, verbose)
|
|
435
352
|
|
|
436
|
-
|
|
437
|
-
############### 19 align allele effect with sumstats 1
|
|
438
|
-
sig_list_merged["EA_1"]=sig_list_merged["EA_1"].astype("string")
|
|
439
|
-
sig_list_merged["EA_2"]=sig_list_merged["EA_2"].astype("string")
|
|
440
|
-
sig_list_merged["NEA_1"]=sig_list_merged["NEA_1"].astype("string")
|
|
441
|
-
sig_list_merged["NEA_2"]=sig_list_merged["NEA_2"].astype("string")
|
|
442
|
-
if mode=="beta" or mode=="BETA" or mode=="Beta":
|
|
443
|
-
# copy raw
|
|
444
|
-
sig_list_merged["EA_2_aligned"]=sig_list_merged["EA_2"]
|
|
445
|
-
sig_list_merged["NEA_2_aligned"]=sig_list_merged["NEA_2"]
|
|
446
|
-
sig_list_merged["EFFECT_2_aligned"]=sig_list_merged["EFFECT_2"]
|
|
447
|
-
|
|
448
|
-
#filp ea/nea and beta for sumstats2
|
|
449
|
-
sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"EA_2_aligned"]= sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"NEA_2"]
|
|
450
|
-
sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"NEA_2_aligned"]= sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"EA_2"]
|
|
451
|
-
sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"EFFECT_2_aligned"]= -sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"EFFECT_2"]
|
|
452
|
-
else:
|
|
453
|
-
#flip for OR or - +
|
|
353
|
+
sig_list_merged = align_alleles(sig_list_merged, label, mode, eaf, log, verbose)
|
|
454
354
|
|
|
455
|
-
|
|
456
|
-
sig_list_merged["NEA_2_aligned"]=sig_list_merged["NEA_2"]
|
|
457
|
-
sig_list_merged["OR_2_aligned"]=sig_list_merged["OR_2"]
|
|
458
|
-
sig_list_merged["OR_L_2_aligned"]=sig_list_merged["OR_L_2"]
|
|
459
|
-
sig_list_merged["OR_H_2_aligned"]=sig_list_merged["OR_H_2"]
|
|
355
|
+
sig_list_merged = check_allele_match(sig_list_merged, allele_match, label, log,verbose)
|
|
460
356
|
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"OR_H_2_aligned"]= 1/sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"OR_L_2"]
|
|
465
|
-
sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"OR_L_2_aligned"]= 1/sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"OR_H_2"]
|
|
466
|
-
|
|
467
|
-
sig_list_merged["BETA_1"]=np.log(sig_list_merged["OR_1"])
|
|
468
|
-
sig_list_merged["BETA_2_aligned"]=np.log(sig_list_merged["OR_2_aligned"])
|
|
469
|
-
sig_list_merged["SE_1"]=(np.log(sig_list_merged["OR_H_1"]) - np.log(sig_list_merged["OR_1"]))/ss.norm.ppf(0.975)
|
|
470
|
-
sig_list_merged["SE_2"]=(np.log(sig_list_merged["OR_H_2_aligned"]) - np.log(sig_list_merged["OR_2_aligned"]))/ss.norm.ppf(0.975)
|
|
471
|
-
|
|
472
|
-
sig_list_merged["OR_L_1_err"]=np.abs(sig_list_merged["OR_L_1"]-sig_list_merged["OR_1"])
|
|
473
|
-
sig_list_merged["OR_H_1_err"]=np.abs(sig_list_merged["OR_H_1"]-sig_list_merged["OR_1"])
|
|
474
|
-
sig_list_merged["OR_L_2_aligned_err"]=np.abs(sig_list_merged["OR_L_2_aligned"]-sig_list_merged["OR_2_aligned"])
|
|
475
|
-
sig_list_merged["OR_H_2_aligned_err"]=np.abs(sig_list_merged["OR_H_2_aligned"]-sig_list_merged["OR_2_aligned"])
|
|
476
|
-
|
|
477
|
-
if len(eaf)>0:
|
|
478
|
-
# flip eaf
|
|
479
|
-
sig_list_merged["EAF_2_aligned"]=sig_list_merged["EAF_2"]
|
|
480
|
-
sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"EAF_2_aligned"]= 1 -sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"EAF_2"]
|
|
481
|
-
|
|
482
|
-
# checking effect allele matching
|
|
483
|
-
nonmatch = np.nansum(sig_list_merged["EA_1"] != sig_list_merged["EA_2_aligned"])
|
|
484
|
-
log.write(" -Aligned all EAs in {} with EAs in {} ...".format(label[1],label[0]), verbose=verbose)
|
|
485
|
-
if nonmatch>0:
|
|
486
|
-
log.warning("Alleles for {} variants do not match...".format(nonmatch))
|
|
487
|
-
if allele_match==True:
|
|
488
|
-
if nonmatch>0:
|
|
489
|
-
sig_list_merged = sig_list_merged.loc[sig_list_merged["EA_1"] == sig_list_merged["EA_2_aligned"]]
|
|
490
|
-
else:
|
|
491
|
-
log.write(" -No variants with EA not matching...", verbose=verbose)
|
|
492
|
-
if fdr==True:
|
|
357
|
+
sig_list_merged = filter_by_maf(sig_list_merged, eaf, maf_level, log, verbose)
|
|
358
|
+
|
|
359
|
+
if fdr==True and scaled==False:
|
|
493
360
|
log.write(" -Using FDR...", verbose=verbose)
|
|
494
361
|
#sig_list_merged["P_1"] = fdrcorrection(sig_list_merged["P_1"])[1]
|
|
495
362
|
#sig_list_merged["P_2"] = fdrcorrection(sig_list_merged["P_2"])[1]
|
|
@@ -498,64 +365,28 @@ def compare_effect(path1,
|
|
|
498
365
|
|
|
499
366
|
####################################################################################################################################
|
|
500
367
|
## winner's curse correction using aligned beta
|
|
501
|
-
|
|
502
|
-
if wc_correction == "all":
|
|
503
|
-
log.write(" -Correcting BETA for winner's curse with threshold at {} for all variants...".format(sig_level), verbose=verbose)
|
|
504
|
-
sig_list_merged["EFFECT_1_RAW"] = sig_list_merged["EFFECT_1"].copy()
|
|
505
|
-
sig_list_merged["EFFECT_2_aligned_RAW"] = sig_list_merged["EFFECT_2_aligned"].copy()
|
|
506
|
-
|
|
507
|
-
log.write(" -Correcting BETA for {} variants in sumstats1...".format(sum(~sig_list_merged["EFFECT_1"].isna())), verbose=verbose)
|
|
508
|
-
sig_list_merged["EFFECT_1"] = sig_list_merged[["EFFECT_1_RAW","SE_1"]].apply(lambda x: wc_correct(x[0],x[1],sig_level),axis=1)
|
|
509
|
-
|
|
510
|
-
log.write(" -Correcting BETA for {} variants in sumstats2...".format(sum(~sig_list_merged["EFFECT_2_aligned"].isna())), verbose=verbose)
|
|
511
|
-
sig_list_merged["EFFECT_2_aligned"] = sig_list_merged[["EFFECT_2_aligned_RAW","SE_2"]].apply(lambda x: wc_correct(x[0],x[1],sig_level),axis=1)
|
|
512
|
-
|
|
513
|
-
elif wc_correction == "sig" :
|
|
514
|
-
log.write(" - Correcting BETA for winner's curse with threshold at {} for significant variants...".format(sig_level), verbose=verbose)
|
|
515
|
-
sig_list_merged["EFFECT_1_RAW"] = sig_list_merged["EFFECT_1"].copy()
|
|
516
|
-
sig_list_merged["EFFECT_2_aligned_RAW"] = sig_list_merged["EFFECT_2_aligned"].copy()
|
|
517
|
-
log.write(" -Correcting BETA for {} variants in sumstats1...".format(sum(sig_list_merged["P_1"]<sig_level)), verbose=verbose)
|
|
518
|
-
sig_list_merged.loc[sig_list_merged["P_1"]<sig_level, "EFFECT_1"] = sig_list_merged.loc[sig_list_merged["P_1"]<sig_level, ["EFFECT_1_RAW","SE_1"]].apply(lambda x: wc_correct_test(x[0],x[1],sig_level),axis=1)
|
|
519
|
-
log.write(" -Correcting BETA for {} variants in sumstats2...".format(sum(sig_list_merged["P_2"]<sig_level)), verbose=verbose)
|
|
520
|
-
sig_list_merged.loc[sig_list_merged["P_2"]<sig_level, "EFFECT_2_aligned"] = sig_list_merged.loc[sig_list_merged["P_2"]<sig_level, ["EFFECT_2_aligned_RAW","SE_2"]].apply(lambda x: wc_correct_test(x[0],x[1],sig_level),axis=1)
|
|
521
|
-
|
|
522
|
-
elif wc_correction == "sumstats1" :
|
|
523
|
-
log.write(" - Correcting BETA for winner's curse with threshold at {} for significant variants in sumstats1...".format(sig_level), verbose=verbose)
|
|
524
|
-
sig_list_merged["EFFECT_1_RAW"] = sig_list_merged["EFFECT_1"].copy()
|
|
525
|
-
log.write(" -Correcting BETA for {} variants in sumstats1...".format(sum(sig_list_merged["P_1"]<sig_level)), verbose=verbose)
|
|
526
|
-
sig_list_merged.loc[sig_list_merged["P_1"]<sig_level, "EFFECT_1"] = sig_list_merged.loc[sig_list_merged["P_1"]<sig_level, ["EFFECT_1_RAW","SE_1"]].apply(lambda x: wc_correct_test(x[0],x[1],sig_level),axis=1)
|
|
527
|
-
|
|
528
|
-
elif wc_correction == "sumstats2" :
|
|
529
|
-
log.write(" - Correcting BETA for winner's curse with threshold at {} for significant variants in sumstats2...".format(sig_level), verbose=verbose)
|
|
530
|
-
sig_list_merged["EFFECT_2_aligned_RAW"] = sig_list_merged["EFFECT_2_aligned"].copy()
|
|
531
|
-
log.write(" -Correcting BETA for {} variants in sumstats2...".format(sum(sig_list_merged["P_2"]<sig_level)), verbose=verbose)
|
|
532
|
-
sig_list_merged.loc[sig_list_merged["P_2"]<sig_level, "EFFECT_2_aligned"] = sig_list_merged.loc[sig_list_merged["P_2"]<sig_level, ["EFFECT_2_aligned_RAW","SE_2"]].apply(lambda x: wc_correct_test(x[0],x[1],sig_level),axis=1)
|
|
368
|
+
sig_list_merged = winnerscurse_correction(sig_list_merged, mode, wc_correction, sig_level,scaled1, scaled2, log, verbose)
|
|
533
369
|
|
|
534
370
|
########################## Het test############################################################
|
|
535
371
|
## heterogeneity test
|
|
536
|
-
if (is_q
|
|
372
|
+
if (is_q == True):
|
|
537
373
|
log.write(" -Calculating Cochran's Q statistics and peform chisq test...", verbose=verbose)
|
|
538
374
|
if mode=="beta" or mode=="BETA" or mode=="Beta":
|
|
539
375
|
sig_list_merged = test_q(sig_list_merged,"EFFECT_1","SE_1","EFFECT_2_aligned","SE_2",q_level=q_level,is_q_mc=is_q_mc, log=log, verbose=verbose)
|
|
540
376
|
else:
|
|
541
377
|
sig_list_merged = test_q(sig_list_merged,"BETA_1","SE_1","BETA_2_aligned","SE_2",q_level=q_level,is_q_mc=is_q_mc, log=log, verbose=verbose)
|
|
542
378
|
|
|
379
|
+
# heterogeneity summary
|
|
380
|
+
log.write(" -Significant het:" ,len(sig_list_merged.loc[sig_list_merged["HetP"]<0.05,:]), verbose=verbose)
|
|
381
|
+
log.write(" -All sig:" ,len(sig_list_merged), verbose=verbose)
|
|
382
|
+
log.write(" -Het rate:" ,len(sig_list_merged.loc[sig_list_merged["HetP"]<0.05,:])/len(sig_list_merged), verbose=verbose)
|
|
383
|
+
|
|
543
384
|
######################### save ###############################################################
|
|
544
385
|
## save the merged data
|
|
545
386
|
save_path = label[0]+"_"+label[1]+"_beta_sig_list_merged.tsv"
|
|
546
387
|
log.write(" -Saving the merged data to:",save_path, verbose=verbose)
|
|
547
|
-
sig_list_merged
|
|
548
|
-
|
|
549
|
-
########################## maf_threshold#############################################################
|
|
550
|
-
if (len(eaf)>0) and (maf_level is not None):
|
|
551
|
-
both_eaf_clear = (sig_list_merged["EAF_1"]>maf_level)&(sig_list_merged["EAF_1"]<1-maf_level)&(sig_list_merged["EAF_2"]>maf_level)&(sig_list_merged["EAF_2"]<1-maf_level)
|
|
552
|
-
log.write(" -Exclude "+str(len(sig_list_merged) -sum(both_eaf_clear))+ " variants with maf <",maf_level, verbose=verbose)
|
|
553
|
-
sig_list_merged = sig_list_merged.loc[both_eaf_clear,:]
|
|
554
|
-
# heterogeneity summary
|
|
555
|
-
if (is_q is True):
|
|
556
|
-
log.write(" -Significant het:" ,len(sig_list_merged.loc[sig_list_merged["HetP"]<0.05,:]), verbose=verbose)
|
|
557
|
-
log.write(" -All sig:" ,len(sig_list_merged), verbose=verbose)
|
|
558
|
-
log.write(" -Het rate:" ,len(sig_list_merged.loc[sig_list_merged["HetP"]<0.05,:])/len(sig_list_merged), verbose=verbose)
|
|
388
|
+
sig_list_merged = reorder_columns(sig_list_merged)
|
|
389
|
+
sig_list_merged.to_csv(save_path,sep="\t")
|
|
559
390
|
|
|
560
391
|
# extract group
|
|
561
392
|
if include_all==True:
|
|
@@ -582,62 +413,640 @@ def compare_effect(path1,
|
|
|
582
413
|
log.write("Creating the scatter plot for effect sizes comparison...", verbose=verbose)
|
|
583
414
|
#plt.style.use("ggplot")
|
|
584
415
|
sns.set_style("ticks")
|
|
585
|
-
fig,ax = plt.subplots(**
|
|
416
|
+
fig,ax = plt.subplots(**plt_kwargs)
|
|
586
417
|
legend_elements=[]
|
|
587
418
|
if mode=="beta" or mode=="BETA" or mode=="Beta":
|
|
588
419
|
if len(sum0)>0:
|
|
589
420
|
ax.errorbar(sum0["EFFECT_1"],sum0["EFFECT_2_aligned"], xerr=sum0["SE_1"],yerr=sum0["SE_2"],
|
|
590
|
-
linewidth=0,zorder=1,**
|
|
421
|
+
linewidth=0,zorder=1,**err_kwargs)
|
|
591
422
|
|
|
592
|
-
ax.scatter(sum0["EFFECT_1"],sum0["EFFECT_2_aligned"],label=label[3],zorder=2,color="#cccccc",edgecolors=sum0["Edge_color"],marker=".",**
|
|
423
|
+
ax.scatter(sum0["EFFECT_1"],sum0["EFFECT_2_aligned"],label=label[3],zorder=2,color="#cccccc",edgecolors=sum0["Edge_color"],marker=".",**scatter_kwargs)
|
|
593
424
|
#legend_elements.append(mpatches.Circle(facecolor='#cccccc', edgecolor='white', label=label[3]))
|
|
594
425
|
legend_elements.append(label[3])
|
|
595
426
|
if len(sum1only)>0:
|
|
596
427
|
ax.errorbar(sum1only["EFFECT_1"],sum1only["EFFECT_2_aligned"], xerr=sum1only["SE_1"],yerr=sum1only["SE_2"],
|
|
597
|
-
linewidth=0,zorder=1,**
|
|
598
|
-
ax.scatter(sum1only["EFFECT_1"],sum1only["EFFECT_2_aligned"],label=label[0],zorder=2,color="#e6320e",edgecolors=sum1only["Edge_color"],marker="^",**
|
|
428
|
+
linewidth=0,zorder=1,**err_kwargs)
|
|
429
|
+
ax.scatter(sum1only["EFFECT_1"],sum1only["EFFECT_2_aligned"],label=label[0],zorder=2,color="#e6320e",edgecolors=sum1only["Edge_color"],marker="^",**scatter_kwargs)
|
|
599
430
|
#legend_elements.append(mpatches.Patch(facecolor='#e6320e', edgecolor='white', label=label[0]))
|
|
600
431
|
legend_elements.append(label[0])
|
|
601
432
|
if len(sum2only)>0:
|
|
602
433
|
ax.errorbar(sum2only["EFFECT_1"],sum2only["EFFECT_2_aligned"], xerr=sum2only["SE_1"],yerr=sum2only["SE_2"],
|
|
603
|
-
linewidth=0,zorder=1,**
|
|
604
|
-
ax.scatter(sum2only["EFFECT_1"],sum2only["EFFECT_2_aligned"],label=label[1],zorder=2,color="#41e620",edgecolors=sum2only["Edge_color"],marker="o",**
|
|
434
|
+
linewidth=0,zorder=1,**err_kwargs)
|
|
435
|
+
ax.scatter(sum2only["EFFECT_1"],sum2only["EFFECT_2_aligned"],label=label[1],zorder=2,color="#41e620",edgecolors=sum2only["Edge_color"],marker="o",**scatter_kwargs)
|
|
605
436
|
#legend_elements.append(mpatches.Circle(facecolor='#41e620', edgecolor='white', label=label[1]))
|
|
606
437
|
legend_elements.append(label[1])
|
|
607
438
|
if len(both)>0:
|
|
608
439
|
ax.errorbar(both["EFFECT_1"],both["EFFECT_2_aligned"], xerr=both["SE_1"],yerr=both["SE_2"],
|
|
609
|
-
linewidth=0,zorder=1,**
|
|
610
|
-
ax.scatter(both["EFFECT_1"],both["EFFECT_2_aligned"],label=label[2],zorder=2,color="#205be6",edgecolors=both["Edge_color"],marker="s",**
|
|
440
|
+
linewidth=0,zorder=1,**err_kwargs)
|
|
441
|
+
ax.scatter(both["EFFECT_1"],both["EFFECT_2_aligned"],label=label[2],zorder=2,color="#205be6",edgecolors=both["Edge_color"],marker="s",**scatter_kwargs)
|
|
611
442
|
#legend_elements.append(mpatches.Patch(facecolor='#205be6', edgecolor='white', label=label[2]))
|
|
612
443
|
legend_elements.append(label[2])
|
|
613
444
|
else:
|
|
614
445
|
## if OR
|
|
615
446
|
if len(sum0)>0:
|
|
616
447
|
ax.errorbar(sum0["OR_1"],sum0["OR_2_aligned"], xerr=sum0[["OR_L_1_err","OR_H_1_err"]].T,yerr=sum0[["OR_L_2_aligned_err","OR_H_2_aligned_err"]].T,
|
|
617
|
-
linewidth=0,zorder=1,**
|
|
618
|
-
ax.scatter(sum0["OR_1"],sum0["OR_2_aligned"],label=label[3],zorder=2,color="#cccccc",edgecolors=sum0["Edge_color"],marker=".",**
|
|
448
|
+
linewidth=0,zorder=1,**err_kwargs)
|
|
449
|
+
ax.scatter(sum0["OR_1"],sum0["OR_2_aligned"],label=label[3],zorder=2,color="#cccccc",edgecolors=sum0["Edge_color"],marker=".",**scatter_kwargs)
|
|
619
450
|
legend_elements.append(label[3])
|
|
620
451
|
if len(sum1only)>0:
|
|
621
452
|
ax.errorbar(sum1only["OR_1"],sum1only["OR_2_aligned"], xerr=sum1only[["OR_L_1_err","OR_H_1_err"]].T,yerr=sum1only[["OR_L_2_aligned_err","OR_H_2_aligned_err"]].T,
|
|
622
|
-
linewidth=0,zorder=1,**
|
|
623
|
-
ax.scatter(sum1only["OR_1"],sum1only["OR_2_aligned"],label=label[0],zorder=2,color="#e6320e",edgecolors=sum1only["Edge_color"],marker="^",**
|
|
453
|
+
linewidth=0,zorder=1,**err_kwargs)
|
|
454
|
+
ax.scatter(sum1only["OR_1"],sum1only["OR_2_aligned"],label=label[0],zorder=2,color="#e6320e",edgecolors=sum1only["Edge_color"],marker="^",**scatter_kwargs)
|
|
624
455
|
legend_elements.append(label[0])
|
|
625
456
|
if len(sum2only)>0:
|
|
626
457
|
ax.errorbar(sum2only["OR_1"],sum2only["OR_2_aligned"], xerr=sum2only[["OR_L_1_err","OR_H_1_err"]].T,yerr=sum2only[["OR_L_2_aligned_err","OR_H_2_aligned_err"]].T,
|
|
627
|
-
linewidth=0,zorder=1,**
|
|
628
|
-
ax.scatter(sum2only["OR_1"],sum2only["OR_2_aligned"],label=label[1],zorder=2,color="#41e620",edgecolors=sum2only["Edge_color"],marker="o",**
|
|
458
|
+
linewidth=0,zorder=1,**err_kwargs)
|
|
459
|
+
ax.scatter(sum2only["OR_1"],sum2only["OR_2_aligned"],label=label[1],zorder=2,color="#41e620",edgecolors=sum2only["Edge_color"],marker="o",**scatter_kwargs)
|
|
629
460
|
legend_elements.append(label[1])
|
|
630
461
|
if len(both)>0:
|
|
631
462
|
ax.errorbar(both["OR_1"],both["OR_2_aligned"], xerr=both[["OR_L_1_err","OR_H_1_err"]].T,yerr=both[["OR_L_2_aligned_err","OR_H_2_aligned_err"]].T,
|
|
632
|
-
linewidth=0,zorder=1,**
|
|
633
|
-
ax.scatter(both["OR_1"],both["OR_2_aligned"],label=label[2],zorder=2,color="#205be6",edgecolors=both["Edge_color"],marker="s",**
|
|
463
|
+
linewidth=0,zorder=1,**err_kwargs)
|
|
464
|
+
ax.scatter(both["OR_1"],both["OR_2_aligned"],label=label[2],zorder=2,color="#205be6",edgecolors=both["Edge_color"],marker="s",**scatter_kwargs)
|
|
634
465
|
legend_elements.append(label[2])
|
|
635
466
|
## annotation #################################################################################################################
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
467
|
+
ax = scatter_annotation(ax, sig_list_merged,anno, anno_het, is_q, mode,
|
|
468
|
+
anno_min,anno_min1,anno_min2,anno_diff,anno_kwargs,adjust_text_kwargs_l,adjust_text_kwargs_r,
|
|
469
|
+
log,verbose
|
|
470
|
+
)
|
|
471
|
+
#################################################################################################################################
|
|
472
|
+
|
|
473
|
+
# plot x=0,y=0, and a 45 degree line
|
|
474
|
+
xl,xh=ax.get_xlim()
|
|
475
|
+
yl,yh=ax.get_ylim()
|
|
476
|
+
|
|
477
|
+
if mode=="beta" or mode=="BETA" or mode=="Beta":
|
|
478
|
+
#if using beta
|
|
479
|
+
ax.axhline(y=0, zorder=1,**helper_line_args)
|
|
480
|
+
ax.axvline(x=0, zorder=1,**helper_line_args)
|
|
481
|
+
else:
|
|
482
|
+
#if using OR
|
|
483
|
+
ax.axhline(y=1, zorder=1,**helper_line_args)
|
|
484
|
+
ax.axvline(x=1, zorder=1,**helper_line_args)
|
|
485
|
+
|
|
486
|
+
for spine in ['top', 'right']:
|
|
487
|
+
ax.spines[spine].set_visible(False)
|
|
488
|
+
|
|
489
|
+
###regression line##############################################################################################################################
|
|
490
|
+
ax = confire_regression_line(is_reg,reg_box, sig_list_merged, ax, mode,xl,yl,xh,yh, null_beta, r_se,
|
|
491
|
+
is_45_helper_line,helper_line_args, font_kwargs,
|
|
492
|
+
log, verbose)
|
|
640
493
|
|
|
494
|
+
|
|
495
|
+
ax.set_xlabel(xylabel_prefix+label[0],**font_kwargs)
|
|
496
|
+
ax.set_ylabel(xylabel_prefix+label[1],**font_kwargs)
|
|
497
|
+
|
|
498
|
+
ax = configure_legend(fig, ax, legend_mode, is_q, is_q_mc, legend_elements, legend_pos, q_level,
|
|
499
|
+
font_kwargs,scatterargs,legend_args,
|
|
500
|
+
legend_title, legend_title2 )
|
|
501
|
+
##plot finished########################################################################################
|
|
502
|
+
gc.collect()
|
|
503
|
+
|
|
504
|
+
save_figure(fig, save, keyword="esc",save_args=save_kwargs, log=log, verbose=verbose)
|
|
505
|
+
|
|
506
|
+
sig_list_merged = reorder_columns(sig_list_merged)
|
|
507
|
+
|
|
508
|
+
return [sig_list_merged, fig,log]
|
|
509
|
+
|
|
510
|
+
###############################################################################################
|
|
511
|
+
###############################################################################################
|
|
512
|
+
###############################################################################################
|
|
513
|
+
###############################################################################################
|
|
514
|
+
###############################################################################################
|
|
515
|
+
###############################################################################################
|
|
516
|
+
###############################################################################################
|
|
517
|
+
###############################################################################################
|
|
518
|
+
###############################################################################################
|
|
519
|
+
###############################################################################################
|
|
520
|
+
###############################################################################################
|
|
521
|
+
###############################################################################################
|
|
522
|
+
###############################################################################################
|
|
523
|
+
|
|
524
|
+
def load_sumstats(path, usecols, label, log, verbose, sep):
|
|
525
|
+
if type(usecols) is not list:
|
|
526
|
+
usecols = [usecols]
|
|
527
|
+
|
|
528
|
+
log.write(" -Loading sumstats for {} : {}".format(label,",".join(usecols)), verbose=verbose)
|
|
529
|
+
#log.write(" -Loading {} SNP list in memory...".format(label), verbose=verbose)
|
|
530
|
+
|
|
531
|
+
if type(path) is Sumstats:
|
|
532
|
+
sumstats = path.data.loc[:,usecols].copy()
|
|
533
|
+
elif type(path) is pd.DataFrame:
|
|
534
|
+
sumstats = path.loc[:,usecols].copy()
|
|
535
|
+
else:
|
|
536
|
+
sumstats=pd.read_table(path,sep=sep,usecols=usecols)
|
|
537
|
+
return sumstats
|
|
538
|
+
|
|
539
|
+
def configure_headers(mode,
|
|
540
|
+
path1,
|
|
541
|
+
path2,
|
|
542
|
+
cols_name_list_1,
|
|
543
|
+
cols_name_list_2,
|
|
544
|
+
effect_cols_list_1,
|
|
545
|
+
effect_cols_list_2,
|
|
546
|
+
scaled1,
|
|
547
|
+
scaled2,
|
|
548
|
+
log,
|
|
549
|
+
verbose):
|
|
550
|
+
|
|
551
|
+
if mode not in ["Beta","beta","BETA","OR","or"]:
|
|
552
|
+
raise ValueError("Please input Beta or OR")
|
|
553
|
+
|
|
554
|
+
if type(path1) is Sumstats:
|
|
555
|
+
log.write("Path1 is gwaslab Sumstats object...", verbose=verbose)
|
|
556
|
+
if cols_name_list_1 is None:
|
|
557
|
+
cols_name_list_1 = ["SNPID","P","EA","NEA","CHR","POS"]
|
|
558
|
+
if scaled1==True:
|
|
559
|
+
cols_name_list_1 = ["SNPID","MLOG10P","EA","NEA","CHR","POS"]
|
|
560
|
+
if effect_cols_list_1 is None:
|
|
561
|
+
if mode=="beta":
|
|
562
|
+
effect_cols_list_1 = ["BETA","SE"]
|
|
563
|
+
else:
|
|
564
|
+
effect_cols_list_1 = ["OR","OR_95L","OR_95U"]
|
|
565
|
+
elif type(path1) is pd.DataFrame:
|
|
566
|
+
log.write("Path1 is pandas DataFrame object...", verbose=verbose)
|
|
567
|
+
|
|
568
|
+
if type(path2) is Sumstats:
|
|
569
|
+
log.write("Path2 is gwaslab Sumstats object...", verbose=verbose)
|
|
570
|
+
if cols_name_list_2 is None:
|
|
571
|
+
cols_name_list_2 = ["SNPID","P","EA","NEA","CHR","POS"]
|
|
572
|
+
if scaled2==True:
|
|
573
|
+
cols_name_list_2 = ["SNPID","MLOG10P","EA","NEA","CHR","POS"]
|
|
574
|
+
if effect_cols_list_2 is None:
|
|
575
|
+
if mode=="beta":
|
|
576
|
+
effect_cols_list_2 = ["BETA","SE"]
|
|
577
|
+
else:
|
|
578
|
+
effect_cols_list_2 = ["OR","OR_95L","OR_95U"]
|
|
579
|
+
elif type(path2) is pd.DataFrame:
|
|
580
|
+
log.write("Path2 is pandas DataFrame object...", verbose=verbose)
|
|
581
|
+
|
|
582
|
+
return cols_name_list_1,cols_name_list_2, effect_cols_list_1, effect_cols_list_2
|
|
583
|
+
|
|
584
|
+
def configure_common_snp_set(path1,path2,
|
|
585
|
+
snplist,
|
|
586
|
+
label,
|
|
587
|
+
cols_name_list_1,cols_name_list_2,
|
|
588
|
+
sep,
|
|
589
|
+
scaled1,
|
|
590
|
+
scaled2,
|
|
591
|
+
log,verbose):
|
|
592
|
+
|
|
593
|
+
######### load sumstats2
|
|
594
|
+
sumstats = load_sumstats(path=path2,
|
|
595
|
+
usecols=cols_name_list_2[0],
|
|
596
|
+
label=label[1],
|
|
597
|
+
log=log,
|
|
598
|
+
verbose= verbose,
|
|
599
|
+
sep=sep[1])
|
|
600
|
+
|
|
601
|
+
common_snp_set=set(sumstats[cols_name_list_2[0]].values)
|
|
602
|
+
|
|
603
|
+
######### extract snplist1
|
|
604
|
+
if snplist is not None:
|
|
605
|
+
#use only SNPID, P
|
|
606
|
+
cols_to_extract = [cols_name_list_1[0],cols_name_list_1[1]]
|
|
607
|
+
else:
|
|
608
|
+
# use SNPID, P, chr pos
|
|
609
|
+
cols_to_extract = [cols_name_list_1[0],cols_name_list_1[1],cols_name_list_1[4],cols_name_list_1[5]]
|
|
610
|
+
|
|
611
|
+
######### load sumstats1
|
|
612
|
+
sumstats = load_sumstats(path=path1,
|
|
613
|
+
usecols=cols_to_extract,
|
|
614
|
+
label=label[0],
|
|
615
|
+
log=log,
|
|
616
|
+
verbose= verbose,
|
|
617
|
+
sep=sep[0])
|
|
618
|
+
|
|
619
|
+
gc.collect()
|
|
620
|
+
|
|
621
|
+
#if scaled1==True:
|
|
622
|
+
# sumstats[cols_name_list_1[1]] = np.power(10,-sumstats[cols_name_list_1[1]])
|
|
623
|
+
######### 5 extract the common set
|
|
624
|
+
|
|
625
|
+
common_snp_set = common_snp_set.intersection(sumstats[cols_name_list_1[0]].values)
|
|
626
|
+
|
|
627
|
+
log.write(" -Counting variants available for both datasets:",len(common_snp_set)," variants...", verbose=verbose)
|
|
628
|
+
|
|
629
|
+
return sumstats, common_snp_set
|
|
630
|
+
|
|
631
|
+
def rename_sumtats(sumstats, cols_name_list, snplist, scaled,suffix=""):
|
|
632
|
+
######### 6 rename the sumstats
|
|
633
|
+
rename_dict = { cols_name_list[0]:"SNPID",
|
|
634
|
+
cols_name_list[1]:"P{}".format(suffix),
|
|
635
|
+
}
|
|
636
|
+
if scaled==True:
|
|
637
|
+
rename_dict[cols_name_list[1]] = "MLOG10P{}".format(suffix)
|
|
638
|
+
|
|
639
|
+
if snplist is None:
|
|
640
|
+
rename_dict[cols_name_list[4]]="CHR"
|
|
641
|
+
rename_dict[cols_name_list[5]]="POS"
|
|
642
|
+
|
|
643
|
+
sumstats = sumstats.rename(columns=rename_dict)
|
|
644
|
+
return sumstats
|
|
645
|
+
|
|
646
|
+
|
|
647
|
+
def extract_snp_for_comparison(sumstats, snplist, label,
|
|
648
|
+
get_lead_args, build, drop, anno,
|
|
649
|
+
sig_level,scaled, log, verbose):
|
|
650
|
+
######### 8 extact SNPs for comparison
|
|
651
|
+
if snplist is not None:
|
|
652
|
+
######### 8.1 if a snplist is provided, use the snp list
|
|
653
|
+
log.write(" -Extract variants in the given list from "+label+"...")
|
|
654
|
+
sig_list = sumstats.loc[sumstats["SNPID"].isin(snplist),:].copy()
|
|
655
|
+
if anno=="GENENAME":
|
|
656
|
+
sig_list = annogene(sig_list,"SNPID","CHR","POS", build=build, verbose=verbose, **get_lead_args)
|
|
657
|
+
else:
|
|
658
|
+
######### 8,2 otherwise use the automatically detected lead SNPs
|
|
659
|
+
log.write(" -Extract lead variants from "+label +"...", verbose=verbose)
|
|
660
|
+
sig_list = getsig(sumstats,"SNPID","CHR","POS","P","MLOG10P", build=build, verbose=verbose,sig_level=sig_level,**get_lead_args)
|
|
661
|
+
|
|
662
|
+
if drop==True:
|
|
663
|
+
if scaled==True:
|
|
664
|
+
sig_list = drop_duplicate_and_na(sig_list, sort_by="MLOG10P",ascending=False, log=log , verbose=verbose)
|
|
665
|
+
else:
|
|
666
|
+
sig_list = drop_duplicate_and_na(sig_list, sort_by="P", ascending=True, log=log , verbose=verbose)
|
|
667
|
+
|
|
668
|
+
return sig_list
|
|
669
|
+
|
|
670
|
+
def merge_list(sig_list_1, sig_list_2, anno,labels,log, verbose):
|
|
671
|
+
|
|
672
|
+
log.write("Merging snps from "+labels[0]+" and "+labels[1]+"...", verbose=verbose)
|
|
673
|
+
|
|
674
|
+
if anno == "GENENAME":
|
|
675
|
+
if "GENE" not in sig_list_1.columns:
|
|
676
|
+
sig_list_1["GENE"]=pd.NA
|
|
677
|
+
sig_list_1["LOCATION"]=pd.NA
|
|
678
|
+
if "GENE" not in sig_list_2.columns:
|
|
679
|
+
sig_list_2["GENE"]=pd.NA
|
|
680
|
+
sig_list_2["LOCATION"]=pd.NA
|
|
681
|
+
|
|
682
|
+
sig_list_merged = pd.merge(sig_list_1,sig_list_2,left_on="SNPID",right_on="SNPID",how="outer",suffixes=('_1', '_2'))
|
|
683
|
+
|
|
684
|
+
if anno == "GENENAME":
|
|
685
|
+
sig_list_merged.loc[sig_list_merged["SNPID"].isin((sig_list_1["SNPID"])),"GENENAME"] = sig_list_merged.loc[sig_list_merged["SNPID"].isin((sig_list_1["SNPID"])),"GENE_1"]
|
|
686
|
+
sig_list_merged.loc[~sig_list_merged["SNPID"].isin((sig_list_1["SNPID"])),"GENENAME"] = sig_list_merged.loc[~sig_list_merged["SNPID"].isin((sig_list_1["SNPID"])),"GENE_2"]
|
|
687
|
+
sig_list_merged = sig_list_merged.drop(columns=["GENE_1","GENE_2","LOCATION_1","LOCATION_2"])
|
|
688
|
+
# SNPID P_1 P_2
|
|
689
|
+
#0 rs117986209 0.142569 0.394455
|
|
690
|
+
#1 rs6704312 0.652104 0.143750
|
|
691
|
+
return sig_list_merged
|
|
692
|
+
|
|
693
|
+
def configure_cols_to_extract(mode,
|
|
694
|
+
cols_name_list,
|
|
695
|
+
effect_cols_list,
|
|
696
|
+
eaf):
|
|
697
|
+
|
|
698
|
+
if mode=="beta" or mode=="BETA" or mode=="Beta":
|
|
699
|
+
#[snpid,p,ea,nea] ,[effect,se]
|
|
700
|
+
#[snpid,p,ea,nea,chr,pos],[effect,se]
|
|
701
|
+
#[snpid,p,ea,nea,chr,pos],[OR,OR_l,OR_h]
|
|
702
|
+
cols_to_extract = [cols_name_list[0],cols_name_list[1], cols_name_list[2],cols_name_list[3], effect_cols_list[0], effect_cols_list[1]]
|
|
703
|
+
else:
|
|
704
|
+
cols_to_extract = [cols_name_list[0],cols_name_list[1], cols_name_list[2],cols_name_list[3], effect_cols_list[0], effect_cols_list[1], effect_cols_list[2]]
|
|
705
|
+
|
|
706
|
+
if len(eaf)>0:
|
|
707
|
+
cols_to_extract.append(eaf[0])
|
|
708
|
+
|
|
709
|
+
return cols_to_extract
|
|
710
|
+
|
|
711
|
+
def rename_sumstats_full(mode, sumstats, cols_name_list, effect_cols_list, eaf, drop, index, scaled, log, verbose):
|
|
712
|
+
if mode=="beta" or mode=="BETA" or mode=="Beta":
|
|
713
|
+
rename_dict = { cols_name_list[0]:"SNPID",
|
|
714
|
+
cols_name_list[1]:"P_{}".format(index),
|
|
715
|
+
cols_name_list[2]:"EA_{}".format(index),
|
|
716
|
+
cols_name_list[3]:"NEA_{}".format(index),
|
|
717
|
+
effect_cols_list[0]:"EFFECT_{}".format(index),
|
|
718
|
+
effect_cols_list[1]:"SE_{}".format(index)}
|
|
719
|
+
|
|
720
|
+
|
|
721
|
+
else:
|
|
722
|
+
# if or
|
|
723
|
+
rename_dict = { cols_name_list[0]:"SNPID",
|
|
724
|
+
cols_name_list[1]:"P_{}".format(index),
|
|
725
|
+
cols_name_list[2]:"EA_{}".format(index),
|
|
726
|
+
cols_name_list[3]:"NEA_{}".format(index),
|
|
727
|
+
effect_cols_list[0]:"OR_{}".format(index),
|
|
728
|
+
effect_cols_list[1]:"OR_L_{}".format(index),
|
|
729
|
+
effect_cols_list[2]:"OR_H_{}".format(index)}
|
|
730
|
+
if scaled==True:
|
|
731
|
+
rename_dict[cols_name_list[1]]="MLOG10P_{}".format(index)
|
|
732
|
+
## check if eaf column is provided.
|
|
733
|
+
if len(eaf)>0:
|
|
734
|
+
rename_dict[eaf[index-1]]="EAF_{}".format(index)
|
|
735
|
+
sumstats = sumstats.rename(columns=rename_dict)
|
|
736
|
+
|
|
737
|
+
# drop na and duplicate
|
|
738
|
+
if drop==True:
|
|
739
|
+
if scaled==True:
|
|
740
|
+
sumstats = drop_duplicate_and_na(sumstats, sort_by="MLOG10P_{}".format(index),ascending=False, log=log , verbose=verbose)
|
|
741
|
+
else:
|
|
742
|
+
sumstats = drop_duplicate_and_na(sumstats, sort_by="P_{}".format(index), ascending=True, log=log , verbose=verbose)
|
|
743
|
+
|
|
744
|
+
if scaled==True:
|
|
745
|
+
sumstats.drop("MLOG10P_{}".format(index),axis=1,inplace=True)
|
|
746
|
+
else:
|
|
747
|
+
sumstats.drop("P_{}".format(index),axis=1,inplace=True)
|
|
748
|
+
return sumstats
|
|
749
|
+
|
|
750
|
+
def update_stats(sig_list_merged,
|
|
751
|
+
path,
|
|
752
|
+
cols_name_list,
|
|
753
|
+
sep,
|
|
754
|
+
snplist,
|
|
755
|
+
label,
|
|
756
|
+
drop,
|
|
757
|
+
index,
|
|
758
|
+
scaled,
|
|
759
|
+
log,
|
|
760
|
+
verbose):
|
|
761
|
+
|
|
762
|
+
log.write(" -Updating missing information for "+label+" ...", verbose=verbose)
|
|
763
|
+
cols_to_extract = [cols_name_list[0], cols_name_list[1]]
|
|
764
|
+
|
|
765
|
+
sumstats = load_sumstats(path=path,
|
|
766
|
+
usecols=cols_to_extract,
|
|
767
|
+
label=label,
|
|
768
|
+
log=log,
|
|
769
|
+
verbose= verbose,
|
|
770
|
+
sep=sep)
|
|
771
|
+
#if scaled1==True:
|
|
772
|
+
# sumstats[cols_name_list_1[1]] = np.power(10,-sumstats[cols_name_list_1[1]])
|
|
773
|
+
|
|
774
|
+
sumstats = rename_sumtats(sumstats = sumstats,
|
|
775
|
+
cols_name_list = cols_name_list,
|
|
776
|
+
snplist = snplist,
|
|
777
|
+
scaled=scaled,
|
|
778
|
+
suffix="_{}".format(index))
|
|
779
|
+
# drop na and duplicate
|
|
780
|
+
if drop==True:
|
|
781
|
+
if scaled==True:
|
|
782
|
+
sumstats = drop_duplicate_and_na(sumstats, sort_by="MLOG10P_{}".format(index),ascending=False, log=log , verbose=verbose)
|
|
783
|
+
else:
|
|
784
|
+
sumstats = drop_duplicate_and_na(sumstats, sort_by="P_{}".format(index), ascending=True, log=log , verbose=verbose)
|
|
785
|
+
|
|
786
|
+
|
|
787
|
+
sumstats = sumstats.set_index("SNPID")
|
|
788
|
+
sig_list_merged.update(sumstats)
|
|
789
|
+
|
|
790
|
+
return sig_list_merged
|
|
791
|
+
|
|
792
|
+
|
|
793
|
+
def assign_indicator(sig_list_merged, snplist, sig_level, scaled1, scaled2, log, verbose):
|
|
794
|
+
############## 18 init indicator
|
|
795
|
+
log.write(" -Assigning indicator ...", verbose=verbose)
|
|
796
|
+
# 0-> 0
|
|
797
|
+
# 1 -> sig in sumstats1
|
|
798
|
+
# 2 -> sig in sumsatts2
|
|
799
|
+
# 3-> sig in both sumstats1 + sumstats2
|
|
800
|
+
sig_list_merged["indicator"] = 0
|
|
801
|
+
|
|
802
|
+
if scaled1==True:
|
|
803
|
+
sig_list_merged.loc[sig_list_merged["MLOG10P_1"]>-np.log10(sig_level),"indicator"]=1+sig_list_merged.loc[sig_list_merged["MLOG10P_1"]>-np.log10(sig_level),"indicator"]
|
|
804
|
+
else:
|
|
805
|
+
sig_list_merged.loc[sig_list_merged["P_1"]<sig_level,"indicator"]=1+sig_list_merged.loc[sig_list_merged["P_1"]<sig_level,"indicator"]
|
|
806
|
+
|
|
807
|
+
if scaled2==True:
|
|
808
|
+
sig_list_merged.loc[sig_list_merged["MLOG10P_2"]>-np.log10(sig_level),"indicator"]=2+sig_list_merged.loc[sig_list_merged["MLOG10P_2"]>-np.log10(sig_level),"indicator"]
|
|
809
|
+
else:
|
|
810
|
+
sig_list_merged.loc[sig_list_merged["P_2"]<sig_level,"indicator"]=2+sig_list_merged.loc[sig_list_merged["P_2"]<sig_level,"indicator"]
|
|
811
|
+
|
|
812
|
+
if snplist is None:
|
|
813
|
+
sig_list_merged["CHR"]=np.max(sig_list_merged[["CHR_1","CHR_2"]], axis=1).astype(int)
|
|
814
|
+
sig_list_merged["POS"]=np.max(sig_list_merged[["POS_1","POS_2"]], axis=1).astype(int)
|
|
815
|
+
sig_list_merged.drop(labels=['CHR_1', 'CHR_2','POS_1', 'POS_2'], axis=1,inplace=True)
|
|
816
|
+
return sig_list_merged
|
|
817
|
+
|
|
818
|
+
def align_alleles(sig_list_merged, label,mode,eaf, log, verbose):
|
|
819
|
+
log.write(" -Aligning "+label[1]+" EA with "+label[0]+" EA ...", verbose=verbose)
|
|
820
|
+
############### 19 align allele effect with sumstats 1
|
|
821
|
+
sig_list_merged["EA_1"]=sig_list_merged["EA_1"].astype("string")
|
|
822
|
+
sig_list_merged["EA_2"]=sig_list_merged["EA_2"].astype("string")
|
|
823
|
+
sig_list_merged["NEA_1"]=sig_list_merged["NEA_1"].astype("string")
|
|
824
|
+
sig_list_merged["NEA_2"]=sig_list_merged["NEA_2"].astype("string")
|
|
825
|
+
if mode=="beta" or mode=="BETA" or mode=="Beta":
|
|
826
|
+
# copy raw
|
|
827
|
+
sig_list_merged["EA_2_aligned"]=sig_list_merged["EA_2"]
|
|
828
|
+
sig_list_merged["NEA_2_aligned"]=sig_list_merged["NEA_2"]
|
|
829
|
+
sig_list_merged["EFFECT_2_aligned"]=sig_list_merged["EFFECT_2"]
|
|
830
|
+
|
|
831
|
+
#filp ea/nea and beta for sumstats2
|
|
832
|
+
sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"EA_2_aligned"]= sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"NEA_2"]
|
|
833
|
+
sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"NEA_2_aligned"]= sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"EA_2"]
|
|
834
|
+
sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"EFFECT_2_aligned"]= -sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"EFFECT_2"]
|
|
835
|
+
else:
|
|
836
|
+
#flip for OR or - +
|
|
837
|
+
|
|
838
|
+
sig_list_merged["EA_2_aligned"]=sig_list_merged["EA_2"]
|
|
839
|
+
sig_list_merged["NEA_2_aligned"]=sig_list_merged["NEA_2"]
|
|
840
|
+
sig_list_merged["OR_2_aligned"]=sig_list_merged["OR_2"]
|
|
841
|
+
sig_list_merged["OR_L_2_aligned"]=sig_list_merged["OR_L_2"]
|
|
842
|
+
sig_list_merged["OR_H_2_aligned"]=sig_list_merged["OR_H_2"]
|
|
843
|
+
|
|
844
|
+
sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"EA_2_aligned"]= sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"NEA_2"]
|
|
845
|
+
sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"NEA_2_aligned"]= sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"EA_2"]
|
|
846
|
+
sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"OR_2_aligned"]= 1/sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"OR_2"]
|
|
847
|
+
sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"OR_H_2_aligned"]= 1/sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"OR_L_2"]
|
|
848
|
+
sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"OR_L_2_aligned"]= 1/sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"OR_H_2"]
|
|
849
|
+
|
|
850
|
+
sig_list_merged["BETA_1"]=np.log(sig_list_merged["OR_1"])
|
|
851
|
+
sig_list_merged["BETA_2_aligned"]=np.log(sig_list_merged["OR_2_aligned"])
|
|
852
|
+
sig_list_merged["SE_1"]=(np.log(sig_list_merged["OR_H_1"]) - np.log(sig_list_merged["OR_1"]))/ss.norm.ppf(0.975)
|
|
853
|
+
sig_list_merged["SE_2"]=(np.log(sig_list_merged["OR_H_2_aligned"]) - np.log(sig_list_merged["OR_2_aligned"]))/ss.norm.ppf(0.975)
|
|
854
|
+
|
|
855
|
+
sig_list_merged["OR_L_1_err"]=np.abs(sig_list_merged["OR_L_1"]-sig_list_merged["OR_1"])
|
|
856
|
+
sig_list_merged["OR_H_1_err"]=np.abs(sig_list_merged["OR_H_1"]-sig_list_merged["OR_1"])
|
|
857
|
+
sig_list_merged["OR_L_2_aligned_err"]=np.abs(sig_list_merged["OR_L_2_aligned"]-sig_list_merged["OR_2_aligned"])
|
|
858
|
+
sig_list_merged["OR_H_2_aligned_err"]=np.abs(sig_list_merged["OR_H_2_aligned"]-sig_list_merged["OR_2_aligned"])
|
|
859
|
+
|
|
860
|
+
if len(eaf)>0:
|
|
861
|
+
# flip eaf
|
|
862
|
+
sig_list_merged["EAF_2_aligned"]=sig_list_merged["EAF_2"]
|
|
863
|
+
sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"EAF_2_aligned"]= 1 -sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"EAF_2"]
|
|
864
|
+
return sig_list_merged
|
|
865
|
+
|
|
866
|
+
#########################################################################################################################
|
|
867
|
+
#########################################################################################################################
|
|
868
|
+
#########################################################################################################################
|
|
869
|
+
#########################################################################################################################
|
|
870
|
+
#########################################################################################################################
|
|
871
|
+
#########################################################################################################################
|
|
872
|
+
#########################################################################################################################
|
|
873
|
+
#########################################################################################################################
|
|
874
|
+
#########################################################################################################################
|
|
875
|
+
|
|
876
|
+
def check_allele_match(sig_list_merged, allele_match, label, log,verbose):
|
|
877
|
+
# checking effect allele matching
|
|
878
|
+
nonmatch = np.nansum(sig_list_merged["EA_1"] != sig_list_merged["EA_2_aligned"])
|
|
879
|
+
log.write(" -Aligned all EAs in {} with EAs in {} ...".format(label[1],label[0]), verbose=verbose)
|
|
880
|
+
if nonmatch>0:
|
|
881
|
+
log.warning("Alleles for {} variants do not match...".format(nonmatch))
|
|
882
|
+
if allele_match==True:
|
|
883
|
+
if nonmatch>0:
|
|
884
|
+
sig_list_merged = sig_list_merged.loc[sig_list_merged["EA_1"] == sig_list_merged["EA_2_aligned"]]
|
|
885
|
+
else:
|
|
886
|
+
log.write(" -No variants with EA not matching...", verbose=verbose)
|
|
887
|
+
return sig_list_merged
|
|
888
|
+
|
|
889
|
+
def winnerscurse_correction(sig_list_merged, mode, wc_correction, sig_level, scaled1, scaled2, log, verbose):
|
|
890
|
+
if mode=="beta":
|
|
891
|
+
if scaled1==True:
|
|
892
|
+
match1= sig_list_merged["MLOG10P_1"]>-np.log10(sig_level)
|
|
893
|
+
else:
|
|
894
|
+
match1 = sig_list_merged["P_1"]<sig_level
|
|
895
|
+
if scaled2==True:
|
|
896
|
+
match2= sig_list_merged["MLOG10P_2"]>-np.log10(sig_level)
|
|
897
|
+
else:
|
|
898
|
+
match2 = sig_list_merged["P_2"]<sig_level
|
|
899
|
+
|
|
900
|
+
if wc_correction == "all":
|
|
901
|
+
log.write(" -Correcting BETA for winner's curse with threshold at {} for all variants...".format(sig_level), verbose=verbose)
|
|
902
|
+
sig_list_merged["EFFECT_1_RAW"] = sig_list_merged["EFFECT_1"].copy()
|
|
903
|
+
sig_list_merged["EFFECT_2_aligned_RAW"] = sig_list_merged["EFFECT_2_aligned"].copy()
|
|
904
|
+
|
|
905
|
+
log.write(" -Correcting BETA for {} variants in sumstats1...".format(sum(~sig_list_merged["EFFECT_1"].isna())), verbose=verbose)
|
|
906
|
+
sig_list_merged["EFFECT_1"] = sig_list_merged[["EFFECT_1_RAW","SE_1"]].apply(lambda x: wc_correct(x[0],x[1],sig_level),axis=1)
|
|
907
|
+
|
|
908
|
+
log.write(" -Correcting BETA for {} variants in sumstats2...".format(sum(~sig_list_merged["EFFECT_2_aligned"].isna())), verbose=verbose)
|
|
909
|
+
sig_list_merged["EFFECT_2_aligned"] = sig_list_merged[["EFFECT_2_aligned_RAW","SE_2"]].apply(lambda x: wc_correct(x[0],x[1],sig_level),axis=1)
|
|
910
|
+
|
|
911
|
+
elif wc_correction == "sig" :
|
|
912
|
+
|
|
913
|
+
log.write(" - Correcting BETA for winner's curse with threshold at {} for significant variants...".format(sig_level), verbose=verbose)
|
|
914
|
+
sig_list_merged["EFFECT_1_RAW"] = sig_list_merged["EFFECT_1"].copy()
|
|
915
|
+
sig_list_merged["EFFECT_2_aligned_RAW"] = sig_list_merged["EFFECT_2_aligned"].copy()
|
|
916
|
+
log.write(" -Correcting BETA for {} variants in sumstats1...".format(sum(match1)), verbose=verbose)
|
|
917
|
+
sig_list_merged.loc[match1, "EFFECT_1"] = sig_list_merged.loc[match1, ["EFFECT_1_RAW","SE_1"]].apply(lambda x: wc_correct_test(x[0],x[1],sig_level),axis=1)
|
|
918
|
+
log.write(" -Correcting BETA for {} variants in sumstats2...".format(sum(match2)), verbose=verbose)
|
|
919
|
+
sig_list_merged.loc[match2, "EFFECT_2_aligned"] = sig_list_merged.loc[match2, ["EFFECT_2_aligned_RAW","SE_2"]].apply(lambda x: wc_correct_test(x[0],x[1],sig_level),axis=1)
|
|
920
|
+
|
|
921
|
+
elif wc_correction == "sumstats1" :
|
|
922
|
+
log.write(" - Correcting BETA for winner's curse with threshold at {} for significant variants in sumstats1...".format(sig_level), verbose=verbose)
|
|
923
|
+
sig_list_merged["EFFECT_1_RAW"] = sig_list_merged["EFFECT_1"].copy()
|
|
924
|
+
log.write(" -Correcting BETA for {} variants in sumstats1...".format(sum(match1)), verbose=verbose)
|
|
925
|
+
sig_list_merged.loc[match1, "EFFECT_1"] = sig_list_merged.loc[match1, ["EFFECT_1_RAW","SE_1"]].apply(lambda x: wc_correct_test(x[0],x[1],sig_level),axis=1)
|
|
926
|
+
|
|
927
|
+
elif wc_correction == "sumstats2" :
|
|
928
|
+
log.write(" - Correcting BETA for winner's curse with threshold at {} for significant variants in sumstats2...".format(sig_level), verbose=verbose)
|
|
929
|
+
sig_list_merged["EFFECT_2_aligned_RAW"] = sig_list_merged["EFFECT_2_aligned"].copy()
|
|
930
|
+
log.write(" -Correcting BETA for {} variants in sumstats2...".format(sum(match2)), verbose=verbose)
|
|
931
|
+
sig_list_merged.loc[match2, "EFFECT_2_aligned"] = sig_list_merged.loc[match2, ["EFFECT_2_aligned_RAW","SE_2"]].apply(lambda x: wc_correct_test(x[0],x[1],sig_level),axis=1)
|
|
932
|
+
return sig_list_merged
|
|
933
|
+
|
|
934
|
+
def filter_by_maf(sig_list_merged, eaf, maf_level, log, verbose):
    """Drop variants whose allele frequency implies MAF <= maf_level in either dataset.

    Filtering is applied only when EAF columns were loaded (len(eaf) > 0) and a
    MAF threshold was supplied; otherwise the table is returned untouched.
    """
    if (len(eaf) > 0) and (maf_level is not None):
        # a variant passes only if its EAF lies strictly inside
        # (maf_level, 1 - maf_level) in BOTH sumstats
        pass_1 = (sig_list_merged["EAF_1"] > maf_level) & (sig_list_merged["EAF_1"] < 1 - maf_level)
        pass_2 = (sig_list_merged["EAF_2"] > maf_level) & (sig_list_merged["EAF_2"] < 1 - maf_level)
        both_eaf_clear = pass_1 & pass_2
        n_excluded = len(sig_list_merged) - sum(both_eaf_clear)
        log.write(" -Exclude " + str(n_excluded) + " variants with maf <", maf_level, verbose=verbose)
        sig_list_merged = sig_list_merged.loc[both_eaf_clear, :]
    return sig_list_merged
|
|
940
|
+
|
|
941
|
+
|
|
942
|
+
|
|
943
|
+
|
|
944
|
+
|
|
945
|
+
def test_q(df,beta1,se1,beta2,se2,q_level=0.05,is_q_mc=False, log=Log(), verbose=False):
    """Test per-variant effect-size heterogeneity between two studies.

    Computes Cochran's Q against the fixed-effect meta-analysis estimate,
    its chi-square P value (1 df, stored in "HetP"), and the I2 index
    (stored in "I2"), and flags heterogeneous variants by setting
    "Edge_color" to "black".

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding both studies' effects and SEs; modified in place.
    beta1, se1, beta2, se2 : str
        Column names of the two effect sizes and their standard errors.
    q_level : float
        Significance threshold applied to the (possibly corrected) Q P value.
    is_q_mc : False, "fdr" or "bon"
        Multiple-testing correction: none, Benjamini-Hochberg FDR, or Bonferroni.
        When a correction is applied, the raw P values are kept in "RAW_HetP".

    Returns
    -------
    pandas.DataFrame
        The same (mutated) df.
    """
    # column names for intermediate / output values
    w1="Weight_1"
    w2="Weight_2"
    beta="BETA_FE"
    q="Q"
    pq="HetP"
    rawpq="RAW_HetP"
    i2="I2"

    # inverse-variance weights and fixed-effect pooled estimate
    df[w1]=1/(df[se1])**2
    df[w2]=1/(df[se2])**2
    df[beta] =(df[w1]*df[beta1] + df[w2]*df[beta2])/(df[w1]+df[w2])

    # Cochran(1954): Q = sum_i w_i*(beta_i - beta_FE)^2 ~ chi2 with k-1 = 1 df
    df[q] = df[w1]*(df[beta1]-df[beta])**2 + df[w2]*(df[beta2]-df[beta])**2
    df[pq] = ss.chi2.sf(df[q], 1)
    df["Edge_color"]="white"

    if is_q_mc=="fdr":
        # Benjamini-Hochberg FDR (requires scipy >= 1.11)
        log.write(" -FDR correction applied...", verbose=verbose)
        df[rawpq] = df[pq]
        df[pq] = ss.false_discovery_control(df[pq])
    elif is_q_mc=="bon":
        # Bonferroni: multiply by the number of tests, capped at 1 so the
        # adjusted value remains a valid probability (bug fix: previously
        # the adjusted P could exceed 1)
        log.write(" -Bonferroni correction applied...", verbose=verbose)
        df[rawpq] = df[pq]
        df[pq] = (df[pq] * len(df[pq])).clip(upper=1)

    # flag variants with significant heterogeneity
    df.loc[df[pq]<q_level,"Edge_color"]="black"
    df.drop(columns=["Weight_1","Weight_2","BETA_FE"],inplace=True)

    # Huedo-Medina, T. B., Sánchez-Meca, J., Marín-Martínez, F., & Botella, J. (2006). Assessing heterogeneity in meta-analysis: Q statistic or I² index?. Psychological methods, 11(2), 193.
    # I2 = (Q - df)/Q with df = 1, truncated at 0
    df[i2] = (df[q] - 1)/df[q]
    df.loc[df[i2]<0,i2] = 0

    return df
|
|
981
|
+
|
|
982
|
+
def jackknife_r(df,x="EFFECT_1",y="EFFECT_2_aligned"):
    """Jackknife (leave-one-out) estimate of the SE of Pearson's r.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing columns x and y; rows with NA in either are ignored.
    x, y : str
        Column names of the two effect-size series.

    Returns
    -------
    float
        Jackknife standard error of the correlation coefficient r.
    """
    # use complete pairs only
    df_nona = df.loc[:,[x,y]].dropna()

    # number of usable rows AFTER dropping NAs
    # (bug fix: previously len(df) was used, which raised a length-mismatch
    # error whenever any row contained an NA)
    n = len(df_nona)

    xs = df_nona[x].to_numpy()
    ys = df_nona[y].to_numpy()

    # leave-one-out estimates of r
    r_list=[]
    for i in range(n):
        # exclude record i and re-estimate r
        keep = np.arange(n) != i
        reg_jackknife = ss.linregress(xs[keep], ys[keep])
        r_list.append(reg_jackknife[2])

    rs = np.array(r_list)
    # https://en.wikipedia.org/wiki/Jackknife_resampling
    r_se = np.sqrt( (n-1)/n * np.sum((rs - np.mean(rs))**2) )
    return r_se
|
|
1013
|
+
|
|
1014
|
+
def drop_duplicate_and_na(df,snpid="SNPID",sort_by=False,log=Log(),ascending=True,verbose=True):
    """Remove rows with missing or duplicated variant IDs, in place.

    When sort_by is given, the table is sorted first so that keep='first'
    retains the preferred record within each duplicate group.
    Returns the same (mutated) DataFrame.
    """
    n_before = len(df)

    # optional pre-sort so the "best" record survives deduplication
    if sort_by != False:
        df.sort_values(by=sort_by, ascending=ascending, inplace=True)

    df.dropna(axis="index", subset=[snpid], inplace=True)
    df.drop_duplicates(subset=[snpid], keep='first', inplace=True)

    n_after = len(df)
    if n_before != n_after:
        log.write(" -Dropped {} duplicates or NAs...".format(n_before - n_after), verbose=verbose)
    return df
|
|
1028
|
+
|
|
1029
|
+
|
|
1030
|
+
|
|
1031
|
+
#########################################################################################################################
|
|
1032
|
+
#########################################################################################################################
|
|
1033
|
+
#########################################################################################################################
|
|
1034
|
+
#########################################################################################################################
|
|
1035
|
+
#########################################################################################################################
|
|
1036
|
+
#########################################################################################################################
|
|
1037
|
+
#########################################################################################################################
|
|
1038
|
+
#########################################################################################################################
|
|
1039
|
+
#########################################################################################################################
|
|
1040
|
+
|
|
1041
|
+
def scatter_annotation(ax, sig_list_merged,anno, anno_het, is_q, mode,
|
|
1042
|
+
anno_min,anno_min1,anno_min2,anno_diff,anno_kwargs,adjust_text_kwargs_l,adjust_text_kwargs_r,
|
|
1043
|
+
log,verbose
|
|
1044
|
+
):
|
|
1045
|
+
if anno==True or anno=="GENENAME":
|
|
1046
|
+
sig_list_toanno = sig_list_merged.dropna(axis=0)
|
|
1047
|
+
if is_q==True and anno_het == True:
|
|
1048
|
+
sig_list_toanno = sig_list_toanno.loc[sig_list_toanno["Edge_color"]=="black",:]
|
|
1049
|
+
|
|
641
1050
|
if mode=="beta":
|
|
642
1051
|
sig_list_toanno = sig_list_toanno.loc[sig_list_toanno["EFFECT_1"].abs() >=anno_min1 ,:]
|
|
643
1052
|
sig_list_toanno = sig_list_toanno.loc[sig_list_toanno["EFFECT_2_aligned"].abs() >=anno_min2 ,:]
|
|
@@ -651,22 +1060,38 @@ def compare_effect(path1,
|
|
|
651
1060
|
|
|
652
1061
|
texts_l=[]
|
|
653
1062
|
texts_r=[]
|
|
1063
|
+
|
|
1064
|
+
if anno==True:
|
|
1065
|
+
log.write("Annotating variants using {}".format("SNPID"), verbose=verbose)
|
|
1066
|
+
elif anno=="GENENAME":
|
|
1067
|
+
log.write("Annotating variants using {}".format("GENENAME"), verbose=verbose)
|
|
1068
|
+
|
|
654
1069
|
for index, row in sig_list_toanno.iterrows():
|
|
1070
|
+
#log.write("Annotating {}...".format(row), verbose=verbose)
|
|
1071
|
+
if anno==True:
|
|
1072
|
+
to_anno_text = index
|
|
1073
|
+
elif type(anno) is str:
|
|
1074
|
+
if not pd.isna(row[anno]):
|
|
1075
|
+
to_anno_text = row[anno]
|
|
1076
|
+
else:
|
|
1077
|
+
to_anno_text = index
|
|
1078
|
+
|
|
655
1079
|
if mode=="beta" or mode=="BETA" or mode=="Beta":
|
|
656
1080
|
if row["EFFECT_1"] < row["EFFECT_2_aligned"]:
|
|
657
|
-
texts_l.append(plt.text(row["EFFECT_1"], row["EFFECT_2_aligned"],
|
|
1081
|
+
texts_l.append(plt.text(row["EFFECT_1"], row["EFFECT_2_aligned"],to_anno_text,ha="right",va="bottom", **anno_kwargs))
|
|
658
1082
|
else:
|
|
659
|
-
texts_r.append(plt.text(row["EFFECT_1"], row["EFFECT_2_aligned"],
|
|
1083
|
+
texts_r.append(plt.text(row["EFFECT_1"], row["EFFECT_2_aligned"],to_anno_text,ha="left",va="top", **anno_kwargs))
|
|
660
1084
|
else:
|
|
661
1085
|
if row["OR_1"] < row["OR_2_aligned"]:
|
|
662
|
-
texts_l.append(plt.text(row["OR_1"], row["OR_2_aligned"],
|
|
1086
|
+
texts_l.append(plt.text(row["OR_1"], row["OR_2_aligned"],to_anno_text, ha='right', va='bottom', **anno_kwargs))
|
|
663
1087
|
else:
|
|
664
|
-
texts_r.append(plt.text(row["OR_1"], row["OR_2_aligned"],
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
1088
|
+
texts_r.append(plt.text(row["OR_1"], row["OR_2_aligned"],to_anno_text, ha='left', va='top', **anno_kwargs))
|
|
1089
|
+
if len(texts_l)>0:
|
|
1090
|
+
adjust_text(texts_l,ax=ax,**adjust_text_kwargs_l)
|
|
1091
|
+
if len(texts_r)>0:
|
|
1092
|
+
adjust_text(texts_r,ax=ax,**adjust_text_kwargs_r)
|
|
669
1093
|
elif type(anno) is dict:
|
|
1094
|
+
sig_list_toanno = sig_list_merged.dropna(axis=0)
|
|
670
1095
|
# if input is a dict
|
|
671
1096
|
sig_list_toanno = sig_list_toanno.loc[sig_list_toanno.index.isin(list(anno.keys())),:]
|
|
672
1097
|
if is_q==True and anno_het == True:
|
|
@@ -688,37 +1113,24 @@ def compare_effect(path1,
|
|
|
688
1113
|
for index, row in sig_list_toanno.iterrows():
|
|
689
1114
|
if mode=="beta" or mode=="BETA" or mode=="Beta":
|
|
690
1115
|
if row["EFFECT_1"] < row["EFFECT_2_aligned"]:
|
|
691
|
-
texts_l.append(plt.text(row["EFFECT_1"], row["EFFECT_2_aligned"],anno[index],ha="right",va="bottom"))
|
|
1116
|
+
texts_l.append(plt.text(row["EFFECT_1"], row["EFFECT_2_aligned"],anno[index],ha="right",va="bottom", **anno_kwargs))
|
|
692
1117
|
else:
|
|
693
|
-
texts_r.append(plt.text(row["EFFECT_1"], row["EFFECT_2_aligned"],anno[index],ha="left",va="top"))
|
|
1118
|
+
texts_r.append(plt.text(row["EFFECT_1"], row["EFFECT_2_aligned"],anno[index],ha="left",va="top", **anno_kwargs))
|
|
694
1119
|
else:
|
|
695
1120
|
if row["OR_1"] < row["OR_2_aligned"]:
|
|
696
|
-
texts_l.append(plt.text(row["OR_1"], row["OR_2_aligned"],anno[index], ha='right', va='bottom'))
|
|
1121
|
+
texts_l.append(plt.text(row["OR_1"], row["OR_2_aligned"],anno[index], ha='right', va='bottom', **anno_kwargs))
|
|
697
1122
|
else:
|
|
698
|
-
texts_r.append(plt.text(row["OR_1"], row["OR_2_aligned"],anno[index], ha='left', va='top'))
|
|
1123
|
+
texts_r.append(plt.text(row["OR_1"], row["OR_2_aligned"],anno[index], ha='left', va='top', **anno_kwargs))
|
|
1124
|
+
if len(texts_l)>0:
|
|
1125
|
+
adjust_text(texts_l,ax=ax,**adjust_text_kwargs_l)
|
|
1126
|
+
if len(texts_r)>0:
|
|
1127
|
+
adjust_text(texts_r,ax=ax,**adjust_text_kwargs_r)
|
|
1128
|
+
return ax
|
|
699
1129
|
|
|
700
|
-
adjust_text(texts_l,autoalign =False,precision =0.001,lim=1000, ha="right",va="bottom", expand_text=(1,1.8) , expand_objects=(0.1,0.1), expand_points=(1.8,1.8) ,force_objects=(0.8,0.8) ,arrowprops=dict(arrowstyle='-|>', color='grey'),ax=ax)
|
|
701
|
-
adjust_text(texts_r,autoalign =False,precision =0.001,lim=1000, ha="left",va="top", expand_text=(1,1.8) , expand_objects=(0.1,0.1), expand_points=(1.8,1.8) ,force_objects =(0.8,0.8),arrowprops=dict(arrowstyle='-|>', color='grey'),ax=ax)
|
|
702
|
-
#################################################################################################################################
|
|
703
|
-
|
|
704
|
-
# plot x=0,y=0, and a 45 degree line
|
|
705
|
-
xl,xh=ax.get_xlim()
|
|
706
|
-
yl,yh=ax.get_ylim()
|
|
707
|
-
|
|
708
|
-
if mode=="beta" or mode=="BETA" or mode=="Beta":
|
|
709
|
-
#if using beta
|
|
710
|
-
ax.axhline(y=0, zorder=1,**helper_line_args)
|
|
711
|
-
ax.axvline(x=0, zorder=1,**helper_line_args)
|
|
712
|
-
else:
|
|
713
|
-
#if using OR
|
|
714
|
-
ax.axhline(y=1, zorder=1,**helper_line_args)
|
|
715
|
-
ax.axvline(x=1, zorder=1,**helper_line_args)
|
|
716
|
-
|
|
717
|
-
for spine in ['top', 'right']:
|
|
718
|
-
ax.spines[spine].set_visible(False)
|
|
719
|
-
|
|
720
1130
|
|
|
721
|
-
|
|
1131
|
+
def confire_regression_line(is_reg, reg_box, sig_list_merged, ax, mode,xl,yl,xh,yh, null_beta, r_se,
|
|
1132
|
+
is_45_helper_line,helper_line_args, font_kwargs,
|
|
1133
|
+
log, verbose):
|
|
722
1134
|
if len(sig_list_merged)<3: is_reg=False
|
|
723
1135
|
if is_reg is True:
|
|
724
1136
|
if mode=="beta" or mode=="BETA" or mode=="Beta":
|
|
@@ -764,7 +1176,7 @@ def compare_effect(path1,
|
|
|
764
1176
|
pe="0"
|
|
765
1177
|
p_text="$p = " + p12 + " \\times 10^{"+pe+"}$"
|
|
766
1178
|
p_latex= f'{p_text}'
|
|
767
|
-
ax.text(0.98,0.02,"$y =$ "+"{:.2f}".format(reg[1]) +" $+$ "+ "{:.2f}".format(reg[0])+" $x$, "+ p_latex + ", $r =$" +"{:.2f}".format(reg[2])+r_se_jackknife_string, va="bottom",ha="right",transform=ax.transAxes, bbox=reg_box, **
|
|
1179
|
+
ax.text(0.98,0.02,"$y =$ "+"{:.2f}".format(reg[1]) +" $+$ "+ "{:.2f}".format(reg[0])+" $x$, "+ p_latex + ", $r =$" +"{:.2f}".format(reg[2])+r_se_jackknife_string, va="bottom",ha="right",transform=ax.transAxes, bbox=reg_box, **font_kwargs)
|
|
768
1180
|
else:
|
|
769
1181
|
#if regression coeeficient <0 : auxiliary line slope = -1
|
|
770
1182
|
if is_45_helper_line is True:
|
|
@@ -781,7 +1193,7 @@ def compare_effect(path1,
|
|
|
781
1193
|
pe="0"
|
|
782
1194
|
p_text="$p = " + p12 + " \\times 10^{"+pe+"}$"
|
|
783
1195
|
p_latex= f'{p_text}'
|
|
784
|
-
ax.text(0.98,0.02,"$y =$ "+"{:.2f}".format(reg[1]) +" $-$ "+ "{:.2f}".format(abs(reg[0]))+" $x$, "+ p_latex + ", $r =$" +"{:.2f}".format(reg[2])+r_se_jackknife_string, va="bottom",ha="right",transform=ax.transAxes,bbox=reg_box,**
|
|
1196
|
+
ax.text(0.98,0.02,"$y =$ "+"{:.2f}".format(reg[1]) +" $-$ "+ "{:.2f}".format(abs(reg[0]))+" $x$, "+ p_latex + ", $r =$" +"{:.2f}".format(reg[2])+r_se_jackknife_string, va="bottom",ha="right",transform=ax.transAxes,bbox=reg_box,**font_kwargs)
|
|
785
1197
|
|
|
786
1198
|
if mode=="beta" or mode=="BETA" or mode=="Beta":
|
|
787
1199
|
middle = sig_list_merged["EFFECT_1"].mean()
|
|
@@ -792,11 +1204,12 @@ def compare_effect(path1,
|
|
|
792
1204
|
ax.axline(xy1=(0,reg[1]),slope=reg[0],color="#cccccc",linestyle='--',zorder=1)
|
|
793
1205
|
else:
|
|
794
1206
|
ax.axline(xy1=(1,reg[0]+reg[1]),slope=reg[0],color="#cccccc",linestyle='--',zorder=1)
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
1207
|
+
return ax
|
|
1208
|
+
|
|
1209
|
+
|
|
1210
|
+
def configure_legend(fig, ax, legend_mode, is_q, is_q_mc, legend_elements, legend_pos, q_level,
|
|
1211
|
+
font_kwargs,scatterargs,legend_args,
|
|
1212
|
+
legend_title, legend_title2 ):
|
|
800
1213
|
legend_args_to_use ={
|
|
801
1214
|
"framealpha":1,
|
|
802
1215
|
"handlelength":0.7,
|
|
@@ -864,16 +1277,10 @@ def compare_effect(path1,
|
|
|
864
1277
|
label.set_ha('left')
|
|
865
1278
|
label.set_position((-8*width,0))
|
|
866
1279
|
|
|
867
|
-
ax.tick_params(axis='both', labelsize=
|
|
868
|
-
plt.setp(L.texts,**
|
|
869
|
-
plt.setp(L.get_title(),**
|
|
870
|
-
|
|
871
|
-
gc.collect()
|
|
872
|
-
|
|
873
|
-
save_figure(fig, save, keyword="esc",save_args=save_args, log=log, verbose=verbose)
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
return [sig_list_merged, fig,log]
|
|
1280
|
+
ax.tick_params(axis='both', labelsize=font_kwargs["fontsize"])
|
|
1281
|
+
plt.setp(L.texts,**font_kwargs)
|
|
1282
|
+
plt.setp(L.get_title(),**font_kwargs)
|
|
1283
|
+
return ax
|
|
877
1284
|
|
|
878
1285
|
def reorderLegend(ax=None, order=None, add=None):
|
|
879
1286
|
handles, labels = ax.get_legend_handles_labels()
|
|
@@ -882,78 +1289,18 @@ def reorderLegend(ax=None, order=None, add=None):
|
|
|
882
1289
|
new_handles = [info[l] for l in order]
|
|
883
1290
|
return new_handles, order
|
|
884
1291
|
|
|
885
|
-
def
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
pq="HetP"
|
|
891
|
-
i2="I2"
|
|
892
|
-
df[w1]=1/(df[se1])**2
|
|
893
|
-
df[w2]=1/(df[se2])**2
|
|
894
|
-
df[beta] =(df[w1]*df[beta1] + df[w2]*df[beta2])/(df[w1]+df[w2])
|
|
1292
|
+
def reorder_columns(sig_list_merged):
    """Return the table with key columns moved to the front in canonical order.

    Columns from the preferred list come first (only those actually present),
    followed by every remaining column in its original order.
    """
    preferred = ['CHR', 'POS', 'GENENAME',
                 'EA_1', 'NEA_1', 'EFFECT_1', 'SE_1', 'P_1', 'MLOG10P_1',
                 'EA_2_aligned', 'NEA_2_aligned', 'EFFECT_2_aligned', 'SE_2', 'P_2', 'MLOG10P_2',
                 'EA_2', 'NEA_2', 'EFFECT_2',
                 'indicator']

    # preferred columns that exist, then all the rest in their original order
    new_order = [col for col in preferred if col in sig_list_merged.columns]
    new_order += [col for col in sig_list_merged.columns if col not in new_order]

    return sig_list_merged[new_order]
|