gwaslab 3.5.0__py3-none-any.whl → 3.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gwaslab might be problematic. Click here for more details.
- gwaslab/__init__.py +1 -0
- gwaslab/bd_get_hapmap3.py +3 -1
- gwaslab/g_Sumstats.py +15 -9
- gwaslab/g_version.py +2 -2
- gwaslab/io_process_args.py +28 -0
- gwaslab/qc_fix_sumstats.py +9 -5
- gwaslab/vis_plot_credible sets.py +0 -0
- gwaslab/viz_aux_annotate_plot.py +8 -0
- gwaslab/viz_aux_property.py +2 -0
- gwaslab/viz_aux_quickfix.py +8 -6
- gwaslab/viz_aux_save_figure.py +2 -1
- gwaslab/viz_plot_compare_effect.py +787 -468
- gwaslab/viz_plot_mqqplot.py +27 -9
- gwaslab/viz_plot_phe_heatmap.py +1 -1
- gwaslab/viz_plot_regional2.py +44 -9
- gwaslab/viz_plot_scatter_with_reg.py +229 -0
- gwaslab/viz_plot_stackedregional.py +1 -1
- gwaslab/viz_plot_trumpetplot.py +1 -1
- {gwaslab-3.5.0.dist-info → gwaslab-3.5.2.dist-info}/METADATA +3 -3
- {gwaslab-3.5.0.dist-info → gwaslab-3.5.2.dist-info}/RECORD +24 -20
- {gwaslab-3.5.0.dist-info → gwaslab-3.5.2.dist-info}/WHEEL +1 -1
- {gwaslab-3.5.0.dist-info → gwaslab-3.5.2.dist-info}/LICENSE +0 -0
- {gwaslab-3.5.0.dist-info → gwaslab-3.5.2.dist-info}/LICENSE_before_v3.4.39 +0 -0
- {gwaslab-3.5.0.dist-info → gwaslab-3.5.2.dist-info}/top_level.txt +0 -0
|
@@ -4,6 +4,7 @@ import matplotlib.pyplot as plt
|
|
|
4
4
|
import scipy.stats as ss
|
|
5
5
|
import seaborn as sns
|
|
6
6
|
import gc
|
|
7
|
+
import math
|
|
7
8
|
import scipy.stats as ss
|
|
8
9
|
from matplotlib.patches import Rectangle
|
|
9
10
|
from adjustText import adjust_text
|
|
@@ -14,7 +15,8 @@ from gwaslab.g_Log import Log
|
|
|
14
15
|
from gwaslab.util_in_correct_winnerscurse import wc_correct
|
|
15
16
|
from gwaslab.util_in_correct_winnerscurse import wc_correct_test
|
|
16
17
|
from gwaslab.g_Sumstats import Sumstats
|
|
17
|
-
|
|
18
|
+
from gwaslab.io_process_args import _merge_and_sync_dic
|
|
19
|
+
from gwaslab.io_process_args import _extract_kwargs
|
|
18
20
|
#20220422
|
|
19
21
|
def compare_effect(path1,
|
|
20
22
|
path2,
|
|
@@ -31,6 +33,7 @@ def compare_effect(path1,
|
|
|
31
33
|
anno_min1=0,
|
|
32
34
|
anno_min2=0,
|
|
33
35
|
anno_diff=0,
|
|
36
|
+
anno_args=None,
|
|
34
37
|
scaled=False,
|
|
35
38
|
scaled1=False,
|
|
36
39
|
scaled2=False,
|
|
@@ -59,24 +62,36 @@ def compare_effect(path1,
|
|
|
59
62
|
plt_args=None,
|
|
60
63
|
xylabel_prefix="Per-allele effect size in ",
|
|
61
64
|
helper_line_args=None,
|
|
65
|
+
adjust_text_kwargs = None,
|
|
66
|
+
adjust_text_kwargs_l = None,
|
|
67
|
+
adjust_text_kwargs_r = None,
|
|
68
|
+
font_args=None,
|
|
62
69
|
fontargs=None,
|
|
63
70
|
build="19",
|
|
64
71
|
r_or_r2="r",
|
|
65
|
-
#
|
|
66
72
|
errargs=None,
|
|
67
73
|
legend_args=None,
|
|
68
74
|
sep=["\t","\t"],
|
|
69
75
|
log = Log(),
|
|
70
76
|
save=False,
|
|
71
77
|
save_args=None,
|
|
72
|
-
verbose=False
|
|
73
|
-
|
|
78
|
+
verbose=False,
|
|
79
|
+
**kwargs):
|
|
80
|
+
|
|
74
81
|
#[snpid,p,ea,nea] ,[effect,se]
|
|
75
82
|
#[snpid,p,ea,nea,chr,pos],[effect,se]
|
|
76
83
|
#[snpid,p,ea,nea,chr,pos],[OR,OR_l,OR_h]
|
|
77
84
|
if scaled == True:
|
|
78
85
|
scaled1 = True
|
|
79
86
|
scaled2 = True
|
|
87
|
+
|
|
88
|
+
if legend_title== r'$ P < 5 x 10^{-8}$ in:' and sig_level!=5e-8:
|
|
89
|
+
|
|
90
|
+
exponent = math.floor(math.log10(sig_level))
|
|
91
|
+
mantissa = sig_level / 10**exponent
|
|
92
|
+
|
|
93
|
+
legend_title = '$ P < {} x 10^{{{}}}$ in:'.format(mantissa, exponent)
|
|
94
|
+
|
|
80
95
|
if is_q_mc=="fdr" or is_q_mc=="bon":
|
|
81
96
|
is_q = True
|
|
82
97
|
if is_q == True:
|
|
@@ -92,6 +107,8 @@ def compare_effect(path1,
|
|
|
92
107
|
get_lead_args = {}
|
|
93
108
|
if anno=="GENENAME":
|
|
94
109
|
get_lead_args["anno"]=True
|
|
110
|
+
if anno_args is None:
|
|
111
|
+
anno_args = {}
|
|
95
112
|
if errargs is None:
|
|
96
113
|
errargs={"ecolor":"#cccccc","elinewidth":1}
|
|
97
114
|
if fontargs is None:
|
|
@@ -106,155 +123,564 @@ def compare_effect(path1,
|
|
|
106
123
|
label = ["Sumstats_1","Sumstats_2","Both","None"]
|
|
107
124
|
if anno_het ==True:
|
|
108
125
|
is_q=True
|
|
126
|
+
|
|
127
|
+
adjust_text_kwargs_r_default = {"autoalign":False,"precision":0.001,"lim":1000,"ha":"left","va":"top","expand_text":(1,1.8),"expand_objects":(0.1,0.1),"expand_points":(1.8,1.8),"force_objects":(0.8,0.8),"arrowprops":dict(arrowstyle='-|>', color='grey')}
|
|
128
|
+
adjust_text_kwargs_l_default = {"autoalign":False,"precision":0.001,"lim":1000,"ha":"right","va":"bottom","expand_text":(1,1.8),"expand_objects":(0.1,0.1),"expand_points":(1.8,1.8),"force_objects":(0.8,0.8),"arrowprops":dict(arrowstyle='-|>', color='grey')}
|
|
129
|
+
|
|
130
|
+
if adjust_text_kwargs_l is None:
|
|
131
|
+
adjust_text_kwargs_l = adjust_text_kwargs_l_default
|
|
132
|
+
else:
|
|
133
|
+
for key, value in adjust_text_kwargs_l_default.items():
|
|
134
|
+
if key not in adjust_text_kwargs_l:
|
|
135
|
+
adjust_text_kwargs_l[key] = value
|
|
136
|
+
|
|
137
|
+
if adjust_text_kwargs_r is None:
|
|
138
|
+
adjust_text_kwargs_r = adjust_text_kwargs_r_default
|
|
139
|
+
else:
|
|
140
|
+
for key, value in adjust_text_kwargs_r_default.items():
|
|
141
|
+
if key not in adjust_text_kwargs_r:
|
|
142
|
+
adjust_text_kwargs_r[key] = value
|
|
143
|
+
|
|
144
|
+
if adjust_text_kwargs is not None:
|
|
145
|
+
for key, value in adjust_text_kwargs.items():
|
|
146
|
+
adjust_text_kwargs_l[key] = value
|
|
147
|
+
adjust_text_kwargs_r[key] = value
|
|
148
|
+
else:
|
|
149
|
+
adjust_text_kwargs = {}
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
save_kwargs = _extract_kwargs("save", save_args, locals())
|
|
153
|
+
anno_kwargs = _extract_kwargs("anno", anno_args, locals())
|
|
154
|
+
err_kwargs = _extract_kwargs("err", errargs, locals())
|
|
155
|
+
plt_kwargs = _extract_kwargs("plt", plt_args, locals())
|
|
156
|
+
scatter_kwargs = _extract_kwargs("scatter", scatterargs, locals())
|
|
157
|
+
font_kwargs = _extract_kwargs("font",fontargs, locals())
|
|
158
|
+
|
|
159
|
+
log.write("Start to process the raw sumstats for plotting...", verbose=verbose)
|
|
160
|
+
|
|
161
|
+
# configure headers
|
|
162
|
+
cols_name_list_1,cols_name_list_2, effect_cols_list_1, effect_cols_list_2 = configure_headers(mode,
|
|
163
|
+
path1,
|
|
164
|
+
path2,
|
|
165
|
+
cols_name_list_1,
|
|
166
|
+
cols_name_list_2,
|
|
167
|
+
effect_cols_list_1,
|
|
168
|
+
effect_cols_list_2,
|
|
169
|
+
scaled1,
|
|
170
|
+
scaled2,
|
|
171
|
+
log,
|
|
172
|
+
verbose)
|
|
173
|
+
|
|
174
|
+
# extract common variants / load sumstats 1
|
|
175
|
+
sumstats, common_snp_set = configure_common_snp_set(path1,path2,
|
|
176
|
+
snplist,
|
|
177
|
+
label,
|
|
178
|
+
cols_name_list_1,
|
|
179
|
+
cols_name_list_2,
|
|
180
|
+
sep,
|
|
181
|
+
scaled1,
|
|
182
|
+
scaled2,
|
|
183
|
+
log,verbose)
|
|
184
|
+
|
|
185
|
+
# rename sumstats headers -> keywords in gwaslab
|
|
186
|
+
sumstats = rename_sumtats(sumstats=sumstats,
|
|
187
|
+
cols_name_list = cols_name_list_1,
|
|
188
|
+
scaled=scaled1,
|
|
189
|
+
snplist=snplist)
|
|
190
|
+
|
|
191
|
+
# extract only available variants from sumstats1
|
|
192
|
+
sumstats = sumstats.loc[sumstats["SNPID"].isin(common_snp_set),:]
|
|
193
|
+
log.write(" -Using only variants available for both datasets...", verbose=verbose)
|
|
194
|
+
|
|
195
|
+
######### 8 extract SNPs for comparison
|
|
196
|
+
sig_list_1 = extract_snp_for_comparison(sumstats,
|
|
197
|
+
snplist,
|
|
198
|
+
label=label[0],
|
|
199
|
+
get_lead_args=get_lead_args,
|
|
200
|
+
build=build,
|
|
201
|
+
drop=drop,
|
|
202
|
+
anno=anno,
|
|
203
|
+
sig_level=sig_level,
|
|
204
|
+
scaled = scaled1,
|
|
205
|
+
log = log,
|
|
206
|
+
verbose = verbose)
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
######### load sumstats1
|
|
210
|
+
|
|
211
|
+
######### 9 extract snplist2
|
|
212
|
+
if snplist is not None:
|
|
213
|
+
cols_to_extract = [cols_name_list_2[0],cols_name_list_2[1]]
|
|
214
|
+
else:
|
|
215
|
+
cols_to_extract = [cols_name_list_2[0],cols_name_list_2[1],cols_name_list_2[4],cols_name_list_2[5]]
|
|
216
|
+
|
|
217
|
+
sumstats = load_sumstats(path=path2,
|
|
218
|
+
usecols=cols_to_extract,
|
|
219
|
+
label=label[1],
|
|
220
|
+
log=log,
|
|
221
|
+
verbose= verbose,
|
|
222
|
+
sep=sep[1])
|
|
223
|
+
gc.collect()
|
|
224
|
+
|
|
225
|
+
#if scaled2==True:
|
|
226
|
+
# sumstats[cols_name_list_2[1]] = np.power(10,-sumstats[cols_name_list_2[1]])
|
|
227
|
+
|
|
228
|
+
sumstats = rename_sumtats(sumstats=sumstats,
|
|
229
|
+
cols_name_list = cols_name_list_2,
|
|
230
|
+
scaled=scaled2,
|
|
231
|
+
snplist=snplist)
|
|
232
|
+
######### 11 extract only overlapping variants from sumstats2
|
|
233
|
+
sumstats = sumstats.loc[sumstats["SNPID"].isin(common_snp_set),:]
|
|
234
|
+
sig_list_2 = extract_snp_for_comparison(sumstats,
|
|
235
|
+
snplist,
|
|
236
|
+
label=label[1],
|
|
237
|
+
get_lead_args=get_lead_args,
|
|
238
|
+
build=build,
|
|
239
|
+
drop=drop,
|
|
240
|
+
anno=anno,
|
|
241
|
+
sig_level=sig_level,
|
|
242
|
+
scaled = scaled2,
|
|
243
|
+
log = log,
|
|
244
|
+
verbose = verbose)
|
|
245
|
+
|
|
246
|
+
######### 13 Merge two list using SNPID
|
|
247
|
+
sig_list_merged = merge_list(sig_list_1,
|
|
248
|
+
sig_list_2,
|
|
249
|
+
anno = anno,
|
|
250
|
+
labels=label,
|
|
251
|
+
log=log,
|
|
252
|
+
verbose=verbose)
|
|
253
|
+
|
|
254
|
+
###############################################################################
|
|
255
|
+
cols_to_extract = configure_cols_to_extract(mode=mode,
|
|
256
|
+
cols_name_list = cols_name_list_1,
|
|
257
|
+
effect_cols_list= effect_cols_list_1,
|
|
258
|
+
eaf = eaf)
|
|
259
|
+
sumstats = load_sumstats(path=path1,
|
|
260
|
+
usecols=cols_to_extract,
|
|
261
|
+
label=label[0],
|
|
262
|
+
log=log,
|
|
263
|
+
verbose= verbose,
|
|
264
|
+
sep=sep[0])
|
|
265
|
+
|
|
266
|
+
#if scaled1==True:
|
|
267
|
+
# sumstats[cols_name_list_1[1]] = np.power(10,-sumstats[cols_name_list_1[1]])
|
|
268
|
+
sumstats = rename_sumstats_full(mode, sumstats,
|
|
269
|
+
index=1,
|
|
270
|
+
cols_name_list = cols_name_list_1,
|
|
271
|
+
effect_cols_list = effect_cols_list_1,
|
|
272
|
+
eaf = eaf,
|
|
273
|
+
drop = drop,
|
|
274
|
+
scaled=scaled1,
|
|
275
|
+
log=log, verbose=verbose)
|
|
276
|
+
|
|
277
|
+
log.write(" -Merging "+label[0]+" effect information...", verbose=verbose)
|
|
278
|
+
sig_list_merged = pd.merge(sig_list_merged,sumstats,
|
|
279
|
+
left_on="SNPID",right_on="SNPID",
|
|
280
|
+
how="left")
|
|
281
|
+
|
|
282
|
+
############ 15 merging sumstats2
|
|
283
|
+
cols_to_extract = configure_cols_to_extract(mode=mode,
|
|
284
|
+
cols_name_list = cols_name_list_2,
|
|
285
|
+
effect_cols_list= effect_cols_list_2,
|
|
286
|
+
eaf = eaf)
|
|
287
|
+
|
|
288
|
+
sumstats = load_sumstats(path=path2,
|
|
289
|
+
usecols=cols_to_extract,
|
|
290
|
+
label=label[1],
|
|
291
|
+
log=log,
|
|
292
|
+
verbose= verbose,
|
|
293
|
+
sep=sep[1])
|
|
294
|
+
|
|
295
|
+
#if scaled2==True:
|
|
296
|
+
# sumstats[cols_name_list_2[1]] = np.power(10,-sumstats[cols_name_list_2[1]])
|
|
297
|
+
|
|
298
|
+
gc.collect()
|
|
299
|
+
|
|
300
|
+
sumstats = rename_sumstats_full(mode, sumstats,
|
|
301
|
+
index=2,
|
|
302
|
+
cols_name_list = cols_name_list_2,
|
|
303
|
+
effect_cols_list = effect_cols_list_2,
|
|
304
|
+
eaf = eaf,
|
|
305
|
+
drop = drop,
|
|
306
|
+
scaled=scaled2,
|
|
307
|
+
log=log, verbose=verbose)
|
|
308
|
+
|
|
309
|
+
log.write(" -Merging "+label[1]+" effect information...", verbose=verbose)
|
|
310
|
+
sig_list_merged = pd.merge(sig_list_merged,sumstats,
|
|
311
|
+
left_on="SNPID",right_on="SNPID",
|
|
312
|
+
how="left")
|
|
313
|
+
|
|
314
|
+
sig_list_merged.set_index("SNPID",inplace=True)
|
|
315
|
+
|
|
316
|
+
################ 16 update sumstats1
|
|
317
|
+
|
|
318
|
+
sig_list_merged = update_stats(sig_list_merged = sig_list_merged,
|
|
319
|
+
path = path1,
|
|
320
|
+
cols_name_list = cols_name_list_1,
|
|
321
|
+
index=1,
|
|
322
|
+
sep=sep[0],
|
|
323
|
+
snplist = snplist,
|
|
324
|
+
label=label[0],
|
|
325
|
+
drop = drop,
|
|
326
|
+
scaled=scaled1,
|
|
327
|
+
log=log,
|
|
328
|
+
verbose = verbose)
|
|
329
|
+
|
|
330
|
+
################# 17 update sumstats2
|
|
331
|
+
sig_list_merged = update_stats(sig_list_merged = sig_list_merged,
|
|
332
|
+
path = path2,
|
|
333
|
+
cols_name_list = cols_name_list_2,
|
|
334
|
+
index=2,
|
|
335
|
+
sep=sep[1],
|
|
336
|
+
snplist = snplist,
|
|
337
|
+
label=label[1],
|
|
338
|
+
drop = drop,
|
|
339
|
+
scaled=scaled2,
|
|
340
|
+
log=log,
|
|
341
|
+
verbose = verbose)
|
|
342
|
+
|
|
343
|
+
#if scaled1 ==True :
|
|
344
|
+
# log.write(" -Sumstats -log10(P) values are being converted to P...", verbose=verbose)
|
|
345
|
+
# sig_list_merged["P_1"] = np.power(10,-sig_list_merged["P_1"])
|
|
346
|
+
#if scaled2 ==True :
|
|
347
|
+
# log.write(" -Sumstats -log10(P) values are being converted to P...", verbose=verbose)
|
|
348
|
+
# sig_list_merged["P_2"] = np.power(10,-sig_list_merged["P_2"])
|
|
349
|
+
|
|
350
|
+
#################################################################################
|
|
351
|
+
sig_list_merged = assign_indicator(sig_list_merged, snplist, sig_level, scaled1, scaled2, log, verbose)
|
|
352
|
+
|
|
353
|
+
sig_list_merged = align_alleles(sig_list_merged, label, mode, eaf, log, verbose)
|
|
354
|
+
|
|
355
|
+
sig_list_merged = check_allele_match(sig_list_merged, allele_match, label, log,verbose)
|
|
356
|
+
|
|
357
|
+
sig_list_merged = filter_by_maf(sig_list_merged, eaf, maf_level, log, verbose)
|
|
358
|
+
|
|
359
|
+
if fdr==True and scaled==False:
|
|
360
|
+
log.write(" -Using FDR...", verbose=verbose)
|
|
361
|
+
#sig_list_merged["P_1"] = fdrcorrection(sig_list_merged["P_1"])[1]
|
|
362
|
+
#sig_list_merged["P_2"] = fdrcorrection(sig_list_merged["P_2"])[1]
|
|
363
|
+
sig_list_merged["P_1"] =ss.false_discovery_control(sig_list_merged["P_1"])
|
|
364
|
+
sig_list_merged["P_2"] =ss.false_discovery_control(sig_list_merged["P_2"])
|
|
109
365
|
|
|
110
|
-
|
|
366
|
+
####################################################################################################################################
|
|
367
|
+
## winner's curse correction using aligned beta
|
|
368
|
+
sig_list_merged = winnerscurse_correction(sig_list_merged, mode, wc_correction, sig_level,scaled1, scaled2, log, verbose)
|
|
369
|
+
|
|
370
|
+
########################## Het test############################################################
|
|
371
|
+
## heterogeneity test
|
|
372
|
+
if (is_q == True):
|
|
373
|
+
log.write(" -Calculating Cochran's Q statistics and peform chisq test...", verbose=verbose)
|
|
374
|
+
if mode=="beta" or mode=="BETA" or mode=="Beta":
|
|
375
|
+
sig_list_merged = test_q(sig_list_merged,"EFFECT_1","SE_1","EFFECT_2_aligned","SE_2",q_level=q_level,is_q_mc=is_q_mc, log=log, verbose=verbose)
|
|
376
|
+
else:
|
|
377
|
+
sig_list_merged = test_q(sig_list_merged,"BETA_1","SE_1","BETA_2_aligned","SE_2",q_level=q_level,is_q_mc=is_q_mc, log=log, verbose=verbose)
|
|
378
|
+
|
|
379
|
+
# heterogeneity summary
|
|
380
|
+
log.write(" -Significant het:" ,len(sig_list_merged.loc[sig_list_merged["HetP"]<0.05,:]), verbose=verbose)
|
|
381
|
+
log.write(" -All sig:" ,len(sig_list_merged), verbose=verbose)
|
|
382
|
+
log.write(" -Het rate:" ,len(sig_list_merged.loc[sig_list_merged["HetP"]<0.05,:])/len(sig_list_merged), verbose=verbose)
|
|
383
|
+
|
|
384
|
+
######################### save ###############################################################
|
|
385
|
+
## save the merged data
|
|
386
|
+
save_path = label[0]+"_"+label[1]+"_beta_sig_list_merged.tsv"
|
|
387
|
+
log.write(" -Saving the merged data to:",save_path, verbose=verbose)
|
|
388
|
+
sig_list_merged = reorder_columns(sig_list_merged)
|
|
389
|
+
sig_list_merged.to_csv(save_path,sep="\t")
|
|
390
|
+
|
|
391
|
+
# extract group
|
|
392
|
+
if include_all==True:
|
|
393
|
+
sum0 = sig_list_merged.loc[sig_list_merged["indicator"]==0,:].dropna(axis=0)
|
|
394
|
+
else:
|
|
395
|
+
sum0 = pd.DataFrame()
|
|
396
|
+
|
|
397
|
+
sum1only = sig_list_merged.loc[sig_list_merged["indicator"]==1,:].copy()
|
|
398
|
+
sum2only = sig_list_merged.loc[sig_list_merged["indicator"]==2,:].copy()
|
|
399
|
+
both = sig_list_merged.loc[sig_list_merged["indicator"]==3,:].copy()
|
|
400
|
+
|
|
401
|
+
if is_q==False:
|
|
402
|
+
sum0["Edge_color"]="none"
|
|
403
|
+
sum1only["Edge_color"]="none"
|
|
404
|
+
sum2only["Edge_color"]="none"
|
|
405
|
+
both["Edge_color"]="none"
|
|
406
|
+
|
|
407
|
+
log.write(" -Identified "+str(len(sum0)) + " variants which are not significant in " + label[3]+".", verbose=verbose)
|
|
408
|
+
log.write(" -Identified "+str(len(sum1only)) + " variants which are only significant in " + label[0]+".", verbose=verbose)
|
|
409
|
+
log.write(" -Identified "+str(len(sum2only)) + " variants which are only significant in " + label[1]+".", verbose=verbose)
|
|
410
|
+
log.write(" -Identified "+str(len(both)) + " variants which are significant in " + label[2] + ".", verbose=verbose)
|
|
411
|
+
|
|
412
|
+
##plot########################################################################################
|
|
413
|
+
log.write("Creating the scatter plot for effect sizes comparison...", verbose=verbose)
|
|
414
|
+
#plt.style.use("ggplot")
|
|
415
|
+
sns.set_style("ticks")
|
|
416
|
+
fig,ax = plt.subplots(**plt_kwargs)
|
|
417
|
+
legend_elements=[]
|
|
418
|
+
if mode=="beta" or mode=="BETA" or mode=="Beta":
|
|
419
|
+
if len(sum0)>0:
|
|
420
|
+
ax.errorbar(sum0["EFFECT_1"],sum0["EFFECT_2_aligned"], xerr=sum0["SE_1"],yerr=sum0["SE_2"],
|
|
421
|
+
linewidth=0,zorder=1,**err_kwargs)
|
|
422
|
+
|
|
423
|
+
ax.scatter(sum0["EFFECT_1"],sum0["EFFECT_2_aligned"],label=label[3],zorder=2,color="#cccccc",edgecolors=sum0["Edge_color"],marker=".",**scatter_kwargs)
|
|
424
|
+
#legend_elements.append(mpatches.Circle(facecolor='#cccccc', edgecolor='white', label=label[3]))
|
|
425
|
+
legend_elements.append(label[3])
|
|
426
|
+
if len(sum1only)>0:
|
|
427
|
+
ax.errorbar(sum1only["EFFECT_1"],sum1only["EFFECT_2_aligned"], xerr=sum1only["SE_1"],yerr=sum1only["SE_2"],
|
|
428
|
+
linewidth=0,zorder=1,**err_kwargs)
|
|
429
|
+
ax.scatter(sum1only["EFFECT_1"],sum1only["EFFECT_2_aligned"],label=label[0],zorder=2,color="#e6320e",edgecolors=sum1only["Edge_color"],marker="^",**scatter_kwargs)
|
|
430
|
+
#legend_elements.append(mpatches.Patch(facecolor='#e6320e', edgecolor='white', label=label[0]))
|
|
431
|
+
legend_elements.append(label[0])
|
|
432
|
+
if len(sum2only)>0:
|
|
433
|
+
ax.errorbar(sum2only["EFFECT_1"],sum2only["EFFECT_2_aligned"], xerr=sum2only["SE_1"],yerr=sum2only["SE_2"],
|
|
434
|
+
linewidth=0,zorder=1,**err_kwargs)
|
|
435
|
+
ax.scatter(sum2only["EFFECT_1"],sum2only["EFFECT_2_aligned"],label=label[1],zorder=2,color="#41e620",edgecolors=sum2only["Edge_color"],marker="o",**scatter_kwargs)
|
|
436
|
+
#legend_elements.append(mpatches.Circle(facecolor='#41e620', edgecolor='white', label=label[1]))
|
|
437
|
+
legend_elements.append(label[1])
|
|
438
|
+
if len(both)>0:
|
|
439
|
+
ax.errorbar(both["EFFECT_1"],both["EFFECT_2_aligned"], xerr=both["SE_1"],yerr=both["SE_2"],
|
|
440
|
+
linewidth=0,zorder=1,**err_kwargs)
|
|
441
|
+
ax.scatter(both["EFFECT_1"],both["EFFECT_2_aligned"],label=label[2],zorder=2,color="#205be6",edgecolors=both["Edge_color"],marker="s",**scatter_kwargs)
|
|
442
|
+
#legend_elements.append(mpatches.Patch(facecolor='#205be6', edgecolor='white', label=label[2]))
|
|
443
|
+
legend_elements.append(label[2])
|
|
444
|
+
else:
|
|
445
|
+
## if OR
|
|
446
|
+
if len(sum0)>0:
|
|
447
|
+
ax.errorbar(sum0["OR_1"],sum0["OR_2_aligned"], xerr=sum0[["OR_L_1_err","OR_H_1_err"]].T,yerr=sum0[["OR_L_2_aligned_err","OR_H_2_aligned_err"]].T,
|
|
448
|
+
linewidth=0,zorder=1,**err_kwargs)
|
|
449
|
+
ax.scatter(sum0["OR_1"],sum0["OR_2_aligned"],label=label[3],zorder=2,color="#cccccc",edgecolors=sum0["Edge_color"],marker=".",**scatter_kwargs)
|
|
450
|
+
legend_elements.append(label[3])
|
|
451
|
+
if len(sum1only)>0:
|
|
452
|
+
ax.errorbar(sum1only["OR_1"],sum1only["OR_2_aligned"], xerr=sum1only[["OR_L_1_err","OR_H_1_err"]].T,yerr=sum1only[["OR_L_2_aligned_err","OR_H_2_aligned_err"]].T,
|
|
453
|
+
linewidth=0,zorder=1,**err_kwargs)
|
|
454
|
+
ax.scatter(sum1only["OR_1"],sum1only["OR_2_aligned"],label=label[0],zorder=2,color="#e6320e",edgecolors=sum1only["Edge_color"],marker="^",**scatter_kwargs)
|
|
455
|
+
legend_elements.append(label[0])
|
|
456
|
+
if len(sum2only)>0:
|
|
457
|
+
ax.errorbar(sum2only["OR_1"],sum2only["OR_2_aligned"], xerr=sum2only[["OR_L_1_err","OR_H_1_err"]].T,yerr=sum2only[["OR_L_2_aligned_err","OR_H_2_aligned_err"]].T,
|
|
458
|
+
linewidth=0,zorder=1,**err_kwargs)
|
|
459
|
+
ax.scatter(sum2only["OR_1"],sum2only["OR_2_aligned"],label=label[1],zorder=2,color="#41e620",edgecolors=sum2only["Edge_color"],marker="o",**scatter_kwargs)
|
|
460
|
+
legend_elements.append(label[1])
|
|
461
|
+
if len(both)>0:
|
|
462
|
+
ax.errorbar(both["OR_1"],both["OR_2_aligned"], xerr=both[["OR_L_1_err","OR_H_1_err"]].T,yerr=both[["OR_L_2_aligned_err","OR_H_2_aligned_err"]].T,
|
|
463
|
+
linewidth=0,zorder=1,**err_kwargs)
|
|
464
|
+
ax.scatter(both["OR_1"],both["OR_2_aligned"],label=label[2],zorder=2,color="#205be6",edgecolors=both["Edge_color"],marker="s",**scatter_kwargs)
|
|
465
|
+
legend_elements.append(label[2])
|
|
466
|
+
## annotation #################################################################################################################
|
|
467
|
+
ax = scatter_annotation(ax, sig_list_merged,anno, anno_het, is_q, mode,
|
|
468
|
+
anno_min,anno_min1,anno_min2,anno_diff,anno_kwargs,adjust_text_kwargs_l,adjust_text_kwargs_r,
|
|
469
|
+
log,verbose
|
|
470
|
+
)
|
|
471
|
+
#################################################################################################################################
|
|
472
|
+
|
|
473
|
+
# plot x=0,y=0, and a 45 degree line
|
|
474
|
+
xl,xh=ax.get_xlim()
|
|
475
|
+
yl,yh=ax.get_ylim()
|
|
476
|
+
|
|
477
|
+
if mode=="beta" or mode=="BETA" or mode=="Beta":
|
|
478
|
+
#if using beta
|
|
479
|
+
ax.axhline(y=0, zorder=1,**helper_line_args)
|
|
480
|
+
ax.axvline(x=0, zorder=1,**helper_line_args)
|
|
481
|
+
else:
|
|
482
|
+
#if using OR
|
|
483
|
+
ax.axhline(y=1, zorder=1,**helper_line_args)
|
|
484
|
+
ax.axvline(x=1, zorder=1,**helper_line_args)
|
|
485
|
+
|
|
486
|
+
for spine in ['top', 'right']:
|
|
487
|
+
ax.spines[spine].set_visible(False)
|
|
488
|
+
|
|
489
|
+
###regression line##############################################################################################################################
|
|
490
|
+
ax = confire_regression_line(is_reg,reg_box, sig_list_merged, ax, mode,xl,yl,xh,yh, null_beta, r_se,
|
|
491
|
+
is_45_helper_line,helper_line_args, font_kwargs,
|
|
492
|
+
log, verbose)
|
|
493
|
+
|
|
494
|
+
|
|
495
|
+
ax.set_xlabel(xylabel_prefix+label[0],**font_kwargs)
|
|
496
|
+
ax.set_ylabel(xylabel_prefix+label[1],**font_kwargs)
|
|
497
|
+
|
|
498
|
+
ax = configure_legend(fig, ax, legend_mode, is_q, is_q_mc, legend_elements, legend_pos, q_level,
|
|
499
|
+
font_kwargs,scatterargs,legend_args,
|
|
500
|
+
legend_title, legend_title2 )
|
|
501
|
+
##plot finished########################################################################################
|
|
502
|
+
gc.collect()
|
|
503
|
+
|
|
504
|
+
save_figure(fig, save, keyword="esc",save_args=save_kwargs, log=log, verbose=verbose)
|
|
505
|
+
|
|
506
|
+
sig_list_merged = reorder_columns(sig_list_merged)
|
|
507
|
+
|
|
508
|
+
return [sig_list_merged, fig,log]
|
|
509
|
+
|
|
510
|
+
###############################################################################################
|
|
511
|
+
###############################################################################################
|
|
512
|
+
###############################################################################################
|
|
513
|
+
###############################################################################################
|
|
514
|
+
###############################################################################################
|
|
515
|
+
###############################################################################################
|
|
516
|
+
###############################################################################################
|
|
517
|
+
###############################################################################################
|
|
518
|
+
###############################################################################################
|
|
519
|
+
###############################################################################################
|
|
520
|
+
###############################################################################################
|
|
521
|
+
###############################################################################################
|
|
522
|
+
###############################################################################################
|
|
523
|
+
|
|
524
|
+
def load_sumstats(path, usecols, label, log, verbose, sep):
|
|
525
|
+
if type(usecols) is not list:
|
|
526
|
+
usecols = [usecols]
|
|
527
|
+
|
|
528
|
+
log.write(" -Loading sumstats for {} : {}".format(label,",".join(usecols)), verbose=verbose)
|
|
529
|
+
#log.write(" -Loading {} SNP list in memory...".format(label), verbose=verbose)
|
|
530
|
+
|
|
531
|
+
if type(path) is Sumstats:
|
|
532
|
+
sumstats = path.data.loc[:,usecols].copy()
|
|
533
|
+
elif type(path) is pd.DataFrame:
|
|
534
|
+
sumstats = path.loc[:,usecols].copy()
|
|
535
|
+
else:
|
|
536
|
+
sumstats=pd.read_table(path,sep=sep,usecols=usecols)
|
|
537
|
+
return sumstats
|
|
538
|
+
|
|
539
|
+
def configure_headers(mode,
|
|
540
|
+
path1,
|
|
541
|
+
path2,
|
|
542
|
+
cols_name_list_1,
|
|
543
|
+
cols_name_list_2,
|
|
544
|
+
effect_cols_list_1,
|
|
545
|
+
effect_cols_list_2,
|
|
546
|
+
scaled1,
|
|
547
|
+
scaled2,
|
|
548
|
+
log,
|
|
549
|
+
verbose):
|
|
111
550
|
|
|
112
|
-
######### 1 check the value used to plot
|
|
113
551
|
if mode not in ["Beta","beta","BETA","OR","or"]:
|
|
114
552
|
raise ValueError("Please input Beta or OR")
|
|
115
553
|
|
|
116
554
|
if type(path1) is Sumstats:
|
|
117
|
-
log.write("Path1 is gwaslab Sumstats object...")
|
|
555
|
+
log.write("Path1 is gwaslab Sumstats object...", verbose=verbose)
|
|
118
556
|
if cols_name_list_1 is None:
|
|
119
557
|
cols_name_list_1 = ["SNPID","P","EA","NEA","CHR","POS"]
|
|
558
|
+
if scaled1==True:
|
|
559
|
+
cols_name_list_1 = ["SNPID","MLOG10P","EA","NEA","CHR","POS"]
|
|
120
560
|
if effect_cols_list_1 is None:
|
|
121
561
|
if mode=="beta":
|
|
122
562
|
effect_cols_list_1 = ["BETA","SE"]
|
|
123
563
|
else:
|
|
124
564
|
effect_cols_list_1 = ["OR","OR_95L","OR_95U"]
|
|
125
565
|
elif type(path1) is pd.DataFrame:
|
|
126
|
-
log.write("Path1 is pandas DataFrame object...")
|
|
566
|
+
log.write("Path1 is pandas DataFrame object...", verbose=verbose)
|
|
127
567
|
|
|
128
568
|
if type(path2) is Sumstats:
|
|
129
|
-
log.write("Path2 is gwaslab Sumstats object...")
|
|
569
|
+
log.write("Path2 is gwaslab Sumstats object...", verbose=verbose)
|
|
130
570
|
if cols_name_list_2 is None:
|
|
131
571
|
cols_name_list_2 = ["SNPID","P","EA","NEA","CHR","POS"]
|
|
572
|
+
if scaled2==True:
|
|
573
|
+
cols_name_list_2 = ["SNPID","MLOG10P","EA","NEA","CHR","POS"]
|
|
132
574
|
if effect_cols_list_2 is None:
|
|
133
575
|
if mode=="beta":
|
|
134
576
|
effect_cols_list_2 = ["BETA","SE"]
|
|
135
577
|
else:
|
|
136
578
|
effect_cols_list_2 = ["OR","OR_95L","OR_95U"]
|
|
137
579
|
elif type(path2) is pd.DataFrame:
|
|
138
|
-
log.write("Path2 is pandas DataFrame object...")
|
|
580
|
+
log.write("Path2 is pandas DataFrame object...", verbose=verbose)
|
|
581
|
+
|
|
582
|
+
return cols_name_list_1,cols_name_list_2, effect_cols_list_1, effect_cols_list_2
|
|
583
|
+
|
|
584
|
+
def configure_common_snp_set(path1,path2,
|
|
585
|
+
snplist,
|
|
586
|
+
label,
|
|
587
|
+
cols_name_list_1,cols_name_list_2,
|
|
588
|
+
sep,
|
|
589
|
+
scaled1,
|
|
590
|
+
scaled2,
|
|
591
|
+
log,verbose):
|
|
139
592
|
|
|
140
|
-
#########
|
|
141
|
-
|
|
593
|
+
######### load sumstats2
|
|
594
|
+
sumstats = load_sumstats(path=path2,
|
|
595
|
+
usecols=cols_name_list_2[0],
|
|
596
|
+
label=label[1],
|
|
597
|
+
log=log,
|
|
598
|
+
verbose= verbose,
|
|
599
|
+
sep=sep[1])
|
|
142
600
|
|
|
143
|
-
if type(path2) is Sumstats:
|
|
144
|
-
sumstats = path2.data[[cols_name_list_2[0]]].copy()
|
|
145
|
-
elif type(path2) is pd.DataFrame:
|
|
146
|
-
sumstats = path2[[cols_name_list_2[0]]].copy()
|
|
147
|
-
else:
|
|
148
|
-
sumstats=pd.read_table(path2,sep=sep[1],usecols=[cols_name_list_2[0]])
|
|
149
|
-
|
|
150
601
|
common_snp_set=set(sumstats[cols_name_list_2[0]].values)
|
|
151
602
|
|
|
152
|
-
#########
|
|
603
|
+
######### extract snplist1
|
|
153
604
|
if snplist is not None:
|
|
605
|
+
#use only SNPID, P
|
|
154
606
|
cols_to_extract = [cols_name_list_1[0],cols_name_list_1[1]]
|
|
155
607
|
else:
|
|
608
|
+
# use SNPID, P, chr pos
|
|
156
609
|
cols_to_extract = [cols_name_list_1[0],cols_name_list_1[1],cols_name_list_1[4],cols_name_list_1[5]]
|
|
157
610
|
|
|
158
|
-
#########
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
else:
|
|
166
|
-
sumstats = pd.read_table(path1,sep=sep[0],usecols=cols_to_extract)
|
|
611
|
+
######### load sumstats1
|
|
612
|
+
sumstats = load_sumstats(path=path1,
|
|
613
|
+
usecols=cols_to_extract,
|
|
614
|
+
label=label[0],
|
|
615
|
+
log=log,
|
|
616
|
+
verbose= verbose,
|
|
617
|
+
sep=sep[0])
|
|
167
618
|
|
|
168
619
|
gc.collect()
|
|
169
620
|
|
|
170
|
-
if scaled1==True:
|
|
171
|
-
|
|
621
|
+
#if scaled1==True:
|
|
622
|
+
# sumstats[cols_name_list_1[1]] = np.power(10,-sumstats[cols_name_list_1[1]])
|
|
172
623
|
######### 5 extract the common set
|
|
624
|
+
|
|
173
625
|
common_snp_set = common_snp_set.intersection(sumstats[cols_name_list_1[0]].values)
|
|
174
|
-
log.write(" -Counting variants available for both datasets:",len(common_snp_set)," variants...")
|
|
175
626
|
|
|
627
|
+
log.write(" -Counting variants available for both datasets:",len(common_snp_set)," variants...", verbose=verbose)
|
|
628
|
+
|
|
629
|
+
return sumstats, common_snp_set
|
|
630
|
+
|
|
631
|
+
def rename_sumtats(sumstats, cols_name_list, snplist, scaled,suffix=""):
|
|
176
632
|
######### 6 rename the sumstats
|
|
177
|
-
rename_dict = {
|
|
178
|
-
|
|
633
|
+
rename_dict = { cols_name_list[0]:"SNPID",
|
|
634
|
+
cols_name_list[1]:"P{}".format(suffix),
|
|
179
635
|
}
|
|
636
|
+
if scaled==True:
|
|
637
|
+
rename_dict[cols_name_list[1]] = "MLOG10P{}".format(suffix)
|
|
180
638
|
|
|
181
639
|
if snplist is None:
|
|
182
|
-
rename_dict[
|
|
183
|
-
rename_dict[
|
|
184
|
-
|
|
185
|
-
sumstats.rename(columns=rename_dict,inplace=True)
|
|
640
|
+
rename_dict[cols_name_list[4]]="CHR"
|
|
641
|
+
rename_dict[cols_name_list[5]]="POS"
|
|
186
642
|
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
643
|
+
sumstats = sumstats.rename(columns=rename_dict)
|
|
644
|
+
return sumstats
|
|
645
|
+
|
|
646
|
+
|
|
647
|
+
def extract_snp_for_comparison(sumstats, snplist, label,
                               get_lead_args, build, drop, anno,
                               sig_level,scaled, log, verbose):
    """Select the variants of one dataset that will be compared.

    Parameters
    ----------
    sumstats : pandas.DataFrame
        Renamed summary statistics; must contain a ``SNPID`` column.
    snplist : collection or None
        If given, only variants whose SNPID is in this collection are kept.
        If None, variants are chosen by ``getsig`` (defined elsewhere;
        presumably lead-variant detection -- confirm against its definition).
    label : str
        Dataset label used in log messages.
    get_lead_args : dict
        Extra keyword arguments forwarded to ``annogene`` / ``getsig``.
    build : object
        Forwarded as ``build=`` to ``annogene`` / ``getsig``.
    drop : bool
        When True, rows with missing or duplicated SNPID are removed.
    anno : object
        When equal to ``"GENENAME"`` and a ``snplist`` is given, the selected
        rows are passed through ``annogene``.
    sig_level : float
        Forwarded as ``sig_level=`` to ``getsig``.
    scaled : bool
        True when significance is stored as ``MLOG10P`` rather than ``P``;
        decides which column and sort direction is used when dropping
        duplicates.
    log : object
        Logger exposing ``write(*message, verbose=...)``.
    verbose : bool
        Forwarded to every log call and helper.

    Returns
    -------
    pandas.DataFrame
        The selected variants.
    """
    ######### 8 extract SNPs for comparison
    if snplist is not None:
        ######### 8.1 if a snplist is provided, use the snp list
        # verbose is forwarded here too, so verbose=False silences this message
        # exactly like the one in the other branch
        log.write(" -Extract variants in the given list from "+label+"...", verbose=verbose)
        sig_list = sumstats.loc[sumstats["SNPID"].isin(snplist),:].copy()
        if anno=="GENENAME":
            sig_list = annogene(sig_list,"SNPID","CHR","POS", build=build, verbose=verbose, **get_lead_args)
    else:
        ######### 8.2 otherwise use the automatically detected lead SNPs
        log.write(" -Extract lead variants from "+label +"...", verbose=verbose)
        sig_list = getsig(sumstats,"SNPID","CHR","POS","P","MLOG10P", build=build, verbose=verbose,sig_level=sig_level,**get_lead_args)

    if drop==True:
        # keep the most significant record of each duplicated SNPID:
        # largest -log10(P) when scaled, smallest P otherwise
        if scaled==True:
            sig_list = drop_duplicate_and_na(sig_list, sort_by="MLOG10P",ascending=False, log=log , verbose=verbose)
        else:
            sig_list = drop_duplicate_and_na(sig_list, sort_by="P", ascending=True, log=log , verbose=verbose)

    return sig_list
|
|
669
|
+
|
|
670
|
+
def merge_list(sig_list_1, sig_list_2, anno,labels,log, verbose):
|
|
234
671
|
|
|
235
|
-
|
|
236
|
-
sumstats = sumstats.loc[sumstats["SNPID"].isin(common_snp_set),:]
|
|
672
|
+
log.write("Merging snps from "+labels[0]+" and "+labels[1]+"...", verbose=verbose)
|
|
237
673
|
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
sig_list_2
|
|
245
|
-
else:
|
|
246
|
-
log.write(" -Extract lead snps from "+label[1]+"...")
|
|
247
|
-
######### 12.2 otherwise use the sutomatically detected lead SNPs
|
|
248
|
-
sig_list_2 = getsig(sumstats,"SNPID","CHR","POS","P",build=build,
|
|
249
|
-
verbose=verbose,sig_level=sig_level,**get_lead_args)
|
|
250
|
-
if drop==True:
|
|
251
|
-
sig_list_2 = drop_duplicate_and_na(sig_list_2, sort_by="P", log=log ,verbose=verbose)
|
|
674
|
+
if anno == "GENENAME":
|
|
675
|
+
if "GENE" not in sig_list_1.columns:
|
|
676
|
+
sig_list_1["GENE"]=pd.NA
|
|
677
|
+
sig_list_1["LOCATION"]=pd.NA
|
|
678
|
+
if "GENE" not in sig_list_2.columns:
|
|
679
|
+
sig_list_2["GENE"]=pd.NA
|
|
680
|
+
sig_list_2["LOCATION"]=pd.NA
|
|
252
681
|
|
|
253
|
-
######### 13 Merge two list using SNPID
|
|
254
|
-
##############################################################################
|
|
255
|
-
log.write("Merging snps from "+label[0]+" and "+label[1]+"...")
|
|
256
|
-
|
|
257
682
|
sig_list_merged = pd.merge(sig_list_1,sig_list_2,left_on="SNPID",right_on="SNPID",how="outer",suffixes=('_1', '_2'))
|
|
683
|
+
|
|
258
684
|
if anno == "GENENAME":
|
|
259
685
|
sig_list_merged.loc[sig_list_merged["SNPID"].isin((sig_list_1["SNPID"])),"GENENAME"] = sig_list_merged.loc[sig_list_merged["SNPID"].isin((sig_list_1["SNPID"])),"GENE_1"]
|
|
260
686
|
sig_list_merged.loc[~sig_list_merged["SNPID"].isin((sig_list_1["SNPID"])),"GENENAME"] = sig_list_merged.loc[~sig_list_merged["SNPID"].isin((sig_list_1["SNPID"])),"GENE_2"]
|
|
@@ -262,173 +688,109 @@ def compare_effect(path1,
|
|
|
262
688
|
# SNPID P_1 P_2
|
|
263
689
|
#0 rs117986209 0.142569 0.394455
|
|
264
690
|
#1 rs6704312 0.652104 0.143750
|
|
691
|
+
return sig_list_merged
|
|
265
692
|
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
693
|
+
def configure_cols_to_extract(mode,
                              cols_name_list,
                              effect_cols_list,
                              eaf):
    """Build the list of column names to read for the effect comparison.

    Parameters
    ----------
    mode : str
        ``"beta"``, ``"BETA"`` or ``"Beta"`` selects two effect columns
        ([effect, se]); any other value selects three ([OR, OR_l, OR_h]).
    cols_name_list : sequence of str
        Its first four entries ([snpid, p, ea, nea]) are always included.
    effect_cols_list : sequence of str
        Effect-size column names for the chosen mode.
    eaf : sequence of str
        If non-empty, its first entry is appended to the result.

    Returns
    -------
    list of str
    """
    # beta mode: [effect, se]; otherwise: [OR, OR_l, OR_h]
    n_effect_cols = 2 if mode in ("beta", "BETA", "Beta") else 3

    selected = [cols_name_list[i] for i in range(4)]
    selected.extend(effect_cols_list[i] for i in range(n_effect_cols))

    # NOTE(review): eaf[0] is used whichever dataset is being configured,
    # while rename_sumstats_full reads eaf[index-1] -- confirm the caller
    # passes the matching EAF column name first.
    if len(eaf) > 0:
        selected.append(eaf[0])

    return selected
|
|
290
710
|
|
|
711
|
+
def rename_sumstats_full(mode, sumstats, cols_name_list, effect_cols_list, eaf, drop, index, scaled, log, verbose):
    """Rename one dataset's columns to indexed standard headers and drop its P column.

    Parameters
    ----------
    mode : str
        ``"beta"``/``"BETA"``/``"Beta"`` maps the effect columns to
        ``EFFECT_<index>`` and ``SE_<index>``; any other value maps them to
        ``OR_<index>``, ``OR_L_<index>`` and ``OR_H_<index>``.
    sumstats : pandas.DataFrame
        Table to rename; the caller's object is not renamed in place.
    cols_name_list : sequence of str
        Original names of [snpid, p, ea, nea].
    effect_cols_list : sequence of str
        Original effect column names for the chosen mode.
    eaf : sequence of str
        If non-empty, ``eaf[index-1]`` is renamed to ``EAF_<index>``.
    drop : bool
        When True, rows with missing/duplicated SNPID are removed via
        ``drop_duplicate_and_na``.
    index : int
        Dataset number used as the suffix of the standard headers.
    scaled : bool
        True when the P column holds -log10(P); it is then labelled
        ``MLOG10P_<index>`` and sorted in descending order when dropping.
    log, verbose
        Forwarded to ``drop_duplicate_and_na``.

    Returns
    -------
    pandas.DataFrame
        Renamed table without the ``P_<index>`` / ``MLOG10P_<index>`` column.
    """
    if mode in ("beta", "BETA", "Beta"):
        effect_labels = ("EFFECT", "SE")
    else:
        # if or
        effect_labels = ("OR", "OR_L", "OR_H")

    # original names, in the same order as the standard headers below
    source_cols = [cols_name_list[i] for i in range(4)]
    source_cols += [effect_cols_list[i] for i in range(len(effect_labels))]
    target_cols = ["SNPID"]
    target_cols += ["{}_{}".format(stem, index) for stem in ("P", "EA", "NEA") + effect_labels]
    column_map = dict(zip(source_cols, target_cols))

    is_scaled = scaled == True
    p_col = ("MLOG10P_{}" if is_scaled else "P_{}").format(index)
    if is_scaled:
        column_map[cols_name_list[1]] = p_col

    ## check if eaf column is provided.
    if len(eaf) > 0:
        column_map[eaf[index-1]] = "EAF_{}".format(index)

    renamed = sumstats.rename(columns=column_map)

    # drop na and duplicate, keeping the most significant record per SNPID
    if drop == True:
        if is_scaled:
            renamed = drop_duplicate_and_na(renamed, sort_by=p_col, ascending=False, log=log, verbose=verbose)
        else:
            renamed = drop_duplicate_and_na(renamed, sort_by=p_col, ascending=True, log=log, verbose=verbose)

    # the significance column was only needed for the ordering above
    renamed.drop(p_col, axis=1, inplace=True)
    return renamed
|
|
334
749
|
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
750
|
+
def update_stats(sig_list_merged,
                 path,
                 cols_name_list,
                 sep,
                 snplist,
                 label,
                 drop,
                 index,
                 scaled,
                 log,
                 verbose):
    """Refresh one dataset's significance column in the merged variant table.

    Only the variant-ID and P (or -log10 P) columns are loaded from ``path``
    through ``load_sumstats`` (defined elsewhere), renamed to ``SNPID`` and
    ``P_<index>`` / ``MLOG10P_<index>``, optionally de-duplicated, and then
    written into ``sig_list_merged`` with ``DataFrame.update``.

    ``sig_list_merged`` is modified in place and also returned.
    NOTE(review): ``DataFrame.update`` aligns on the index, so this presumably
    expects ``sig_list_merged`` to be indexed by SNPID -- confirm at the caller.
    """
    log.write(" -Updating missing information for "+label+" ...", verbose=verbose)

    id_and_p_cols = [cols_name_list[0], cols_name_list[1]]
    loaded = load_sumstats(path=path,
                           usecols=id_and_p_cols,
                           label=label,
                           log=log,
                           verbose=verbose,
                           sep=sep)

    dataset_suffix = "_{}".format(index)
    loaded = rename_sumtats(sumstats=loaded,
                            cols_name_list=cols_name_list,
                            snplist=snplist,
                            scaled=scaled,
                            suffix=dataset_suffix)

    # drop na and duplicate, keeping the most significant record per SNPID
    if drop == True:
        if scaled == True:
            sort_column, sort_ascending = "MLOG10P_{}".format(index), False
        else:
            sort_column, sort_ascending = "P_{}".format(index), True
        loaded = drop_duplicate_and_na(loaded, sort_by=sort_column, ascending=sort_ascending, log=log, verbose=verbose)

    loaded = loaded.set_index("SNPID")
    sig_list_merged.update(loaded)

    return sig_list_merged
|
|
423
791
|
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
sig_list_merged["P_1"] = np.power(10,-sig_list_merged["P_1"])
|
|
427
|
-
if scaled2 ==True :
|
|
428
|
-
log.write(" -Sumstats -log10(P) values are being converted to P...", verbose=verbose)
|
|
429
|
-
sig_list_merged["P_2"] = np.power(10,-sig_list_merged["P_2"])
|
|
430
|
-
####
|
|
431
|
-
#################################################################################
|
|
792
|
+
|
|
793
|
+
def assign_indicator(sig_list_merged, snplist, sig_level, scaled1, scaled2, log, verbose):
|
|
432
794
|
############## 18 init indicator
|
|
433
795
|
log.write(" -Assigning indicator ...", verbose=verbose)
|
|
434
796
|
# 0-> 0
|
|
@@ -436,14 +798,24 @@ def compare_effect(path1,
|
|
|
436
798
|
# 2 -> sig in sumsatts2
|
|
437
799
|
# 3-> sig in both sumstats1 + sumstats2
|
|
438
800
|
sig_list_merged["indicator"] = 0
|
|
439
|
-
|
|
440
|
-
|
|
801
|
+
|
|
802
|
+
if scaled1==True:
|
|
803
|
+
sig_list_merged.loc[sig_list_merged["MLOG10P_1"]>-np.log10(sig_level),"indicator"]=1+sig_list_merged.loc[sig_list_merged["MLOG10P_1"]>-np.log10(sig_level),"indicator"]
|
|
804
|
+
else:
|
|
805
|
+
sig_list_merged.loc[sig_list_merged["P_1"]<sig_level,"indicator"]=1+sig_list_merged.loc[sig_list_merged["P_1"]<sig_level,"indicator"]
|
|
806
|
+
|
|
807
|
+
if scaled2==True:
|
|
808
|
+
sig_list_merged.loc[sig_list_merged["MLOG10P_2"]>-np.log10(sig_level),"indicator"]=2+sig_list_merged.loc[sig_list_merged["MLOG10P_2"]>-np.log10(sig_level),"indicator"]
|
|
809
|
+
else:
|
|
810
|
+
sig_list_merged.loc[sig_list_merged["P_2"]<sig_level,"indicator"]=2+sig_list_merged.loc[sig_list_merged["P_2"]<sig_level,"indicator"]
|
|
441
811
|
|
|
442
812
|
if snplist is None:
|
|
443
813
|
sig_list_merged["CHR"]=np.max(sig_list_merged[["CHR_1","CHR_2"]], axis=1).astype(int)
|
|
444
814
|
sig_list_merged["POS"]=np.max(sig_list_merged[["POS_1","POS_2"]], axis=1).astype(int)
|
|
445
815
|
sig_list_merged.drop(labels=['CHR_1', 'CHR_2','POS_1', 'POS_2'], axis=1,inplace=True)
|
|
446
|
-
|
|
816
|
+
return sig_list_merged
|
|
817
|
+
|
|
818
|
+
def align_alleles(sig_list_merged, label,mode,eaf, log, verbose):
|
|
447
819
|
log.write(" -Aligning "+label[1]+" EA with "+label[0]+" EA ...", verbose=verbose)
|
|
448
820
|
############### 19 align allele effect with sumstats 1
|
|
449
821
|
sig_list_merged["EA_1"]=sig_list_merged["EA_1"].astype("string")
|
|
@@ -489,7 +861,19 @@ def compare_effect(path1,
|
|
|
489
861
|
# flip eaf
|
|
490
862
|
sig_list_merged["EAF_2_aligned"]=sig_list_merged["EAF_2"]
|
|
491
863
|
sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"EAF_2_aligned"]= 1 -sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"EAF_2"]
|
|
492
|
-
|
|
864
|
+
return sig_list_merged
|
|
865
|
+
|
|
866
|
+
#########################################################################################################################
|
|
867
|
+
#########################################################################################################################
|
|
868
|
+
#########################################################################################################################
|
|
869
|
+
#########################################################################################################################
|
|
870
|
+
#########################################################################################################################
|
|
871
|
+
#########################################################################################################################
|
|
872
|
+
#########################################################################################################################
|
|
873
|
+
#########################################################################################################################
|
|
874
|
+
#########################################################################################################################
|
|
875
|
+
|
|
876
|
+
def check_allele_match(sig_list_merged, allele_match, label, log,verbose):
|
|
493
877
|
# checking effect allele matching
|
|
494
878
|
nonmatch = np.nansum(sig_list_merged["EA_1"] != sig_list_merged["EA_2_aligned"])
|
|
495
879
|
log.write(" -Aligned all EAs in {} with EAs in {} ...".format(label[1],label[0]), verbose=verbose)
|
|
@@ -500,16 +884,19 @@ def compare_effect(path1,
|
|
|
500
884
|
sig_list_merged = sig_list_merged.loc[sig_list_merged["EA_1"] == sig_list_merged["EA_2_aligned"]]
|
|
501
885
|
else:
|
|
502
886
|
log.write(" -No variants with EA not matching...", verbose=verbose)
|
|
503
|
-
|
|
504
|
-
log.write(" -Using FDR...", verbose=verbose)
|
|
505
|
-
#sig_list_merged["P_1"] = fdrcorrection(sig_list_merged["P_1"])[1]
|
|
506
|
-
#sig_list_merged["P_2"] = fdrcorrection(sig_list_merged["P_2"])[1]
|
|
507
|
-
sig_list_merged["P_1"] =ss.false_discovery_control(sig_list_merged["P_1"])
|
|
508
|
-
sig_list_merged["P_2"] =ss.false_discovery_control(sig_list_merged["P_2"])
|
|
887
|
+
return sig_list_merged
|
|
509
888
|
|
|
510
|
-
|
|
511
|
-
## winner's curse correction using aligned beta
|
|
889
|
+
def winnerscurse_correction(sig_list_merged, mode, wc_correction, sig_level, scaled1, scaled2, log, verbose):
|
|
512
890
|
if mode=="beta":
|
|
891
|
+
if scaled1==True:
|
|
892
|
+
match1= sig_list_merged["MLOG10P_1"]>-np.log10(sig_level)
|
|
893
|
+
else:
|
|
894
|
+
match1 = sig_list_merged["P_1"]<sig_level
|
|
895
|
+
if scaled2==True:
|
|
896
|
+
match2= sig_list_merged["MLOG10P_2"]>-np.log10(sig_level)
|
|
897
|
+
else:
|
|
898
|
+
match2 = sig_list_merged["P_2"]<sig_level
|
|
899
|
+
|
|
513
900
|
if wc_correction == "all":
|
|
514
901
|
log.write(" -Correcting BETA for winner's curse with threshold at {} for all variants...".format(sig_level), verbose=verbose)
|
|
515
902
|
sig_list_merged["EFFECT_1_RAW"] = sig_list_merged["EFFECT_1"].copy()
|
|
@@ -522,128 +909,139 @@ def compare_effect(path1,
|
|
|
522
909
|
sig_list_merged["EFFECT_2_aligned"] = sig_list_merged[["EFFECT_2_aligned_RAW","SE_2"]].apply(lambda x: wc_correct(x[0],x[1],sig_level),axis=1)
|
|
523
910
|
|
|
524
911
|
elif wc_correction == "sig" :
|
|
912
|
+
|
|
525
913
|
log.write(" - Correcting BETA for winner's curse with threshold at {} for significant variants...".format(sig_level), verbose=verbose)
|
|
526
914
|
sig_list_merged["EFFECT_1_RAW"] = sig_list_merged["EFFECT_1"].copy()
|
|
527
915
|
sig_list_merged["EFFECT_2_aligned_RAW"] = sig_list_merged["EFFECT_2_aligned"].copy()
|
|
528
|
-
log.write(" -Correcting BETA for {} variants in sumstats1...".format(sum(
|
|
529
|
-
sig_list_merged.loc[
|
|
530
|
-
log.write(" -Correcting BETA for {} variants in sumstats2...".format(sum(
|
|
531
|
-
sig_list_merged.loc[
|
|
916
|
+
log.write(" -Correcting BETA for {} variants in sumstats1...".format(sum(match1)), verbose=verbose)
|
|
917
|
+
sig_list_merged.loc[match1, "EFFECT_1"] = sig_list_merged.loc[match1, ["EFFECT_1_RAW","SE_1"]].apply(lambda x: wc_correct_test(x[0],x[1],sig_level),axis=1)
|
|
918
|
+
log.write(" -Correcting BETA for {} variants in sumstats2...".format(sum(match2)), verbose=verbose)
|
|
919
|
+
sig_list_merged.loc[match2, "EFFECT_2_aligned"] = sig_list_merged.loc[match2, ["EFFECT_2_aligned_RAW","SE_2"]].apply(lambda x: wc_correct_test(x[0],x[1],sig_level),axis=1)
|
|
532
920
|
|
|
533
921
|
elif wc_correction == "sumstats1" :
|
|
534
922
|
log.write(" - Correcting BETA for winner's curse with threshold at {} for significant variants in sumstats1...".format(sig_level), verbose=verbose)
|
|
535
923
|
sig_list_merged["EFFECT_1_RAW"] = sig_list_merged["EFFECT_1"].copy()
|
|
536
|
-
log.write(" -Correcting BETA for {} variants in sumstats1...".format(sum(
|
|
537
|
-
sig_list_merged.loc[
|
|
924
|
+
log.write(" -Correcting BETA for {} variants in sumstats1...".format(sum(match1)), verbose=verbose)
|
|
925
|
+
sig_list_merged.loc[match1, "EFFECT_1"] = sig_list_merged.loc[match1, ["EFFECT_1_RAW","SE_1"]].apply(lambda x: wc_correct_test(x[0],x[1],sig_level),axis=1)
|
|
538
926
|
|
|
539
927
|
elif wc_correction == "sumstats2" :
|
|
540
928
|
log.write(" - Correcting BETA for winner's curse with threshold at {} for significant variants in sumstats2...".format(sig_level), verbose=verbose)
|
|
541
929
|
sig_list_merged["EFFECT_2_aligned_RAW"] = sig_list_merged["EFFECT_2_aligned"].copy()
|
|
542
|
-
log.write(" -Correcting BETA for {} variants in sumstats2...".format(sum(
|
|
543
|
-
sig_list_merged.loc[
|
|
930
|
+
log.write(" -Correcting BETA for {} variants in sumstats2...".format(sum(match2)), verbose=verbose)
|
|
931
|
+
sig_list_merged.loc[match2, "EFFECT_2_aligned"] = sig_list_merged.loc[match2, ["EFFECT_2_aligned_RAW","SE_2"]].apply(lambda x: wc_correct_test(x[0],x[1],sig_level),axis=1)
|
|
932
|
+
return sig_list_merged
|
|
544
933
|
|
|
545
|
-
|
|
546
|
-
## heterogeneity test
|
|
547
|
-
if (is_q == True):
|
|
548
|
-
log.write(" -Calculating Cochran's Q statistics and peform chisq test...", verbose=verbose)
|
|
549
|
-
if mode=="beta" or mode=="BETA" or mode=="Beta":
|
|
550
|
-
sig_list_merged = test_q(sig_list_merged,"EFFECT_1","SE_1","EFFECT_2_aligned","SE_2",q_level=q_level,is_q_mc=is_q_mc, log=log, verbose=verbose)
|
|
551
|
-
else:
|
|
552
|
-
sig_list_merged = test_q(sig_list_merged,"BETA_1","SE_1","BETA_2_aligned","SE_2",q_level=q_level,is_q_mc=is_q_mc, log=log, verbose=verbose)
|
|
553
|
-
|
|
554
|
-
######################### save ###############################################################
|
|
555
|
-
## save the merged data
|
|
556
|
-
save_path = label[0]+"_"+label[1]+"_beta_sig_list_merged.tsv"
|
|
557
|
-
log.write(" -Saving the merged data to:",save_path, verbose=verbose)
|
|
558
|
-
sig_list_merged.to_csv(save_path,"\t")
|
|
559
|
-
|
|
560
|
-
########################## maf_threshold#############################################################
|
|
934
|
+
def filter_by_maf(sig_list_merged, eaf, maf_level, log, verbose):
    """Keep only variants whose allele frequency is inside (maf_level, 1 - maf_level) in both datasets.

    Parameters
    ----------
    sig_list_merged : pandas.DataFrame
        Merged table; ``EAF_1`` and ``EAF_2`` are read when filtering applies.
    eaf : sequence
        Filtering is skipped when this is empty.
    maf_level : float or None
        Frequency threshold; filtering is skipped when None.
    log : object
        Logger exposing ``write(*message, verbose=...)``.
    verbose : bool
        Forwarded to the log call.

    Returns
    -------
    pandas.DataFrame
        The filtered rows, or the input object itself when filtering is skipped.
    """
    # nothing to do without EAF columns or without a threshold
    if len(eaf) <= 0 or maf_level is None:
        return sig_list_merged

    eaf_1 = sig_list_merged["EAF_1"]
    eaf_2 = sig_list_merged["EAF_2"]
    eaf_1_clear = (eaf_1 > maf_level) & (eaf_1 < 1 - maf_level)
    eaf_2_clear = (eaf_2 > maf_level) & (eaf_2 < 1 - maf_level)
    both_eaf_clear = eaf_1_clear & eaf_2_clear

    n_excluded = len(sig_list_merged) - both_eaf_clear.sum()
    log.write(" -Exclude "+str(n_excluded)+ " variants with maf <",maf_level, verbose=verbose)

    return sig_list_merged.loc[both_eaf_clear,:]
|
|
940
|
+
|
|
570
941
|
|
|
571
|
-
# extract group
|
|
572
|
-
if include_all==True:
|
|
573
|
-
sum0 = sig_list_merged.loc[sig_list_merged["indicator"]==0,:].dropna(axis=0)
|
|
574
|
-
else:
|
|
575
|
-
sum0 = pd.DataFrame()
|
|
576
942
|
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
943
|
+
|
|
944
|
+
|
|
945
|
+
def test_q(df,beta1,se1,beta2,se2,q_level=0.05,is_q_mc=False, log=None, verbose=False):
    """Cochran's Q heterogeneity test between two effect estimates per row.

    Columns ``Q``, ``HetP``, ``Edge_color`` and ``I2`` (plus ``RAW_HetP`` when
    a multiple-testing correction is requested) are added to ``df`` in place,
    and ``df`` is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per variant.
    beta1, se1, beta2, se2 : str
        Column names of the two effect sizes and their standard errors.
    q_level : float
        Rows with ``HetP < q_level`` get ``Edge_color == "black"``; all
        others keep ``"white"``.
    is_q_mc : {"fdr", "bon", other}
        ``"fdr"`` applies ``scipy.stats.false_discovery_control``; ``"bon"``
        applies a Bonferroni correction; any other value applies none.
    log : object, optional
        Logger exposing ``write(*message, verbose=...)``. A new ``Log()`` is
        created per call when omitted, so calls do not share one instance.
    verbose : bool
        Forwarded to the log calls.

    Returns
    -------
    pandas.DataFrame
        ``df`` with the added columns.
    """
    if log is None:
        log = Log()

    q="Q"
    pq="HetP"
    rawpq="RAW_HetP"
    i2="I2"

    # inverse-variance weights and the fixed-effect pooled estimate; kept as
    # local Series so no temporary columns are written to df
    weight_1 = 1/(df[se1])**2
    weight_2 = 1/(df[se2])**2
    beta_fe = (weight_1*df[beta1] + weight_2*df[beta2])/(weight_1+weight_2)

    # Cochran(1954); two estimates -> 1 degree of freedom
    df[q] = weight_1*(df[beta1]-beta_fe)**2 + weight_2*(df[beta2]-beta_fe)**2
    df[pq] = ss.chi2.sf(df[q], 1)
    df["Edge_color"]="white"

    if is_q_mc=="fdr":
        log.write(" -FDR correction applied...", verbose=verbose)
        df[rawpq] = df[pq]
        df[pq] = ss.false_discovery_control(df[pq])

    elif is_q_mc=="bon":
        log.write(" -Bonferroni correction applied...", verbose=verbose)
        df[rawpq] = df[pq]
        # an adjusted P value is still a probability: cap it at 1
        df[pq] = (df[pq] * len(df[pq])).clip(upper=1)

    df.loc[df[pq]<q_level,"Edge_color"]="black"

    # Huedo-Medina, T. B., Sánchez-Meca, J., Marín-Martínez, F., & Botella, J. (2006). Assessing heterogeneity in meta-analysis: Q statistic or I² index?. Psychological methods, 11(2), 193.
    # calculate I2 = (Q - df)/Q with df = 1, truncated at 0
    df[i2] = (df[q] - 1)/df[q]
    df.loc[df[i2]<0,i2] = 0

    return df


# The name matches pytest's default ``test_*`` pattern; mark it so that test
# collection never tries to run this helper as a test case.
test_q.__test__ = False
|
|
981
|
+
|
|
982
|
+
def jackknife_r(df,x="EFFECT_1",y="EFFECT_2_aligned"):
    """Jackknife (leave-one-out) estimate of the standard error of Pearson's r.

    Rows with a missing value in either column are ignored. For each of the
    remaining ``n`` rows, r is re-estimated with that row left out, and the
    jackknife standard error ``sqrt((n-1)/n * sum((r_i - mean(r))**2))`` is
    returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the two columns.
    x, y : str
        Column names of the two variables.

    Returns
    -------
    float
        Jackknife standard error of r, or NaN when fewer than three complete
        rows are available (a leave-one-out fit then has fewer than two points).
    """
    # dropna
    complete = df.loc[:,[x,y]].dropna()
    xs = complete[x].to_numpy(dtype=float)
    ys = complete[y].to_numpy(dtype=float)

    # number of complete records -- counted AFTER dropping NAs, so it always
    # matches the data that is actually resampled
    n = len(complete)
    if n < 3:
        return np.nan

    # estimate r with each record excluded in turn
    rs = np.array([
        ss.linregress(np.delete(xs, i), np.delete(ys, i))[2]
        for i in range(n)
    ])

    # https://en.wikipedia.org/wiki/Jackknife_resampling
    r_se = np.sqrt( (n-1)/n * np.sum((rs - np.mean(rs))**2) )
    return r_se
|
|
1013
|
+
|
|
1014
|
+
def drop_duplicate_and_na(df,snpid="SNPID",sort_by=False,log=None,ascending=True,verbose=True):
    """Remove rows whose ID is missing or duplicated, keeping the first after sorting.

    ``df`` is modified in place and also returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to clean.
    snpid : str
        Name of the ID column used for the NA and duplicate checks.
    sort_by : str or False
        Column to sort by before de-duplication, so that the row kept for
        each ID is the first one in that order; ``False`` skips sorting.
    log : object, optional
        Logger exposing ``write(*message, verbose=...)``. A new ``Log()`` is
        created per call when omitted, so calls do not share one instance.
    ascending : bool
        Sort direction used with ``sort_by``.
    verbose : bool
        Forwarded to the log call.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object after cleaning.
    """
    if log is None:
        log = Log()

    length_before = len(df)

    # `!=` (not `is not`) is kept on purpose so existing callers that pass
    # other falsy-equal values behave exactly as before
    if sort_by != False:
        df.sort_values(by=sort_by, ascending=ascending, inplace=True)

    df.dropna(axis="index", subset=[snpid], inplace=True)
    df.drop_duplicates(subset=[snpid], keep='first', inplace=True)

    length_after = len(df)
    if length_before != length_after:
        log.write(" -Dropped {} duplicates or NAs...".format(length_before - length_after), verbose=verbose)
    return df
|
|
1028
|
+
|
|
1029
|
+
|
|
1030
|
+
|
|
1031
|
+
#########################################################################################################################
|
|
1032
|
+
#########################################################################################################################
|
|
1033
|
+
#########################################################################################################################
|
|
1034
|
+
#########################################################################################################################
|
|
1035
|
+
#########################################################################################################################
|
|
1036
|
+
#########################################################################################################################
|
|
1037
|
+
#########################################################################################################################
|
|
1038
|
+
#########################################################################################################################
|
|
1039
|
+
#########################################################################################################################
|
|
1040
|
+
|
|
1041
|
+
def scatter_annotation(ax, sig_list_merged,anno, anno_het, is_q, mode,
|
|
1042
|
+
anno_min,anno_min1,anno_min2,anno_diff,anno_kwargs,adjust_text_kwargs_l,adjust_text_kwargs_r,
|
|
1043
|
+
log,verbose
|
|
1044
|
+
):
|
|
647
1045
|
if anno==True or anno=="GENENAME":
|
|
648
1046
|
sig_list_toanno = sig_list_merged.dropna(axis=0)
|
|
649
1047
|
if is_q==True and anno_het == True:
|
|
@@ -669,7 +1067,7 @@ def compare_effect(path1,
|
|
|
669
1067
|
log.write("Annotating variants using {}".format("GENENAME"), verbose=verbose)
|
|
670
1068
|
|
|
671
1069
|
for index, row in sig_list_toanno.iterrows():
|
|
672
|
-
log.write("Annotating {}...".format(row), verbose=verbose)
|
|
1070
|
+
#log.write("Annotating {}...".format(row), verbose=verbose)
|
|
673
1071
|
if anno==True:
|
|
674
1072
|
to_anno_text = index
|
|
675
1073
|
elif type(anno) is str:
|
|
@@ -680,18 +1078,18 @@ def compare_effect(path1,
|
|
|
680
1078
|
|
|
681
1079
|
if mode=="beta" or mode=="BETA" or mode=="Beta":
|
|
682
1080
|
if row["EFFECT_1"] < row["EFFECT_2_aligned"]:
|
|
683
|
-
texts_l.append(plt.text(row["EFFECT_1"], row["EFFECT_2_aligned"],to_anno_text,ha="right",va="bottom"))
|
|
1081
|
+
texts_l.append(plt.text(row["EFFECT_1"], row["EFFECT_2_aligned"],to_anno_text,ha="right",va="bottom", **anno_kwargs))
|
|
684
1082
|
else:
|
|
685
|
-
texts_r.append(plt.text(row["EFFECT_1"], row["EFFECT_2_aligned"],to_anno_text,ha="left",va="top"))
|
|
1083
|
+
texts_r.append(plt.text(row["EFFECT_1"], row["EFFECT_2_aligned"],to_anno_text,ha="left",va="top", **anno_kwargs))
|
|
686
1084
|
else:
|
|
687
1085
|
if row["OR_1"] < row["OR_2_aligned"]:
|
|
688
|
-
texts_l.append(plt.text(row["OR_1"], row["OR_2_aligned"],to_anno_text, ha='right', va='bottom'))
|
|
1086
|
+
texts_l.append(plt.text(row["OR_1"], row["OR_2_aligned"],to_anno_text, ha='right', va='bottom', **anno_kwargs))
|
|
689
1087
|
else:
|
|
690
|
-
texts_r.append(plt.text(row["OR_1"], row["OR_2_aligned"],to_anno_text, ha='left', va='top'))
|
|
1088
|
+
texts_r.append(plt.text(row["OR_1"], row["OR_2_aligned"],to_anno_text, ha='left', va='top', **anno_kwargs))
|
|
691
1089
|
if len(texts_l)>0:
|
|
692
|
-
adjust_text(texts_l,
|
|
1090
|
+
adjust_text(texts_l,ax=ax,**adjust_text_kwargs_l)
|
|
693
1091
|
if len(texts_r)>0:
|
|
694
|
-
adjust_text(texts_r,
|
|
1092
|
+
adjust_text(texts_r,ax=ax,**adjust_text_kwargs_r)
|
|
695
1093
|
elif type(anno) is dict:
|
|
696
1094
|
sig_list_toanno = sig_list_merged.dropna(axis=0)
|
|
697
1095
|
# if input is a dict
|
|
@@ -715,38 +1113,24 @@ def compare_effect(path1,
|
|
|
715
1113
|
for index, row in sig_list_toanno.iterrows():
|
|
716
1114
|
if mode=="beta" or mode=="BETA" or mode=="Beta":
|
|
717
1115
|
if row["EFFECT_1"] < row["EFFECT_2_aligned"]:
|
|
718
|
-
texts_l.append(plt.text(row["EFFECT_1"], row["EFFECT_2_aligned"],anno[index],ha="right",va="bottom"))
|
|
1116
|
+
texts_l.append(plt.text(row["EFFECT_1"], row["EFFECT_2_aligned"],anno[index],ha="right",va="bottom", **anno_kwargs))
|
|
719
1117
|
else:
|
|
720
|
-
texts_r.append(plt.text(row["EFFECT_1"], row["EFFECT_2_aligned"],anno[index],ha="left",va="top"))
|
|
1118
|
+
texts_r.append(plt.text(row["EFFECT_1"], row["EFFECT_2_aligned"],anno[index],ha="left",va="top", **anno_kwargs))
|
|
721
1119
|
else:
|
|
722
1120
|
if row["OR_1"] < row["OR_2_aligned"]:
|
|
723
|
-
texts_l.append(plt.text(row["OR_1"], row["OR_2_aligned"],anno[index], ha='right', va='bottom'))
|
|
1121
|
+
texts_l.append(plt.text(row["OR_1"], row["OR_2_aligned"],anno[index], ha='right', va='bottom', **anno_kwargs))
|
|
724
1122
|
else:
|
|
725
|
-
texts_r.append(plt.text(row["OR_1"], row["OR_2_aligned"],anno[index], ha='left', va='top'))
|
|
1123
|
+
texts_r.append(plt.text(row["OR_1"], row["OR_2_aligned"],anno[index], ha='left', va='top', **anno_kwargs))
|
|
726
1124
|
if len(texts_l)>0:
|
|
727
|
-
adjust_text(texts_l,
|
|
1125
|
+
adjust_text(texts_l,ax=ax,**adjust_text_kwargs_l)
|
|
728
1126
|
if len(texts_r)>0:
|
|
729
|
-
adjust_text(texts_r,
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
# plot x=0,y=0, and a 45 degree line
|
|
733
|
-
xl,xh=ax.get_xlim()
|
|
734
|
-
yl,yh=ax.get_ylim()
|
|
735
|
-
|
|
736
|
-
if mode=="beta" or mode=="BETA" or mode=="Beta":
|
|
737
|
-
#if using beta
|
|
738
|
-
ax.axhline(y=0, zorder=1,**helper_line_args)
|
|
739
|
-
ax.axvline(x=0, zorder=1,**helper_line_args)
|
|
740
|
-
else:
|
|
741
|
-
#if using OR
|
|
742
|
-
ax.axhline(y=1, zorder=1,**helper_line_args)
|
|
743
|
-
ax.axvline(x=1, zorder=1,**helper_line_args)
|
|
744
|
-
|
|
745
|
-
for spine in ['top', 'right']:
|
|
746
|
-
ax.spines[spine].set_visible(False)
|
|
747
|
-
|
|
1127
|
+
adjust_text(texts_r,ax=ax,**adjust_text_kwargs_r)
|
|
1128
|
+
return ax
|
|
748
1129
|
|
|
749
|
-
|
|
1130
|
+
|
|
1131
|
+
def confire_regression_line(is_reg, reg_box, sig_list_merged, ax, mode,xl,yl,xh,yh, null_beta, r_se,
|
|
1132
|
+
is_45_helper_line,helper_line_args, font_kwargs,
|
|
1133
|
+
log, verbose):
|
|
750
1134
|
if len(sig_list_merged)<3: is_reg=False
|
|
751
1135
|
if is_reg is True:
|
|
752
1136
|
if mode=="beta" or mode=="BETA" or mode=="Beta":
|
|
@@ -792,7 +1176,7 @@ def compare_effect(path1,
|
|
|
792
1176
|
pe="0"
|
|
793
1177
|
p_text="$p = " + p12 + " \\times 10^{"+pe+"}$"
|
|
794
1178
|
p_latex= f'{p_text}'
|
|
795
|
-
ax.text(0.98,0.02,"$y =$ "+"{:.2f}".format(reg[1]) +" $+$ "+ "{:.2f}".format(reg[0])+" $x$, "+ p_latex + ", $r =$" +"{:.2f}".format(reg[2])+r_se_jackknife_string, va="bottom",ha="right",transform=ax.transAxes, bbox=reg_box, **
|
|
1179
|
+
ax.text(0.98,0.02,"$y =$ "+"{:.2f}".format(reg[1]) +" $+$ "+ "{:.2f}".format(reg[0])+" $x$, "+ p_latex + ", $r =$" +"{:.2f}".format(reg[2])+r_se_jackknife_string, va="bottom",ha="right",transform=ax.transAxes, bbox=reg_box, **font_kwargs)
|
|
796
1180
|
else:
|
|
797
1181
|
#if regression coeeficient <0 : auxiliary line slope = -1
|
|
798
1182
|
if is_45_helper_line is True:
|
|
@@ -809,7 +1193,7 @@ def compare_effect(path1,
|
|
|
809
1193
|
pe="0"
|
|
810
1194
|
p_text="$p = " + p12 + " \\times 10^{"+pe+"}$"
|
|
811
1195
|
p_latex= f'{p_text}'
|
|
812
|
-
ax.text(0.98,0.02,"$y =$ "+"{:.2f}".format(reg[1]) +" $-$ "+ "{:.2f}".format(abs(reg[0]))+" $x$, "+ p_latex + ", $r =$" +"{:.2f}".format(reg[2])+r_se_jackknife_string, va="bottom",ha="right",transform=ax.transAxes,bbox=reg_box,**
|
|
1196
|
+
ax.text(0.98,0.02,"$y =$ "+"{:.2f}".format(reg[1]) +" $-$ "+ "{:.2f}".format(abs(reg[0]))+" $x$, "+ p_latex + ", $r =$" +"{:.2f}".format(reg[2])+r_se_jackknife_string, va="bottom",ha="right",transform=ax.transAxes,bbox=reg_box,**font_kwargs)
|
|
813
1197
|
|
|
814
1198
|
if mode=="beta" or mode=="BETA" or mode=="Beta":
|
|
815
1199
|
middle = sig_list_merged["EFFECT_1"].mean()
|
|
@@ -820,11 +1204,12 @@ def compare_effect(path1,
|
|
|
820
1204
|
ax.axline(xy1=(0,reg[1]),slope=reg[0],color="#cccccc",linestyle='--',zorder=1)
|
|
821
1205
|
else:
|
|
822
1206
|
ax.axline(xy1=(1,reg[0]+reg[1]),slope=reg[0],color="#cccccc",linestyle='--',zorder=1)
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
1207
|
+
return ax
|
|
1208
|
+
|
|
1209
|
+
|
|
1210
|
+
def configure_legend(fig, ax, legend_mode, is_q, is_q_mc, legend_elements, legend_pos, q_level,
|
|
1211
|
+
font_kwargs,scatterargs,legend_args,
|
|
1212
|
+
legend_title, legend_title2 ):
|
|
828
1213
|
legend_args_to_use ={
|
|
829
1214
|
"framealpha":1,
|
|
830
1215
|
"handlelength":0.7,
|
|
@@ -892,16 +1277,10 @@ def compare_effect(path1,
|
|
|
892
1277
|
label.set_ha('left')
|
|
893
1278
|
label.set_position((-8*width,0))
|
|
894
1279
|
|
|
895
|
-
ax.tick_params(axis='both', labelsize=
|
|
896
|
-
plt.setp(L.texts,**
|
|
897
|
-
plt.setp(L.get_title(),**
|
|
898
|
-
|
|
899
|
-
gc.collect()
|
|
900
|
-
|
|
901
|
-
save_figure(fig, save, keyword="esc",save_args=save_args, log=log, verbose=verbose)
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
return [sig_list_merged, fig,log]
|
|
1280
|
+
ax.tick_params(axis='both', labelsize=font_kwargs["fontsize"])
|
|
1281
|
+
plt.setp(L.texts,**font_kwargs)
|
|
1282
|
+
plt.setp(L.get_title(),**font_kwargs)
|
|
1283
|
+
return ax
|
|
905
1284
|
|
|
906
1285
|
def reorderLegend(ax=None, order=None, add=None):
|
|
907
1286
|
handles, labels = ax.get_legend_handles_labels()
|
|
@@ -910,78 +1289,18 @@ def reorderLegend(ax=None, order=None, add=None):
|
|
|
910
1289
|
new_handles = [info[l] for l in order]
|
|
911
1290
|
return new_handles, order
|
|
912
1291
|
|
|
913
|
-
def
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
pq="HetP"
|
|
919
|
-
i2="I2"
|
|
920
|
-
df[w1]=1/(df[se1])**2
|
|
921
|
-
df[w2]=1/(df[se2])**2
|
|
922
|
-
df[beta] =(df[w1]*df[beta1] + df[w2]*df[beta2])/(df[w1]+df[w2])
|
|
923
|
-
|
|
924
|
-
# Cochran(1954)
|
|
925
|
-
df[q] = df[w1]*(df[beta1]-df[beta])**2 + df[w2]*(df[beta2]-df[beta])**2
|
|
926
|
-
df[pq] = ss.chi2.sf(df[q], 1)
|
|
927
|
-
df["Edge_color"]="white"
|
|
928
|
-
|
|
929
|
-
if is_q_mc=="fdr":
|
|
930
|
-
log.write(" -FDR correction applied...", verbose=verbose)
|
|
931
|
-
df[pq] = ss.false_discovery_control(df[pq])
|
|
932
|
-
elif is_q_mc=="bon":
|
|
933
|
-
log.write(" -Bonferroni correction applied...", verbose=verbose)
|
|
934
|
-
df[pq] = df[pq] * len(df[pq])
|
|
935
|
-
|
|
936
|
-
df.loc[df[pq]<q_level,"Edge_color"]="black"
|
|
937
|
-
df.drop(columns=["Weight_1","Weight_2","BETA_FE"],inplace=True)
|
|
938
|
-
# Huedo-Medina, T. B., Sánchez-Meca, J., Marín-Martínez, F., & Botella, J. (2006). Assessing heterogeneity in meta-analysis: Q statistic or I² index?. Psychological methods, 11(2), 193.
|
|
939
|
-
|
|
940
|
-
# calculate I2
|
|
941
|
-
df[i2] = (df[q] - 1)/df[q]
|
|
942
|
-
df.loc[df[i2]<0,i2] = 0
|
|
943
|
-
|
|
944
|
-
return df
|
|
945
|
-
|
|
946
|
-
def jackknife_r(df,x="EFFECT_1",y="EFFECT_2_aligned"):
|
|
947
|
-
"""Jackknife estimation of se for rsq
|
|
948
|
-
|
|
949
|
-
"""
|
|
950
|
-
|
|
951
|
-
# dropna
|
|
952
|
-
df_nona = df.loc[:,[x,y]].dropna()
|
|
953
|
-
|
|
954
|
-
# non-empty entries
|
|
955
|
-
n=len(df)
|
|
956
|
-
|
|
957
|
-
# assign row number
|
|
958
|
-
df_nona["nrow"] = range(n)
|
|
1292
|
+
def reorder_columns(sig_list_merged):
|
|
1293
|
+
order=[ 'CHR', 'POS', 'GENENAME',
|
|
1294
|
+
'EA_1', 'NEA_1', 'EFFECT_1', 'SE_1', 'P_1', 'MLOG10P_1',
|
|
1295
|
+
'EA_2_aligned','NEA_2_aligned', 'EFFECT_2_aligned', 'SE_2','P_2','MLOG10P_2', 'EA_2', 'NEA_2', 'EFFECT_2',
|
|
1296
|
+
'indicator' ]
|
|
959
1297
|
|
|
960
|
-
|
|
961
|
-
|
|
1298
|
+
new_order=[]
|
|
1299
|
+
for i in order:
|
|
1300
|
+
if i in sig_list_merged.columns:
|
|
1301
|
+
new_order.append(i)
|
|
1302
|
+
for i in sig_list_merged.columns:
|
|
1303
|
+
if i not in new_order:
|
|
1304
|
+
new_order.append(i)
|
|
962
1305
|
|
|
963
|
-
|
|
964
|
-
for i in range(n):
|
|
965
|
-
# exclude 1 record
|
|
966
|
-
records_to_use = df_nona["nrow"]!=i
|
|
967
|
-
# estimate r
|
|
968
|
-
reg_jackknife = ss.linregress(df_nona.loc[records_to_use, x],df_nona.loc[records_to_use,y])
|
|
969
|
-
# add r_i to list
|
|
970
|
-
r_list.append(reg_jackknife[2])
|
|
971
|
-
|
|
972
|
-
# convert list to array
|
|
973
|
-
rs = np.array(r_list)
|
|
974
|
-
# https://en.wikipedia.org/wiki/Jackknife_resampling
|
|
975
|
-
r_se = np.sqrt( (n-1)/n * np.sum((rs - np.mean(rs))**2) )
|
|
976
|
-
return r_se
|
|
977
|
-
|
|
978
|
-
def drop_duplicate_and_na(df,snpid="SNPID",sort_by=False,log=Log(),verbose=True):
|
|
979
|
-
length_before = len(df)
|
|
980
|
-
if sort_by!=False:
|
|
981
|
-
df.sort_values(by = sort_by, inplace=True)
|
|
982
|
-
df.dropna(axis="index",subset=[snpid],inplace=True)
|
|
983
|
-
df.drop_duplicates(subset=[snpid], keep='first', inplace=True)
|
|
984
|
-
length_after= len(df)
|
|
985
|
-
if length_before != length_after:
|
|
986
|
-
log.write(" -Dropped {} duplicates or NAs...".format(length_before - length_after), verbose=verbose)
|
|
987
|
-
return df
|
|
1306
|
+
return sig_list_merged[new_order]
|