gwaslab 3.5.0__py3-none-any.whl → 3.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of gwaslab might be problematic.

@@ -4,6 +4,7 @@ import matplotlib.pyplot as plt
  import scipy.stats as ss
  import seaborn as sns
  import gc
+ import math
  import scipy.stats as ss
  from matplotlib.patches import Rectangle
  from adjustText import adjust_text
@@ -14,7 +15,8 @@ from gwaslab.g_Log import Log
  from gwaslab.util_in_correct_winnerscurse import wc_correct
  from gwaslab.util_in_correct_winnerscurse import wc_correct_test
  from gwaslab.g_Sumstats import Sumstats
-
+ from gwaslab.io_process_args import _merge_and_sync_dic
+ from gwaslab.io_process_args import _extract_kwargs
  #20220422
  def compare_effect(path1,
  path2,
@@ -31,6 +33,7 @@ def compare_effect(path1,
  anno_min1=0,
  anno_min2=0,
  anno_diff=0,
+ anno_args=None,
  scaled=False,
  scaled1=False,
  scaled2=False,
@@ -59,24 +62,36 @@ def compare_effect(path1,
  plt_args=None,
  xylabel_prefix="Per-allele effect size in ",
  helper_line_args=None,
+ adjust_text_kwargs = None,
+ adjust_text_kwargs_l = None,
+ adjust_text_kwargs_r = None,
+ font_args=None,
  fontargs=None,
  build="19",
  r_or_r2="r",
- #
  errargs=None,
  legend_args=None,
  sep=["\t","\t"],
  log = Log(),
  save=False,
  save_args=None,
- verbose=False):
-
+ verbose=False,
+ **kwargs):
+
  #[snpid,p,ea,nea] ,[effect,se]
  #[snpid,p,ea,nea,chr,pos],[effect,se]
  #[snpid,p,ea,nea,chr,pos],[OR,OR_l,OR_h]
  if scaled == True:
  scaled1 = True
  scaled2 = True
+
+ if legend_title== r'$ P < 5 x 10^{-8}$ in:' and sig_level!=5e-8:
+
+ exponent = math.floor(math.log10(sig_level))
+ mantissa = sig_level / 10**exponent
+
+ legend_title = '$ P < {} x 10^{{{}}}$ in:'.format(mantissa, exponent)
+
  if is_q_mc=="fdr" or is_q_mc=="bon":
  is_q = True
  if is_q == True:
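
The hunk above rebuilds the legend title whenever a non-default sig_level is passed: it splits the threshold into a mantissa and a power-of-ten exponent so the title reads, e.g., P < 1.0 x 10^{-6}. A minimal standalone sketch of that decomposition (compare_effect does this inline):

    import math

    sig_level = 1e-6
    exponent = math.floor(math.log10(sig_level))   # -6
    mantissa = sig_level / 10**exponent            # 1.0
    legend_title = '$ P < {} x 10^{{{}}}$ in:'.format(mantissa, exponent)
    print(legend_title)  # $ P < 1.0 x 10^{-6}$ in:
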
@@ -92,6 +107,8 @@ def compare_effect(path1,
  get_lead_args = {}
  if anno=="GENENAME":
  get_lead_args["anno"]=True
+ if anno_args is None:
+ anno_args = {}
  if errargs is None:
  errargs={"ecolor":"#cccccc","elinewidth":1}
  if fontargs is None:
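
The new anno_args parameter follows the same None-then-initialize pattern as errargs and fontargs above. A dict literal used directly as a default would be created once at definition time and shared across calls, so mutations would leak between calls; a quick demonstration of the pitfall being avoided (hypothetical function, not part of gwaslab):

    def bad(args={}):
        args["n"] = args.get("n", 0) + 1
        return args

    print(bad())  # {'n': 1}
    print(bad())  # {'n': 2} -- same shared dict, state leaked across calls
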
@@ -106,155 +123,564 @@ def compare_effect(path1,
  label = ["Sumstats_1","Sumstats_2","Both","None"]
  if anno_het ==True:
  is_q=True
+
+ adjust_text_kwargs_r_default = {"autoalign":False,"precision":0.001,"lim":1000,"ha":"left","va":"top","expand_text":(1,1.8),"expand_objects":(0.1,0.1),"expand_points":(1.8,1.8),"force_objects":(0.8,0.8),"arrowprops":dict(arrowstyle='-|>', color='grey')}
+ adjust_text_kwargs_l_default = {"autoalign":False,"precision":0.001,"lim":1000,"ha":"right","va":"bottom","expand_text":(1,1.8),"expand_objects":(0.1,0.1),"expand_points":(1.8,1.8),"force_objects":(0.8,0.8),"arrowprops":dict(arrowstyle='-|>', color='grey')}
+
+ if adjust_text_kwargs_l is None:
+ adjust_text_kwargs_l = adjust_text_kwargs_l_default
+ else:
+ for key, value in adjust_text_kwargs_l_default.items():
+ if key not in adjust_text_kwargs_l:
+ adjust_text_kwargs_l[key] = value
+
+ if adjust_text_kwargs_r is None:
+ adjust_text_kwargs_r = adjust_text_kwargs_r_default
+ else:
+ for key, value in adjust_text_kwargs_r_default.items():
+ if key not in adjust_text_kwargs_r:
+ adjust_text_kwargs_r[key] = value
+
+ if adjust_text_kwargs is not None:
+ for key, value in adjust_text_kwargs.items():
+ adjust_text_kwargs_l[key] = value
+ adjust_text_kwargs_r[key] = value
+ else:
+ adjust_text_kwargs = {}
+
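
The three adjust_text dicts above are layered in a fixed order: each per-side dict (adjust_text_kwargs_l / adjust_text_kwargs_r) is first back-filled with the per-side defaults, and the shared adjust_text_kwargs is then copied over both, so shared keys win. A minimal sketch of that layering with illustrative keys:

    defaults = {"lim": 1000, "ha": "left"}      # per-side defaults
    user_side = {"ha": "right"}                 # user-supplied per-side dict
    shared = {"lim": 500}                       # shared overrides for both sides

    for key, value in defaults.items():
        user_side.setdefault(key, value)        # fill gaps, keep user values
    user_side.update(shared)                    # shared keys override last
    print(user_side)                            # {'ha': 'right', 'lim': 500}
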
+
+ save_kwargs = _extract_kwargs("save", save_args, locals())
+ anno_kwargs = _extract_kwargs("anno", anno_args, locals())
+ err_kwargs = _extract_kwargs("err", errargs, locals())
+ plt_kwargs = _extract_kwargs("plt", plt_args, locals())
+ scatter_kwargs = _extract_kwargs("scatter", scatterargs, locals())
+ font_kwargs = _extract_kwargs("font",fontargs, locals())
+
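
_extract_kwargs is imported from gwaslab.io_process_args and its implementation is not part of this diff. A plausible reading of the call sites above is a prefix-based filter that merges an explicit args dict with matching "<prefix>_" entries from the function's **kwargs; the sketch below is an assumption, not the library's actual code:

    # Hypothetical stand-in for gwaslab.io_process_args._extract_kwargs.
    def extract_kwargs_sketch(prefix, explicit, scope):
        merged = dict(explicit or {})
        head = prefix + "_"
        for key, value in scope.get("kwargs", {}).items():
            if key.startswith(head):
                merged[key[len(head):]] = value  # strip prefix: scatter_s -> s
        return merged

    kwargs = {"scatter_s": 20, "font_fontsize": 12}
    print(extract_kwargs_sketch("scatter", {"alpha": 0.8}, {"kwargs": kwargs}))
    # {'alpha': 0.8, 's': 20}
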
+ log.write("Start to process the raw sumstats for plotting...", verbose=verbose)
+
+ # configure headers
+ cols_name_list_1,cols_name_list_2, effect_cols_list_1, effect_cols_list_2 = configure_headers(mode,
+ path1,
+ path2,
+ cols_name_list_1,
+ cols_name_list_2,
+ effect_cols_list_1,
+ effect_cols_list_2,
+ scaled1,
+ scaled2,
+ log,
+ verbose)
+
+ # extract common variants / load sumstats 1
+ sumstats, common_snp_set = configure_common_snp_set(path1,path2,
+ snplist,
+ label,
+ cols_name_list_1,
+ cols_name_list_2,
+ sep,
+ scaled1,
+ scaled2,
+ log,verbose)
+
+ # rename sumstats headers -> keywords in gwaslab
+ sumstats = rename_sumtats(sumstats=sumstats,
+ cols_name_list = cols_name_list_1,
+ scaled=scaled1,
+ snplist=snplist)
+
+ # exctract only available variants from sumstats1
+ sumstats = sumstats.loc[sumstats["SNPID"].isin(common_snp_set),:]
+ log.write(" -Using only variants available for both datasets...", verbose=verbose)
+
+ ######### 8 extact SNPs for comparison
+ sig_list_1 = extract_snp_for_comparison(sumstats,
+ snplist,
+ label=label[0],
+ get_lead_args=get_lead_args,
+ build=build,
+ drop=drop,
+ anno=anno,
+ sig_level=sig_level,
+ scaled = scaled1,
+ log = log,
+ verbose = verbose)
+
+
+ ######### load sumstats1
+
+ ######### 9 extract snplist2
+ if snplist is not None:
+ cols_to_extract = [cols_name_list_2[0],cols_name_list_2[1]]
+ else:
+ cols_to_extract = [cols_name_list_2[0],cols_name_list_2[1],cols_name_list_2[4],cols_name_list_2[5]]
+
+ sumstats = load_sumstats(path=path2,
+ usecols=cols_to_extract,
+ label=label[1],
+ log=log,
+ verbose= verbose,
+ sep=sep[1])
+ gc.collect()
+
+ #if scaled2==True:
+ # sumstats[cols_name_list_2[1]] = np.power(10,-sumstats[cols_name_list_2[1]])
+
+ sumstats = rename_sumtats(sumstats=sumstats,
+ cols_name_list = cols_name_list_2,
+ scaled=scaled2,
+ snplist=snplist)
+ ######### 11 exctract only overlapping variants from sumstats2
+ sumstats = sumstats.loc[sumstats["SNPID"].isin(common_snp_set),:]
+ sig_list_2 = extract_snp_for_comparison(sumstats,
+ snplist,
+ label=label[1],
+ get_lead_args=get_lead_args,
+ build=build,
+ drop=drop,
+ anno=anno,
+ sig_level=sig_level,
+ scaled = scaled2,
+ log = log,
+ verbose = verbose)
+
+ ######### 13 Merge two list using SNPID
+ sig_list_merged = merge_list(sig_list_1,
+ sig_list_2,
+ anno = anno,
+ labels=label,
+ log=log,
+ verbose=verbose)
+
+ ###############################################################################
+ cols_to_extract = configure_cols_to_extract(mode=mode,
+ cols_name_list = cols_name_list_1,
+ effect_cols_list= effect_cols_list_1,
+ eaf = eaf)
+ sumstats = load_sumstats(path=path1,
+ usecols=cols_to_extract,
+ label=label[0],
+ log=log,
+ verbose= verbose,
+ sep=sep[0])
+
+ #if scaled1==True:
+ # sumstats[cols_name_list_1[1]] = np.power(10,-sumstats[cols_name_list_1[1]])
+ sumstats = rename_sumstats_full(mode, sumstats,
+ index=1,
+ cols_name_list = cols_name_list_1,
+ effect_cols_list = effect_cols_list_1,
+ eaf = eaf,
+ drop = drop,
+ scaled=scaled1,
+ log=log, verbose=verbose)
+
+ log.write(" -Merging "+label[0]+" effect information...", verbose=verbose)
+ sig_list_merged = pd.merge(sig_list_merged,sumstats,
+ left_on="SNPID",right_on="SNPID",
+ how="left")
+
+ ############ 15 merging sumstats2
+ cols_to_extract = configure_cols_to_extract(mode=mode,
+ cols_name_list = cols_name_list_2,
+ effect_cols_list= effect_cols_list_2,
+ eaf = eaf)
+
+ sumstats = load_sumstats(path=path2,
+ usecols=cols_to_extract,
+ label=label[1],
+ log=log,
+ verbose= verbose,
+ sep=sep[1])
+
+ #if scaled2==True:
+ # sumstats[cols_name_list_2[1]] = np.power(10,-sumstats[cols_name_list_2[1]])
+
+ gc.collect()
+
+ sumstats = rename_sumstats_full(mode, sumstats,
+ index=2,
+ cols_name_list = cols_name_list_2,
+ effect_cols_list = effect_cols_list_2,
+ eaf = eaf,
+ drop = drop,
+ scaled=scaled2,
+ log=log, verbose=verbose)
+
+ log.write(" -Merging "+label[1]+" effect information...", verbose=verbose)
+ sig_list_merged = pd.merge(sig_list_merged,sumstats,
+ left_on="SNPID",right_on="SNPID",
+ how="left")
+
+ sig_list_merged.set_index("SNPID",inplace=True)
+
+ ################ 16 update sumstats1
+
+ sig_list_merged = update_stats(sig_list_merged = sig_list_merged,
+ path = path1,
+ cols_name_list = cols_name_list_1,
+ index=1,
+ sep=sep[0],
+ snplist = snplist,
+ label=label[0],
+ drop = drop,
+ scaled=scaled1,
+ log=log,
+ verbose = verbose)
+
+ ################# 17 update sumstats2
+ sig_list_merged = update_stats(sig_list_merged = sig_list_merged,
+ path = path2,
+ cols_name_list = cols_name_list_2,
+ index=2,
+ sep=sep[1],
+ snplist = snplist,
+ label=label[1],
+ drop = drop,
+ scaled=scaled2,
+ log=log,
+ verbose = verbose)
+
+ #if scaled1 ==True :
+ # log.write(" -Sumstats -log10(P) values are being converted to P...", verbose=verbose)
+ # sig_list_merged["P_1"] = np.power(10,-sig_list_merged["P_1"])
+ #if scaled2 ==True :
+ # log.write(" -Sumstats -log10(P) values are being converted to P...", verbose=verbose)
+ # sig_list_merged["P_2"] = np.power(10,-sig_list_merged["P_2"])
+
+ #################################################################################
+ sig_list_merged = assign_indicator(sig_list_merged, snplist, sig_level, scaled1, scaled2, log, verbose)
+
+ sig_list_merged = align_alleles(sig_list_merged, label, mode, eaf, log, verbose)
+
+ sig_list_merged = check_allele_match(sig_list_merged, allele_match, label, log,verbose)
+
+ sig_list_merged = filter_by_maf(sig_list_merged, eaf, maf_level, log, verbose)
+
+ if fdr==True and scaled==False:
+ log.write(" -Using FDR...", verbose=verbose)
+ #sig_list_merged["P_1"] = fdrcorrection(sig_list_merged["P_1"])[1]
+ #sig_list_merged["P_2"] = fdrcorrection(sig_list_merged["P_2"])[1]
+ sig_list_merged["P_1"] =ss.false_discovery_control(sig_list_merged["P_1"])
+ sig_list_merged["P_2"] =ss.false_discovery_control(sig_list_merged["P_2"])

- log.write("Start to process the raw sumstats for plotting...")
+ ####################################################################################################################################
+ ## winner's curse correction using aligned beta
+ sig_list_merged = winnerscurse_correction(sig_list_merged, mode, wc_correction, sig_level,scaled1, scaled2, log, verbose)
+
+ ########################## Het test############################################################
+ ## heterogeneity test
+ if (is_q == True):
+ log.write(" -Calculating Cochran's Q statistics and peform chisq test...", verbose=verbose)
+ if mode=="beta" or mode=="BETA" or mode=="Beta":
+ sig_list_merged = test_q(sig_list_merged,"EFFECT_1","SE_1","EFFECT_2_aligned","SE_2",q_level=q_level,is_q_mc=is_q_mc, log=log, verbose=verbose)
+ else:
+ sig_list_merged = test_q(sig_list_merged,"BETA_1","SE_1","BETA_2_aligned","SE_2",q_level=q_level,is_q_mc=is_q_mc, log=log, verbose=verbose)
+
+ # heterogeneity summary
+ log.write(" -Significant het:" ,len(sig_list_merged.loc[sig_list_merged["HetP"]<0.05,:]), verbose=verbose)
+ log.write(" -All sig:" ,len(sig_list_merged), verbose=verbose)
+ log.write(" -Het rate:" ,len(sig_list_merged.loc[sig_list_merged["HetP"]<0.05,:])/len(sig_list_merged), verbose=verbose)
+
+ ######################### save ###############################################################
+ ## save the merged data
+ save_path = label[0]+"_"+label[1]+"_beta_sig_list_merged.tsv"
+ log.write(" -Saving the merged data to:",save_path, verbose=verbose)
+ sig_list_merged = reorder_columns(sig_list_merged)
+ sig_list_merged.to_csv(save_path,sep="\t")
+
+ # extract group
+ if include_all==True:
+ sum0 = sig_list_merged.loc[sig_list_merged["indicator"]==0,:].dropna(axis=0)
+ else:
+ sum0 = pd.DataFrame()
+
+ sum1only = sig_list_merged.loc[sig_list_merged["indicator"]==1,:].copy()
+ sum2only = sig_list_merged.loc[sig_list_merged["indicator"]==2,:].copy()
+ both = sig_list_merged.loc[sig_list_merged["indicator"]==3,:].copy()
+
+ if is_q==False:
+ sum0["Edge_color"]="none"
+ sum1only["Edge_color"]="none"
+ sum2only["Edge_color"]="none"
+ both["Edge_color"]="none"
+
+ log.write(" -Identified "+str(len(sum0)) + " variants which are not significant in " + label[3]+".", verbose=verbose)
+ log.write(" -Identified "+str(len(sum1only)) + " variants which are only significant in " + label[0]+".", verbose=verbose)
+ log.write(" -Identified "+str(len(sum2only)) + " variants which are only significant in " + label[1]+".", verbose=verbose)
+ log.write(" -Identified "+str(len(both)) + " variants which are significant in " + label[2] + ".", verbose=verbose)
+
+ ##plot########################################################################################
+ log.write("Creating the scatter plot for effect sizes comparison...", verbose=verbose)
+ #plt.style.use("ggplot")
+ sns.set_style("ticks")
+ fig,ax = plt.subplots(**plt_kwargs)
+ legend_elements=[]
+ if mode=="beta" or mode=="BETA" or mode=="Beta":
+ if len(sum0)>0:
+ ax.errorbar(sum0["EFFECT_1"],sum0["EFFECT_2_aligned"], xerr=sum0["SE_1"],yerr=sum0["SE_2"],
+ linewidth=0,zorder=1,**err_kwargs)
+
+ ax.scatter(sum0["EFFECT_1"],sum0["EFFECT_2_aligned"],label=label[3],zorder=2,color="#cccccc",edgecolors=sum0["Edge_color"],marker=".",**scatter_kwargs)
+ #legend_elements.append(mpatches.Circle(facecolor='#cccccc', edgecolor='white', label=label[3]))
+ legend_elements.append(label[3])
+ if len(sum1only)>0:
+ ax.errorbar(sum1only["EFFECT_1"],sum1only["EFFECT_2_aligned"], xerr=sum1only["SE_1"],yerr=sum1only["SE_2"],
+ linewidth=0,zorder=1,**err_kwargs)
+ ax.scatter(sum1only["EFFECT_1"],sum1only["EFFECT_2_aligned"],label=label[0],zorder=2,color="#e6320e",edgecolors=sum1only["Edge_color"],marker="^",**scatter_kwargs)
+ #legend_elements.append(mpatches.Patch(facecolor='#e6320e', edgecolor='white', label=label[0]))
+ legend_elements.append(label[0])
+ if len(sum2only)>0:
+ ax.errorbar(sum2only["EFFECT_1"],sum2only["EFFECT_2_aligned"], xerr=sum2only["SE_1"],yerr=sum2only["SE_2"],
+ linewidth=0,zorder=1,**err_kwargs)
+ ax.scatter(sum2only["EFFECT_1"],sum2only["EFFECT_2_aligned"],label=label[1],zorder=2,color="#41e620",edgecolors=sum2only["Edge_color"],marker="o",**scatter_kwargs)
+ #legend_elements.append(mpatches.Circle(facecolor='#41e620', edgecolor='white', label=label[1]))
+ legend_elements.append(label[1])
+ if len(both)>0:
+ ax.errorbar(both["EFFECT_1"],both["EFFECT_2_aligned"], xerr=both["SE_1"],yerr=both["SE_2"],
+ linewidth=0,zorder=1,**err_kwargs)
+ ax.scatter(both["EFFECT_1"],both["EFFECT_2_aligned"],label=label[2],zorder=2,color="#205be6",edgecolors=both["Edge_color"],marker="s",**scatter_kwargs)
+ #legend_elements.append(mpatches.Patch(facecolor='#205be6', edgecolor='white', label=label[2]))
+ legend_elements.append(label[2])
+ else:
+ ## if OR
+ if len(sum0)>0:
+ ax.errorbar(sum0["OR_1"],sum0["OR_2_aligned"], xerr=sum0[["OR_L_1_err","OR_H_1_err"]].T,yerr=sum0[["OR_L_2_aligned_err","OR_H_2_aligned_err"]].T,
+ linewidth=0,zorder=1,**err_kwargs)
+ ax.scatter(sum0["OR_1"],sum0["OR_2_aligned"],label=label[3],zorder=2,color="#cccccc",edgecolors=sum0["Edge_color"],marker=".",**scatter_kwargs)
+ legend_elements.append(label[3])
+ if len(sum1only)>0:
+ ax.errorbar(sum1only["OR_1"],sum1only["OR_2_aligned"], xerr=sum1only[["OR_L_1_err","OR_H_1_err"]].T,yerr=sum1only[["OR_L_2_aligned_err","OR_H_2_aligned_err"]].T,
+ linewidth=0,zorder=1,**err_kwargs)
+ ax.scatter(sum1only["OR_1"],sum1only["OR_2_aligned"],label=label[0],zorder=2,color="#e6320e",edgecolors=sum1only["Edge_color"],marker="^",**scatter_kwargs)
+ legend_elements.append(label[0])
+ if len(sum2only)>0:
+ ax.errorbar(sum2only["OR_1"],sum2only["OR_2_aligned"], xerr=sum2only[["OR_L_1_err","OR_H_1_err"]].T,yerr=sum2only[["OR_L_2_aligned_err","OR_H_2_aligned_err"]].T,
+ linewidth=0,zorder=1,**err_kwargs)
+ ax.scatter(sum2only["OR_1"],sum2only["OR_2_aligned"],label=label[1],zorder=2,color="#41e620",edgecolors=sum2only["Edge_color"],marker="o",**scatter_kwargs)
+ legend_elements.append(label[1])
+ if len(both)>0:
+ ax.errorbar(both["OR_1"],both["OR_2_aligned"], xerr=both[["OR_L_1_err","OR_H_1_err"]].T,yerr=both[["OR_L_2_aligned_err","OR_H_2_aligned_err"]].T,
+ linewidth=0,zorder=1,**err_kwargs)
+ ax.scatter(both["OR_1"],both["OR_2_aligned"],label=label[2],zorder=2,color="#205be6",edgecolors=both["Edge_color"],marker="s",**scatter_kwargs)
+ legend_elements.append(label[2])
+ ## annotation #################################################################################################################
+ ax = scatter_annotation(ax, sig_list_merged,anno, anno_het, is_q, mode,
+ anno_min,anno_min1,anno_min2,anno_diff,anno_kwargs,adjust_text_kwargs_l,adjust_text_kwargs_r,
+ log,verbose
+ )
+ #################################################################################################################################
+
+ # plot x=0,y=0, and a 45 degree line
+ xl,xh=ax.get_xlim()
+ yl,yh=ax.get_ylim()
+
+ if mode=="beta" or mode=="BETA" or mode=="Beta":
+ #if using beta
+ ax.axhline(y=0, zorder=1,**helper_line_args)
+ ax.axvline(x=0, zorder=1,**helper_line_args)
+ else:
+ #if using OR
+ ax.axhline(y=1, zorder=1,**helper_line_args)
+ ax.axvline(x=1, zorder=1,**helper_line_args)
+
+ for spine in ['top', 'right']:
+ ax.spines[spine].set_visible(False)
+
+ ###regression line##############################################################################################################################
+ ax = confire_regression_line(is_reg,reg_box, sig_list_merged, ax, mode,xl,yl,xh,yh, null_beta, r_se,
+ is_45_helper_line,helper_line_args, font_kwargs,
+ log, verbose)
+
+
+ ax.set_xlabel(xylabel_prefix+label[0],**font_kwargs)
+ ax.set_ylabel(xylabel_prefix+label[1],**font_kwargs)
+
+ ax = configure_legend(fig, ax, legend_mode, is_q, is_q_mc, legend_elements, legend_pos, q_level,
+ font_kwargs,scatterargs,legend_args,
+ legend_title, legend_title2 )
+ ##plot finished########################################################################################
+ gc.collect()
+
+ save_figure(fig, save, keyword="esc",save_args=save_kwargs, log=log, verbose=verbose)
+
+ sig_list_merged = reorder_columns(sig_list_merged)
+
+ return [sig_list_merged, fig,log]
+
+ ###############################################################################################
+ ###############################################################################################
+ ###############################################################################################
+ ###############################################################################################
+ ###############################################################################################
+ ###############################################################################################
+ ###############################################################################################
+ ###############################################################################################
+ ###############################################################################################
+ ###############################################################################################
+ ###############################################################################################
+ ###############################################################################################
+ ###############################################################################################
+
+ def load_sumstats(path, usecols, label, log, verbose, sep):
+ if type(usecols) is not list:
+ usecols = [usecols]
+
+ log.write(" -Loading sumstats for {} : {}".format(label,",".join(usecols)), verbose=verbose)
+ #log.write(" -Loading {} SNP list in memory...".format(label), verbose=verbose)
+
+ if type(path) is Sumstats:
+ sumstats = path.data.loc[:,usecols].copy()
+ elif type(path) is pd.DataFrame:
+ sumstats = path.loc[:,usecols].copy()
+ else:
+ sumstats=pd.read_table(path,sep=sep,usecols=usecols)
+ return sumstats
+
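
The new load_sumstats helper centralizes what used to be three copies of the same branching: it accepts a gwaslab Sumstats object, a bare pandas DataFrame, or a file path, and returns only the requested columns. A usage sketch with an in-memory DataFrame and illustrative column names (assumes the module-level imports above are in scope):

    import pandas as pd

    df = pd.DataFrame({"SNPID": ["rs1", "rs2"], "P": [1e-9, 0.2]})
    subset = load_sumstats(path=df, usecols=["SNPID", "P"],
                           label="Sumstats_1", log=Log(),
                           verbose=True, sep="\t")
    print(subset.shape)  # (2, 2)
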
+ def configure_headers(mode,
+ path1,
+ path2,
+ cols_name_list_1,
+ cols_name_list_2,
+ effect_cols_list_1,
+ effect_cols_list_2,
+ scaled1,
+ scaled2,
+ log,
+ verbose):

- ######### 1 check the value used to plot
  if mode not in ["Beta","beta","BETA","OR","or"]:
  raise ValueError("Please input Beta or OR")

  if type(path1) is Sumstats:
- log.write("Path1 is gwaslab Sumstats object...")
+ log.write("Path1 is gwaslab Sumstats object...", verbose=verbose)
  if cols_name_list_1 is None:
  cols_name_list_1 = ["SNPID","P","EA","NEA","CHR","POS"]
+ if scaled1==True:
+ cols_name_list_1 = ["SNPID","MLOG10P","EA","NEA","CHR","POS"]
  if effect_cols_list_1 is None:
  if mode=="beta":
  effect_cols_list_1 = ["BETA","SE"]
  else:
  effect_cols_list_1 = ["OR","OR_95L","OR_95U"]
  elif type(path1) is pd.DataFrame:
- log.write("Path1 is pandas DataFrame object...")
+ log.write("Path1 is pandas DataFrame object...", verbose=verbose)

  if type(path2) is Sumstats:
- log.write("Path2 is gwaslab Sumstats object...")
+ log.write("Path2 is gwaslab Sumstats object...", verbose=verbose)
  if cols_name_list_2 is None:
  cols_name_list_2 = ["SNPID","P","EA","NEA","CHR","POS"]
+ if scaled2==True:
+ cols_name_list_2 = ["SNPID","MLOG10P","EA","NEA","CHR","POS"]
  if effect_cols_list_2 is None:
  if mode=="beta":
  effect_cols_list_2 = ["BETA","SE"]
  else:
  effect_cols_list_2 = ["OR","OR_95L","OR_95U"]
  elif type(path2) is pd.DataFrame:
- log.write("Path2 is pandas DataFrame object...")
+ log.write("Path2 is pandas DataFrame object...", verbose=verbose)
+
+ return cols_name_list_1,cols_name_list_2, effect_cols_list_1, effect_cols_list_2
+
+ def configure_common_snp_set(path1,path2,
+ snplist,
+ label,
+ cols_name_list_1,cols_name_list_2,
+ sep,
+ scaled1,
+ scaled2,
+ log,verbose):

- ######### 2 extract snplist2
- log.write(" -Loading "+label[1]+" SNP list in memory...")
+ ######### load sumstats2
+ sumstats = load_sumstats(path=path2,
+ usecols=cols_name_list_2[0],
+ label=label[1],
+ log=log,
+ verbose= verbose,
+ sep=sep[1])

- if type(path2) is Sumstats:
- sumstats = path2.data[[cols_name_list_2[0]]].copy()
- elif type(path2) is pd.DataFrame:
- sumstats = path2[[cols_name_list_2[0]]].copy()
- else:
- sumstats=pd.read_table(path2,sep=sep[1],usecols=[cols_name_list_2[0]])
-
  common_snp_set=set(sumstats[cols_name_list_2[0]].values)

- ######### 3 extract snplist1
+ ######### extract snplist1
  if snplist is not None:
+ #use only SNPID, P
  cols_to_extract = [cols_name_list_1[0],cols_name_list_1[1]]
  else:
+ # use SNPID, P, chr pos
  cols_to_extract = [cols_name_list_1[0],cols_name_list_1[1],cols_name_list_1[4],cols_name_list_1[5]]

- ######### 4 load sumstats1
- log.write(" -Loading sumstats for "+label[0]+":",",".join(cols_to_extract))
-
- if type(path1) is Sumstats:
- sumstats = path1.data[cols_to_extract].copy()
- elif type(path1) is pd.DataFrame:
- sumstats = path1[cols_to_extract].copy()
- else:
- sumstats = pd.read_table(path1,sep=sep[0],usecols=cols_to_extract)
+ ######### load sumstats1
+ sumstats = load_sumstats(path=path1,
+ usecols=cols_to_extract,
+ label=label[0],
+ log=log,
+ verbose= verbose,
+ sep=sep[0])

  gc.collect()

- if scaled1==True:
- sumstats[cols_name_list_1[1]] = np.power(10,-sumstats[cols_name_list_1[1]])
+ #if scaled1==True:
+ # sumstats[cols_name_list_1[1]] = np.power(10,-sumstats[cols_name_list_1[1]])
  ######### 5 extract the common set
+
  common_snp_set = common_snp_set.intersection(sumstats[cols_name_list_1[0]].values)
- log.write(" -Counting variants available for both datasets:",len(common_snp_set)," variants...")

+ log.write(" -Counting variants available for both datasets:",len(common_snp_set)," variants...", verbose=verbose)
+
+ return sumstats, common_snp_set
+
+ def rename_sumtats(sumstats, cols_name_list, snplist, scaled,suffix=""):
  ######### 6 rename the sumstats
- rename_dict = { cols_name_list_1[0]:"SNPID",
- cols_name_list_1[1]:"P",
+ rename_dict = { cols_name_list[0]:"SNPID",
+ cols_name_list[1]:"P{}".format(suffix),
  }
+ if scaled==True:
+ rename_dict[cols_name_list[1]] = "MLOG10P{}".format(suffix)

  if snplist is None:
- rename_dict[cols_name_list_1[4]]="CHR"
- rename_dict[cols_name_list_1[5]]="POS"
-
- sumstats.rename(columns=rename_dict,inplace=True)
+ rename_dict[cols_name_list[4]]="CHR"
+ rename_dict[cols_name_list[5]]="POS"

- ######### 7 exctract only available variants from sumstats1
- sumstats = sumstats.loc[sumstats["SNPID"].isin(common_snp_set),:]
-
- log.write(" -Using only variants available for both datasets...")
+ sumstats = sumstats.rename(columns=rename_dict)
+ return sumstats
+
+
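
rename_sumtats (the misspelling is in the source) now takes a suffix so the same helper serves both datasets: the p-value column becomes P{suffix}, or MLOG10P{suffix} when scaled=True, and CHR/POS are renamed only when no snplist is given. An illustrative call with made-up input headers:

    import pandas as pd

    df = pd.DataFrame({"rsid": ["rs1"], "pval": [0.01], "chrom": [1], "bp": [12345]})
    out = rename_sumtats(sumstats=df,
                         cols_name_list=["rsid", "pval", "ea", "nea", "chrom", "bp"],
                         snplist=None, scaled=False, suffix="_2")
    print(list(out.columns))  # ['SNPID', 'P_2', 'CHR', 'POS']
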
+ def extract_snp_for_comparison(sumstats, snplist, label,
+ get_lead_args, build, drop, anno,
+ sig_level,scaled, log, verbose):
  ######### 8 extact SNPs for comparison
-
  if snplist is not None:
  ######### 8.1 if a snplist is provided, use the snp list
- log.write(" -Extract variants in the given list from "+label[0]+"...")
- sig_list_1 = sumstats.loc[sumstats["SNPID"].isin(snplist),:].copy()
+ log.write(" -Extract variants in the given list from "+label+"...")
+ sig_list = sumstats.loc[sumstats["SNPID"].isin(snplist),:].copy()
  if anno=="GENENAME":
- sig_list_1 = annogene(sumstats,"SNPID","CHR","POS", build=build, verbose=verbose,**get_lead_args)
+ sig_list = annogene(sig_list,"SNPID","CHR","POS", build=build, verbose=verbose, **get_lead_args)
  else:
  ######### 8,2 otherwise use the automatically detected lead SNPs
- log.write(" -Extract lead variants from "+label[0]+"...")
- sig_list_1 = getsig(sumstats,"SNPID","CHR","POS","P", build=build, verbose=verbose,sig_level=sig_level,**get_lead_args)
+ log.write(" -Extract lead variants from "+label +"...", verbose=verbose)
+ sig_list = getsig(sumstats,"SNPID","CHR","POS","P","MLOG10P", build=build, verbose=verbose,sig_level=sig_level,**get_lead_args)

  if drop==True:
- sig_list_1 = drop_duplicate_and_na(sig_list_1, sort_by="P", log=log ,verbose=verbose)
+ if scaled==True:
+ sig_list = drop_duplicate_and_na(sig_list, sort_by="MLOG10P",ascending=False, log=log , verbose=verbose)
+ else:
+ sig_list = drop_duplicate_and_na(sig_list, sort_by="P", ascending=True, log=log , verbose=verbose)

- ######### 9 extract snplist2
- if snplist is not None:
- cols_to_extract = [cols_name_list_2[0],cols_name_list_2[1]]
- else:
- cols_to_extract = [cols_name_list_2[0],cols_name_list_2[1],cols_name_list_2[4],cols_name_list_2[5]]
-
- log.write(" -Loading sumstats for "+label[1]+":",",".join(cols_to_extract))
-
- if type(path2) is Sumstats:
- sumstats = path2.data[cols_to_extract].copy()
- elif type(path2) is pd.DataFrame:
- sumstats = path2[cols_to_extract].copy()
- else:
- sumstats = pd.read_table(path2,sep=sep[1],usecols=cols_to_extract)
-
- gc.collect()
-
- if scaled2==True:
- sumstats[cols_name_list_2[1]] = np.power(10,-sumstats[cols_name_list_2[1]])
- ######### 10 rename sumstats2
- rename_dict = { cols_name_list_2[0]:"SNPID",
- cols_name_list_2[1]:"P",
- }
- if snplist is None:
- rename_dict[cols_name_list_2[4]]="CHR"
- rename_dict[cols_name_list_2[5]]="POS"
- sumstats.rename(columns=rename_dict,inplace=True)
+ return sig_list
+
+ def merge_list(sig_list_1, sig_list_2, anno,labels,log, verbose):

- ######### 11 exctract only overlapping variants from sumstats2
- sumstats = sumstats.loc[sumstats["SNPID"].isin(common_snp_set),:]
+ log.write("Merging snps from "+labels[0]+" and "+labels[1]+"...", verbose=verbose)

- ######## 12 extact SNPs for comparison
- if snplist is not None:
- ######### 12.1 if a snplist is provided, use the snp list
- log.write(" -Extract snps in the given list from "+label[1]+"...")
- sig_list_2 = sumstats.loc[sumstats["SNPID"].isin(snplist),:].copy()
- if anno=="GENENAME":
- sig_list_2 = annogene(sumstats,"SNPID","CHR","POS", build=build, verbose=verbose,**get_lead_args)
- else:
- log.write(" -Extract lead snps from "+label[1]+"...")
- ######### 12.2 otherwise use the sutomatically detected lead SNPs
- sig_list_2 = getsig(sumstats,"SNPID","CHR","POS","P",build=build,
- verbose=verbose,sig_level=sig_level,**get_lead_args)
- if drop==True:
- sig_list_2 = drop_duplicate_and_na(sig_list_2, sort_by="P", log=log ,verbose=verbose)
+ if anno == "GENENAME":
+ if "GENE" not in sig_list_1.columns:
+ sig_list_1["GENE"]=pd.NA
+ sig_list_1["LOCATION"]=pd.NA
+ if "GENE" not in sig_list_2.columns:
+ sig_list_2["GENE"]=pd.NA
+ sig_list_2["LOCATION"]=pd.NA

- ######### 13 Merge two list using SNPID
- ##############################################################################
- log.write("Merging snps from "+label[0]+" and "+label[1]+"...")
-
  sig_list_merged = pd.merge(sig_list_1,sig_list_2,left_on="SNPID",right_on="SNPID",how="outer",suffixes=('_1', '_2'))
+
  if anno == "GENENAME":
  sig_list_merged.loc[sig_list_merged["SNPID"].isin((sig_list_1["SNPID"])),"GENENAME"] = sig_list_merged.loc[sig_list_merged["SNPID"].isin((sig_list_1["SNPID"])),"GENE_1"]
  sig_list_merged.loc[~sig_list_merged["SNPID"].isin((sig_list_1["SNPID"])),"GENENAME"] = sig_list_merged.loc[~sig_list_merged["SNPID"].isin((sig_list_1["SNPID"])),"GENE_2"]
@@ -262,173 +688,109 @@ def compare_effect(path1,
  # SNPID P_1 P_2
  #0 rs117986209 0.142569 0.394455
  #1 rs6704312 0.652104 0.143750
+ return sig_list_merged

- ###############################################################################
-
- ########## 14 Merging sumstats1
+ def configure_cols_to_extract(mode,
+ cols_name_list,
+ effect_cols_list,
+ eaf):

  if mode=="beta" or mode=="BETA" or mode=="Beta":
- #[snpid,p,ea,nea] ,[effect,se]
+ #[snpid,p,ea,nea] ,[effect,se]
  #[snpid,p,ea,nea,chr,pos],[effect,se]
  #[snpid,p,ea,nea,chr,pos],[OR,OR_l,OR_h]
- cols_to_extract = [cols_name_list_1[0],cols_name_list_1[1], cols_name_list_1[2],cols_name_list_1[3], effect_cols_list_1[0], effect_cols_list_1[1]]
+ cols_to_extract = [cols_name_list[0],cols_name_list[1], cols_name_list[2],cols_name_list[3], effect_cols_list[0], effect_cols_list[1]]
  else:
- cols_to_extract = [cols_name_list_1[0],cols_name_list_1[1], cols_name_list_1[2],cols_name_list_1[3], effect_cols_list_1[0], effect_cols_list_1[1], effect_cols_list_1[2]]
-
- if len(eaf)>0: cols_to_extract.append(eaf[0])
- log.write(" -Extract statistics of selected variants from "+label[0]+" : ",",".join(cols_to_extract) )
+ cols_to_extract = [cols_name_list[0],cols_name_list[1], cols_name_list[2],cols_name_list[3], effect_cols_list[0], effect_cols_list[1], effect_cols_list[2]]

- if type(path1) is Sumstats:
- sumstats = path1.data[cols_to_extract].copy()
- elif type(path1) is pd.DataFrame:
- sumstats = path1[cols_to_extract].copy()
- else:
- sumstats = pd.read_table(path1,sep=sep[0],usecols=cols_to_extract)
+ if len(eaf)>0:
+ cols_to_extract.append(eaf[0])

- if scaled1==True:
- sumstats[cols_name_list_1[1]] = np.power(10,-sumstats[cols_name_list_1[1]])
+ return cols_to_extract

+ def rename_sumstats_full(mode, sumstats, cols_name_list, effect_cols_list, eaf, drop, index, scaled, log, verbose):
  if mode=="beta" or mode=="BETA" or mode=="Beta":
- rename_dict = { cols_name_list_1[0]:"SNPID",
- cols_name_list_1[1]:"P_1",
- cols_name_list_1[2]:"EA_1",
- cols_name_list_1[3]:"NEA_1",
- effect_cols_list_1[0]:"EFFECT_1",
- effect_cols_list_1[1]:"SE_1",
- }
+ rename_dict = { cols_name_list[0]:"SNPID",
+ cols_name_list[1]:"P_{}".format(index),
+ cols_name_list[2]:"EA_{}".format(index),
+ cols_name_list[3]:"NEA_{}".format(index),
+ effect_cols_list[0]:"EFFECT_{}".format(index),
+ effect_cols_list[1]:"SE_{}".format(index)}
+

  else:
  # if or
- rename_dict = { cols_name_list_1[0]:"SNPID",
- cols_name_list_1[1]:"P_1",
- cols_name_list_1[2]:"EA_1",
- cols_name_list_1[3]:"NEA_1",
- effect_cols_list_1[0]:"OR_1",
- effect_cols_list_1[1]:"OR_L_1",
- effect_cols_list_1[2]:"OR_H_1"
- }
+ rename_dict = { cols_name_list[0]:"SNPID",
+ cols_name_list[1]:"P_{}".format(index),
+ cols_name_list[2]:"EA_{}".format(index),
+ cols_name_list[3]:"NEA_{}".format(index),
+ effect_cols_list[0]:"OR_{}".format(index),
+ effect_cols_list[1]:"OR_L_{}".format(index),
+ effect_cols_list[2]:"OR_H_{}".format(index)}
+ if scaled==True:
+ rename_dict[cols_name_list[1]]="MLOG10P_{}".format(index)
  ## check if eaf column is provided.
- if len(eaf)>0: rename_dict[eaf[0]]="EAF_1"
- sumstats.rename(columns=rename_dict, inplace=True)
+ if len(eaf)>0:
+ rename_dict[eaf[index-1]]="EAF_{}".format(index)
+ sumstats = sumstats.rename(columns=rename_dict)

  # drop na and duplicate
  if drop==True:
- sumstats = drop_duplicate_and_na(sumstats, sort_by="P_1", log=log , verbose=verbose)
- sumstats.drop("P_1",axis=1,inplace=True)
-
- log.write(" -Merging "+label[0]+" effect information...", verbose=verbose)
-
- sig_list_merged = pd.merge(sig_list_merged,sumstats,
- left_on="SNPID",right_on="SNPID",
- how="left")
+ if scaled==True:
+ sumstats = drop_duplicate_and_na(sumstats, sort_by="MLOG10P_{}".format(index),ascending=False, log=log , verbose=verbose)
+ else:
+ sumstats = drop_duplicate_and_na(sumstats, sort_by="P_{}".format(index), ascending=True, log=log , verbose=verbose)

- ############ 15 merging sumstats2
-
- if mode=="beta" or mode=="BETA" or mode=="Beta":
- cols_to_extract = [cols_name_list_2[0],cols_name_list_2[1],cols_name_list_2[2],cols_name_list_2[3], effect_cols_list_2[0], effect_cols_list_2[1]]
+ if scaled==True:
+ sumstats.drop("MLOG10P_{}".format(index),axis=1,inplace=True)
  else:
- # if or
- cols_to_extract = [cols_name_list_2[0],cols_name_list_2[1],cols_name_list_2[2],cols_name_list_2[3], effect_cols_list_2[0], effect_cols_list_2[1], effect_cols_list_2[2]]
- ## check if eaf column is provided.
- if len(eaf)>0: cols_to_extract.append(eaf[1])
+ sumstats.drop("P_{}".format(index),axis=1,inplace=True)
+ return sumstats

- log.write(" -Extract statistics of selected variants from "+label[1]+" : ",",".join(cols_to_extract), verbose=verbose )
- if type(path2) is Sumstats:
- sumstats = path2.data[cols_to_extract].copy()
- elif type(path2) is pd.DataFrame:
- sumstats = path2[cols_to_extract].copy()
- else:
- sumstats = pd.read_table(path2,sep=sep[1],usecols=cols_to_extract)
+ def update_stats(sig_list_merged,
+ path,
+ cols_name_list,
+ sep,
+ snplist,
+ label,
+ drop,
+ index,
+ scaled,
+ log,
+ verbose):

- if scaled2==True:
- sumstats[cols_name_list_2[1]] = np.power(10,-sumstats[cols_name_list_2[1]])
+ log.write(" -Updating missing information for "+label+" ...", verbose=verbose)
+ cols_to_extract = [cols_name_list[0], cols_name_list[1]]

- gc.collect()
+ sumstats = load_sumstats(path=path,
+ usecols=cols_to_extract,
+ label=label,
+ log=log,
+ verbose= verbose,
+ sep=sep)
+ #if scaled1==True:
+ # sumstats[cols_name_list_1[1]] = np.power(10,-sumstats[cols_name_list_1[1]])

- if mode=="beta" or mode=="BETA" or mode=="Beta":
- rename_dict = { cols_name_list_2[0]:"SNPID",
- cols_name_list_2[1]:"P_2",
- cols_name_list_2[2]:"EA_2",
- cols_name_list_2[3]:"NEA_2",
- effect_cols_list_2[0]:"EFFECT_2",
- effect_cols_list_2[1]:"SE_2",
- }
- else:
- rename_dict = { cols_name_list_2[0]:"SNPID",
- cols_name_list_2[1]:"P_2",
- cols_name_list_2[2]:"EA_2",
- cols_name_list_2[3]:"NEA_2",
- effect_cols_list_2[0]:"OR_2",
- effect_cols_list_2[1]:"OR_L_2",
- effect_cols_list_2[2]:"OR_H_2"
- }
- if len(eaf)>0: rename_dict[eaf[1]]="EAF_2"
- sumstats.rename(columns=rename_dict, inplace=True)
+ sumstats = rename_sumtats(sumstats = sumstats,
+ cols_name_list = cols_name_list,
+ snplist = snplist,
+ scaled=scaled,
+ suffix="_{}".format(index))
  # drop na and duplicate
  if drop==True:
- sumstats = drop_duplicate_and_na(sumstats, sort_by="P_2", log=log, verbose=verbose)
- sumstats.drop("P_2",axis=1,inplace=True)
-
- log.write(" -Merging "+label[1]+" effect information...", verbose=verbose)
- sig_list_merged = pd.merge(sig_list_merged,sumstats,
- left_on="SNPID",right_on="SNPID",
- how="left")
-
- sig_list_merged.set_index("SNPID",inplace=True)
+ if scaled==True:
+ sumstats = drop_duplicate_and_na(sumstats, sort_by="MLOG10P_{}".format(index),ascending=False, log=log , verbose=verbose)
+ else:
+ sumstats = drop_duplicate_and_na(sumstats, sort_by="P_{}".format(index), ascending=True, log=log , verbose=verbose)

- ################ 16 update sumstats1
- log.write(" -Updating missing information for "+label[0]+" ...", verbose=verbose)
- if type(path1) is Sumstats:
- sumstats = path1.data[[cols_name_list_1[0],cols_name_list_1[1]]].copy()
- elif type(path1) is pd.DataFrame:
- sumstats = path1[[cols_name_list_1[0],cols_name_list_1[1]]].copy()
- else:
- sumstats = pd.read_table(path1,sep=sep[0],usecols=[cols_name_list_1[0],cols_name_list_1[1]])
- if scaled1==True:
- sumstats[cols_name_list_1[1]] = np.power(10,-sumstats[cols_name_list_1[1]])
- sumstats.rename(columns={
- cols_name_list_1[0]:"SNPID",
- cols_name_list_1[1]:"P_1"
- },
- inplace=True)
- # drop na and duplicate
- if drop==True:
- sumstats = drop_duplicate_and_na(sumstats, sort_by="P_1", log=log, verbose=verbose)

- sumstats.set_index("SNPID",inplace=True)
+ sumstats = sumstats.set_index("SNPID")
  sig_list_merged.update(sumstats)
-
- ################# 17 update sumstats2
- log.write(" -Updating missing information for "+label[1]+" ...", verbose=verbose)
- if type(path2) is Sumstats:
- sumstats = path2.data[[cols_name_list_2[0],cols_name_list_2[1]]].copy()
- elif type(path2) is pd.DataFrame:
- sumstats = path2[[cols_name_list_2[0],cols_name_list_2[1]]].copy()
- else:
- sumstats = pd.read_table(path2,sep=sep[1],usecols=[cols_name_list_2[0],cols_name_list_2[1]])

- if scaled2==True:
- sumstats[cols_name_list_2[1]] = np.power(10,-sumstats[cols_name_list_2[1]])
- sumstats.rename(columns={
- cols_name_list_2[0]:"SNPID",
- cols_name_list_2[1]:"P_2"
- },
- inplace=True)
- # drop na and duplicate
- if drop==True:
- sumstats = drop_duplicate_and_na(sumstats, sort_by="P_2", log=log, verbose=verbose)
-
- sumstats.set_index("SNPID",inplace=True)
- sig_list_merged.update(sumstats)
+ return sig_list_merged

- if scaled1 ==True :
- log.write(" -Sumstats -log10(P) values are being converted to P...", verbose=verbose)
- sig_list_merged["P_1"] = np.power(10,-sig_list_merged["P_1"])
- if scaled2 ==True :
- log.write(" -Sumstats -log10(P) values are being converted to P...", verbose=verbose)
- sig_list_merged["P_2"] = np.power(10,-sig_list_merged["P_2"])
- ####
- #################################################################################
+
+ def assign_indicator(sig_list_merged, snplist, sig_level, scaled1, scaled2, log, verbose):
  ############## 18 init indicator
  log.write(" -Assigning indicator ...", verbose=verbose)
  # 0-> 0
@@ -436,14 +798,24 @@
  # 2 -> sig in sumsatts2
  # 3-> sig in both sumstats1 + sumstats2
  sig_list_merged["indicator"] = 0
- sig_list_merged.loc[sig_list_merged["P_1"]<sig_level,"indicator"]=1+sig_list_merged.loc[sig_list_merged["P_1"]<sig_level,"indicator"]
- sig_list_merged.loc[sig_list_merged["P_2"]<sig_level,"indicator"]=2+sig_list_merged.loc[sig_list_merged["P_2"]<sig_level,"indicator"]
+
+ if scaled1==True:
+ sig_list_merged.loc[sig_list_merged["MLOG10P_1"]>-np.log10(sig_level),"indicator"]=1+sig_list_merged.loc[sig_list_merged["MLOG10P_1"]>-np.log10(sig_level),"indicator"]
+ else:
+ sig_list_merged.loc[sig_list_merged["P_1"]<sig_level,"indicator"]=1+sig_list_merged.loc[sig_list_merged["P_1"]<sig_level,"indicator"]
+
+ if scaled2==True:
+ sig_list_merged.loc[sig_list_merged["MLOG10P_2"]>-np.log10(sig_level),"indicator"]=2+sig_list_merged.loc[sig_list_merged["MLOG10P_2"]>-np.log10(sig_level),"indicator"]
+ else:
+ sig_list_merged.loc[sig_list_merged["P_2"]<sig_level,"indicator"]=2+sig_list_merged.loc[sig_list_merged["P_2"]<sig_level,"indicator"]

  if snplist is None:
  sig_list_merged["CHR"]=np.max(sig_list_merged[["CHR_1","CHR_2"]], axis=1).astype(int)
  sig_list_merged["POS"]=np.max(sig_list_merged[["POS_1","POS_2"]], axis=1).astype(int)
  sig_list_merged.drop(labels=['CHR_1', 'CHR_2','POS_1', 'POS_2'], axis=1,inplace=True)
-
+ return sig_list_merged
+
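
The indicator assigned above is effectively a two-bit flag: adding 1 when the variant passes sig_level in sumstats1 and 2 when it passes in sumstats2 yields 0 (neither), 1 (sumstats1 only), 2 (sumstats2 only), or 3 (both), which is how compare_effect later splits the scatter groups. The same encoding on toy data:

    import pandas as pd

    p1 = pd.Series([1e-9, 0.5, 1e-9, 0.5])
    p2 = pd.Series([1e-9, 1e-9, 0.5, 0.5])
    indicator = (p1 < 5e-8).astype(int) + 2 * (p2 < 5e-8).astype(int)
    print(indicator.tolist())  # [3, 2, 1, 0]
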
+ def align_alleles(sig_list_merged, label,mode,eaf, log, verbose):
  log.write(" -Aligning "+label[1]+" EA with "+label[0]+" EA ...", verbose=verbose)
  ############### 19 align allele effect with sumstats 1
  sig_list_merged["EA_1"]=sig_list_merged["EA_1"].astype("string")
@@ -489,7 +861,19 @@
  # flip eaf
  sig_list_merged["EAF_2_aligned"]=sig_list_merged["EAF_2"]
  sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"EAF_2_aligned"]= 1 -sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"EAF_2"]
-
+ return sig_list_merged
+
+ #########################################################################################################################
+ #########################################################################################################################
+ #########################################################################################################################
+ #########################################################################################################################
+ #########################################################################################################################
+ #########################################################################################################################
+ #########################################################################################################################
+ #########################################################################################################################
+ #########################################################################################################################
+
+ def check_allele_match(sig_list_merged, allele_match, label, log,verbose):
  # checking effect allele matching
  nonmatch = np.nansum(sig_list_merged["EA_1"] != sig_list_merged["EA_2_aligned"])
  log.write(" -Aligned all EAs in {} with EAs in {} ...".format(label[1],label[0]), verbose=verbose)
@@ -500,16 +884,19 @@
  sig_list_merged = sig_list_merged.loc[sig_list_merged["EA_1"] == sig_list_merged["EA_2_aligned"]]
  else:
  log.write(" -No variants with EA not matching...", verbose=verbose)
- if fdr==True:
- log.write(" -Using FDR...", verbose=verbose)
- #sig_list_merged["P_1"] = fdrcorrection(sig_list_merged["P_1"])[1]
- #sig_list_merged["P_2"] = fdrcorrection(sig_list_merged["P_2"])[1]
- sig_list_merged["P_1"] =ss.false_discovery_control(sig_list_merged["P_1"])
- sig_list_merged["P_2"] =ss.false_discovery_control(sig_list_merged["P_2"])
+ return sig_list_merged

- ####################################################################################################################################
- ## winner's curse correction using aligned beta
+ def winnerscurse_correction(sig_list_merged, mode, wc_correction, sig_level, scaled1, scaled2, log, verbose):
  if mode=="beta":
+ if scaled1==True:
+ match1= sig_list_merged["MLOG10P_1"]>-np.log10(sig_level)
+ else:
+ match1 = sig_list_merged["P_1"]<sig_level
+ if scaled2==True:
+ match2= sig_list_merged["MLOG10P_2"]>-np.log10(sig_level)
+ else:
+ match2 = sig_list_merged["P_2"]<sig_level
+
  if wc_correction == "all":
  log.write(" -Correcting BETA for winner's curse with threshold at {} for all variants...".format(sig_level), verbose=verbose)
  sig_list_merged["EFFECT_1_RAW"] = sig_list_merged["EFFECT_1"].copy()
@@ -522,128 +909,139 @@ def compare_effect(path1,
522
909
  sig_list_merged["EFFECT_2_aligned"] = sig_list_merged[["EFFECT_2_aligned_RAW","SE_2"]].apply(lambda x: wc_correct(x[0],x[1],sig_level),axis=1)
523
910
 
524
911
  elif wc_correction == "sig" :
912
+
525
913
  log.write(" - Correcting BETA for winner's curse with threshold at {} for significant variants...".format(sig_level), verbose=verbose)
526
914
  sig_list_merged["EFFECT_1_RAW"] = sig_list_merged["EFFECT_1"].copy()
527
915
  sig_list_merged["EFFECT_2_aligned_RAW"] = sig_list_merged["EFFECT_2_aligned"].copy()
528
- log.write(" -Correcting BETA for {} variants in sumstats1...".format(sum(sig_list_merged["P_1"]<sig_level)), verbose=verbose)
529
- sig_list_merged.loc[sig_list_merged["P_1"]<sig_level, "EFFECT_1"] = sig_list_merged.loc[sig_list_merged["P_1"]<sig_level, ["EFFECT_1_RAW","SE_1"]].apply(lambda x: wc_correct_test(x[0],x[1],sig_level),axis=1)
530
- log.write(" -Correcting BETA for {} variants in sumstats2...".format(sum(sig_list_merged["P_2"]<sig_level)), verbose=verbose)
531
- sig_list_merged.loc[sig_list_merged["P_2"]<sig_level, "EFFECT_2_aligned"] = sig_list_merged.loc[sig_list_merged["P_2"]<sig_level, ["EFFECT_2_aligned_RAW","SE_2"]].apply(lambda x: wc_correct_test(x[0],x[1],sig_level),axis=1)
916
+ log.write(" -Correcting BETA for {} variants in sumstats1...".format(sum(match1)), verbose=verbose)
917
+ sig_list_merged.loc[match1, "EFFECT_1"] = sig_list_merged.loc[match1, ["EFFECT_1_RAW","SE_1"]].apply(lambda x: wc_correct_test(x[0],x[1],sig_level),axis=1)
918
+ log.write(" -Correcting BETA for {} variants in sumstats2...".format(sum(match2)), verbose=verbose)
919
+ sig_list_merged.loc[match2, "EFFECT_2_aligned"] = sig_list_merged.loc[match2, ["EFFECT_2_aligned_RAW","SE_2"]].apply(lambda x: wc_correct_test(x[0],x[1],sig_level),axis=1)
532
920
 
533
921
  elif wc_correction == "sumstats1" :
534
922
  log.write(" - Correcting BETA for winner's curse with threshold at {} for significant variants in sumstats1...".format(sig_level), verbose=verbose)
535
923
  sig_list_merged["EFFECT_1_RAW"] = sig_list_merged["EFFECT_1"].copy()
536
- log.write(" -Correcting BETA for {} variants in sumstats1...".format(sum(sig_list_merged["P_1"]<sig_level)), verbose=verbose)
537
- sig_list_merged.loc[sig_list_merged["P_1"]<sig_level, "EFFECT_1"] = sig_list_merged.loc[sig_list_merged["P_1"]<sig_level, ["EFFECT_1_RAW","SE_1"]].apply(lambda x: wc_correct_test(x[0],x[1],sig_level),axis=1)
924
+ log.write(" -Correcting BETA for {} variants in sumstats1...".format(sum(match1)), verbose=verbose)
925
+ sig_list_merged.loc[match1, "EFFECT_1"] = sig_list_merged.loc[match1, ["EFFECT_1_RAW","SE_1"]].apply(lambda x: wc_correct_test(x[0],x[1],sig_level),axis=1)
538
926
 
539
927
  elif wc_correction == "sumstats2" :
540
928
  log.write(" - Correcting BETA for winner's curse with threshold at {} for significant variants in sumstats2...".format(sig_level), verbose=verbose)
541
929
  sig_list_merged["EFFECT_2_aligned_RAW"] = sig_list_merged["EFFECT_2_aligned"].copy()
542
- log.write(" -Correcting BETA for {} variants in sumstats2...".format(sum(sig_list_merged["P_2"]<sig_level)), verbose=verbose)
543
- sig_list_merged.loc[sig_list_merged["P_2"]<sig_level, "EFFECT_2_aligned"] = sig_list_merged.loc[sig_list_merged["P_2"]<sig_level, ["EFFECT_2_aligned_RAW","SE_2"]].apply(lambda x: wc_correct_test(x[0],x[1],sig_level),axis=1)
930
+ log.write(" -Correcting BETA for {} variants in sumstats2...".format(sum(match2)), verbose=verbose)
931
+ sig_list_merged.loc[match2, "EFFECT_2_aligned"] = sig_list_merged.loc[match2, ["EFFECT_2_aligned_RAW","SE_2"]].apply(lambda x: wc_correct_test(x[0],x[1],sig_level),axis=1)
932
+ return sig_list_merged
544
933
 
545
- ########################## Het test############################################################
546
- ## heterogeneity test
547
- if (is_q == True):
548
- log.write(" -Calculating Cochran's Q statistics and peform chisq test...", verbose=verbose)
549
- if mode=="beta" or mode=="BETA" or mode=="Beta":
550
- sig_list_merged = test_q(sig_list_merged,"EFFECT_1","SE_1","EFFECT_2_aligned","SE_2",q_level=q_level,is_q_mc=is_q_mc, log=log, verbose=verbose)
551
- else:
552
- sig_list_merged = test_q(sig_list_merged,"BETA_1","SE_1","BETA_2_aligned","SE_2",q_level=q_level,is_q_mc=is_q_mc, log=log, verbose=verbose)
553
-
554
- ######################### save ###############################################################
555
- ## save the merged data
556
- save_path = label[0]+"_"+label[1]+"_beta_sig_list_merged.tsv"
557
- log.write(" -Saving the merged data to:",save_path, verbose=verbose)
558
- sig_list_merged.to_csv(save_path,"\t")
559
-
560
- ########################## maf_threshold#############################################################
934
+ def filter_by_maf(sig_list_merged, eaf, maf_level, log, verbose):
561
935
  if (len(eaf)>0) and (maf_level is not None):
562
936
  both_eaf_clear = (sig_list_merged["EAF_1"]>maf_level)&(sig_list_merged["EAF_1"]<1-maf_level)&(sig_list_merged["EAF_2"]>maf_level)&(sig_list_merged["EAF_2"]<1-maf_level)
563
937
  log.write(" -Exclude "+str(len(sig_list_merged) -sum(both_eaf_clear))+ " variants with maf <",maf_level, verbose=verbose)
564
938
  sig_list_merged = sig_list_merged.loc[both_eaf_clear,:]
565
- # heterogeneity summary
566
- if (is_q == True):
567
- log.write(" -Significant het:" ,len(sig_list_merged.loc[sig_list_merged["HetP"]<0.05,:]), verbose=verbose)
568
- log.write(" -All sig:" ,len(sig_list_merged), verbose=verbose)
569
- log.write(" -Het rate:" ,len(sig_list_merged.loc[sig_list_merged["HetP"]<0.05,:])/len(sig_list_merged), verbose=verbose)
939
+ return sig_list_merged
940
+
570
941
 
571
- # extract group
572
- if include_all==True:
573
- sum0 = sig_list_merged.loc[sig_list_merged["indicator"]==0,:].dropna(axis=0)
574
- else:
575
- sum0 = pd.DataFrame()
576
942
 
577
- sum1only = sig_list_merged.loc[sig_list_merged["indicator"]==1,:].copy()
578
- sum2only = sig_list_merged.loc[sig_list_merged["indicator"]==2,:].copy()
579
- both = sig_list_merged.loc[sig_list_merged["indicator"]==3,:].copy()
943
+
944
+
945
+ def test_q(df,beta1,se1,beta2,se2,q_level=0.05,is_q_mc=False, log=Log(), verbose=False):
+ w1="Weight_1"
+ w2="Weight_2"
+ beta="BETA_FE"
+ q="Q"
+ pq="HetP"
+ rawpq="RAW_HetP"
+ i2="I2"
+ df[w1]=1/(df[se1])**2
+ df[w2]=1/(df[se2])**2
+ df[beta] =(df[w1]*df[beta1] + df[w2]*df[beta2])/(df[w1]+df[w2])

- if is_q==False:
- sum0["Edge_color"]="none"
- sum1only["Edge_color"]="none"
- sum2only["Edge_color"]="none"
- both["Edge_color"]="none"
+ # Cochran(1954)
+ df[q] = df[w1]*(df[beta1]-df[beta])**2 + df[w2]*(df[beta2]-df[beta])**2
+ df[pq] = ss.chi2.sf(df[q], 1)
+ df["Edge_color"]="white"
+
+ if is_q_mc=="fdr":
+ log.write(" -FDR correction applied...", verbose=verbose)
+ df[rawpq] = df[pq]
+ df[pq] = ss.false_discovery_control(df[pq])
+
+ elif is_q_mc=="bon":
+ log.write(" -Bonferroni correction applied...", verbose=verbose)
+ df[rawpq] = df[pq]
+ df[pq] = df[pq] * len(df[pq])

- log.write(" -Identified "+str(len(sum0)) + " variants which are not significant in " + label[3]+".", verbose=verbose)
- log.write(" -Identified "+str(len(sum1only)) + " variants which are only significant in " + label[0]+".", verbose=verbose)
- log.write(" -Identified "+str(len(sum2only)) + " variants which are only significant in " + label[1]+".", verbose=verbose)
- log.write(" -Identified "+str(len(both)) + " variants which are significant in " + label[2] + ".", verbose=verbose)
+ df.loc[df[pq]<q_level,"Edge_color"]="black"
+ df.drop(columns=["Weight_1","Weight_2","BETA_FE"],inplace=True)
+ # Huedo-Medina, T. B., Sánchez-Meca, J., Marín-Martínez, F., & Botella, J. (2006). Assessing heterogeneity in meta-analysis: Q statistic or I² index?. Psychological methods, 11(2), 193.

- ##plot########################################################################################
- log.write("Creating the scatter plot for effect sizes comparison...", verbose=verbose)
- #plt.style.use("ggplot")
- sns.set_style("ticks")
- fig,ax = plt.subplots(**plt_args)
- legend_elements=[]
- if mode=="beta" or mode=="BETA" or mode=="Beta":
- if len(sum0)>0:
- ax.errorbar(sum0["EFFECT_1"],sum0["EFFECT_2_aligned"], xerr=sum0["SE_1"],yerr=sum0["SE_2"],
- linewidth=0,zorder=1,**errargs)
-
- ax.scatter(sum0["EFFECT_1"],sum0["EFFECT_2_aligned"],label=label[3],zorder=2,color="#cccccc",edgecolors=sum0["Edge_color"],marker=".",**scatterargs)
- #legend_elements.append(mpatches.Circle(facecolor='#cccccc', edgecolor='white', label=label[3]))
- legend_elements.append(label[3])
- if len(sum1only)>0:
- ax.errorbar(sum1only["EFFECT_1"],sum1only["EFFECT_2_aligned"], xerr=sum1only["SE_1"],yerr=sum1only["SE_2"],
- linewidth=0,zorder=1,**errargs)
- ax.scatter(sum1only["EFFECT_1"],sum1only["EFFECT_2_aligned"],label=label[0],zorder=2,color="#e6320e",edgecolors=sum1only["Edge_color"],marker="^",**scatterargs)
- #legend_elements.append(mpatches.Patch(facecolor='#e6320e', edgecolor='white', label=label[0]))
- legend_elements.append(label[0])
- if len(sum2only)>0:
- ax.errorbar(sum2only["EFFECT_1"],sum2only["EFFECT_2_aligned"], xerr=sum2only["SE_1"],yerr=sum2only["SE_2"],
- linewidth=0,zorder=1,**errargs)
- ax.scatter(sum2only["EFFECT_1"],sum2only["EFFECT_2_aligned"],label=label[1],zorder=2,color="#41e620",edgecolors=sum2only["Edge_color"],marker="o",**scatterargs)
- #legend_elements.append(mpatches.Circle(facecolor='#41e620', edgecolor='white', label=label[1]))
- legend_elements.append(label[1])
- if len(both)>0:
- ax.errorbar(both["EFFECT_1"],both["EFFECT_2_aligned"], xerr=both["SE_1"],yerr=both["SE_2"],
- linewidth=0,zorder=1,**errargs)
- ax.scatter(both["EFFECT_1"],both["EFFECT_2_aligned"],label=label[2],zorder=2,color="#205be6",edgecolors=both["Edge_color"],marker="s",**scatterargs)
- #legend_elements.append(mpatches.Patch(facecolor='#205be6', edgecolor='white', label=label[2]))
- legend_elements.append(label[2])
- else:
- ## if OR
- if len(sum0)>0:
- ax.errorbar(sum0["OR_1"],sum0["OR_2_aligned"], xerr=sum0[["OR_L_1_err","OR_H_1_err"]].T,yerr=sum0[["OR_L_2_aligned_err","OR_H_2_aligned_err"]].T,
- linewidth=0,zorder=1,**errargs)
- ax.scatter(sum0["OR_1"],sum0["OR_2_aligned"],label=label[3],zorder=2,color="#cccccc",edgecolors=sum0["Edge_color"],marker=".",**scatterargs)
- legend_elements.append(label[3])
- if len(sum1only)>0:
- ax.errorbar(sum1only["OR_1"],sum1only["OR_2_aligned"], xerr=sum1only[["OR_L_1_err","OR_H_1_err"]].T,yerr=sum1only[["OR_L_2_aligned_err","OR_H_2_aligned_err"]].T,
- linewidth=0,zorder=1,**errargs)
- ax.scatter(sum1only["OR_1"],sum1only["OR_2_aligned"],label=label[0],zorder=2,color="#e6320e",edgecolors=sum1only["Edge_color"],marker="^",**scatterargs)
- legend_elements.append(label[0])
- if len(sum2only)>0:
- ax.errorbar(sum2only["OR_1"],sum2only["OR_2_aligned"], xerr=sum2only[["OR_L_1_err","OR_H_1_err"]].T,yerr=sum2only[["OR_L_2_aligned_err","OR_H_2_aligned_err"]].T,
- linewidth=0,zorder=1,**errargs)
- ax.scatter(sum2only["OR_1"],sum2only["OR_2_aligned"],label=label[1],zorder=2,color="#41e620",edgecolors=sum2only["Edge_color"],marker="o",**scatterargs)
- legend_elements.append(label[1])
- if len(both)>0:
- ax.errorbar(both["OR_1"],both["OR_2_aligned"], xerr=both[["OR_L_1_err","OR_H_1_err"]].T,yerr=both[["OR_L_2_aligned_err","OR_H_2_aligned_err"]].T,
- linewidth=0,zorder=1,**errargs)
- ax.scatter(both["OR_1"],both["OR_2_aligned"],label=label[2],zorder=2,color="#205be6",edgecolors=both["Edge_color"],marker="s",**scatterargs)
- legend_elements.append(label[2])
- ## annotation #################################################################################################################
+ # calculate I2
+ df[i2] = (df[q] - 1)/df[q]
+ df.loc[df[i2]<0,i2] = 0
+
+ return df
+
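For orientation, test_q implements Cochran's heterogeneity test for two estimates: a fixed-effect (inverse-variance-weighted) pooled beta is computed, Q measures each study's weighted squared deviation from it, and Q is referred to a chi-square distribution with one degree of freedom (k - 1 = 1 for k = 2 studies). Note that the Bonferroni branch multiplies p by the number of tests without capping at 1, which is harmless for the p < q_level comparison it feeds. A self-contained sketch with hypothetical numbers (scalar rather than the DataFrame version above):

    import scipy.stats as ss

    # Hypothetical per-variant estimates from two studies (assumed values).
    beta1, se1 = 0.12, 0.02
    beta2, se2 = 0.05, 0.03

    # Inverse-variance weights and the fixed-effect pooled estimate.
    w1, w2 = 1 / se1**2, 1 / se2**2
    beta_fe = (w1 * beta1 + w2 * beta2) / (w1 + w2)

    # Cochran's Q, chi-square with 1 df for two studies.
    q = w1 * (beta1 - beta_fe)**2 + w2 * (beta2 - beta_fe)**2
    het_p = ss.chi2.sf(q, 1)          # here q ~= 3.77, het_p ~= 0.052

    # I2 = (Q - df) / Q, floored at 0 (Huedo-Medina et al., 2006).
    i2 = max(0.0, (q - 1) / q)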
+ def jackknife_r(df,x="EFFECT_1",y="EFFECT_2_aligned"):
+ """Jackknife estimation of se for r
+
+ """
+
+ # dropna
+ df_nona = df.loc[:,[x,y]].dropna()
+
+ # non-empty entries (count after dropna so "nrow" aligns with df_nona)
+ n=len(df_nona)
+
+ # assign row number
+ df_nona["nrow"] = range(n)
+
+ # a list to store r
+ r_list=[]
+
+ # estimate r
+ for i in range(n):
+ # exclude 1 record
+ records_to_use = df_nona["nrow"]!=i
+ # estimate r
+ reg_jackknife = ss.linregress(df_nona.loc[records_to_use, x],df_nona.loc[records_to_use,y])
+ # add r_i to list
+ r_list.append(reg_jackknife[2])
+
+ # convert list to array
+ rs = np.array(r_list)
+ # https://en.wikipedia.org/wiki/Jackknife_resampling
+ r_se = np.sqrt( (n-1)/n * np.sum((rs - np.mean(rs))**2) )
+ return r_se
+
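The cited page gives the standard delete-one jackknife standard error: with r_i the correlation re-estimated after leaving out observation i, se(r) = sqrt((n-1)/n * sum_i (r_i - mean(r))**2), which is exactly the expression above. A short standalone sketch on toy data (assumed values, illustration only):

    import numpy as np
    import pandas as pd
    import scipy.stats as ss

    # Toy effect-size pairs (assumed values).
    df = pd.DataFrame({"EFFECT_1":         [0.10, 0.20, 0.15, 0.30, 0.25],
                       "EFFECT_2_aligned": [0.12, 0.18, 0.20, 0.28, 0.22]})
    n = len(df)

    # Delete-one estimates of r via the same linregress call used above.
    rs = np.array([ss.linregress(df["EFFECT_1"].drop(i),
                                 df["EFFECT_2_aligned"].drop(i)).rvalue
                   for i in range(n)])

    # Jackknife standard error of r.
    r_se = np.sqrt((n - 1) / n * np.sum((rs - rs.mean())**2))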
+ def drop_duplicate_and_na(df,snpid="SNPID",sort_by=False,log=Log(),ascending=True,verbose=True):
+
+ length_before = len(df)
+
+ if sort_by!=False:
+ df.sort_values(by = sort_by, ascending=ascending, inplace=True)
+
+ df.dropna(axis="index",subset=[snpid],inplace=True)
+ df.drop_duplicates(subset=[snpid], keep='first', inplace=True)
+
+ length_after= len(df)
+ if length_before != length_after:
+ log.write(" -Dropped {} duplicates or NAs...".format(length_before - length_after), verbose=verbose)
+ return df
+
+
+
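Because drop_duplicates uses keep='first' after the optional sort, the new ascending argument controls which duplicate survives; sorting by P ascending, for instance, keeps the most significant copy of each SNPID. A sketch on toy data (assumed column names):

    import pandas as pd

    # Toy sumstats with a duplicated SNPID and a missing SNPID.
    toy = pd.DataFrame({"SNPID": ["rs1", "rs1", "rs2", None],
                        "P":     [1e-9, 1e-4, 2e-6, 1e-3]})

    # Sorting by P ascending keeps the rs1 record with P=1e-9; the row
    # with a missing SNPID is dropped before de-duplication.
    cleaned = drop_duplicate_and_na(toy, snpid="SNPID", sort_by="P", ascending=True)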
+ #########################################################################################################################
+ #########################################################################################################################
+ #########################################################################################################################
+ #########################################################################################################################
+ #########################################################################################################################
+ #########################################################################################################################
+ #########################################################################################################################
+ #########################################################################################################################
+ #########################################################################################################################
+
+ def scatter_annotation(ax, sig_list_merged,anno, anno_het, is_q, mode,
+ anno_min,anno_min1,anno_min2,anno_diff,anno_kwargs,adjust_text_kwargs_l,adjust_text_kwargs_r,
+ log,verbose
+ ):
  if anno==True or anno=="GENENAME":
  sig_list_toanno = sig_list_merged.dropna(axis=0)
  if is_q==True and anno_het == True:
@@ -669,7 +1067,7 @@ def compare_effect(path1,
  log.write("Annotating variants using {}".format("GENENAME"), verbose=verbose)

  for index, row in sig_list_toanno.iterrows():
- log.write("Annotating {}...".format(row), verbose=verbose)
+ #log.write("Annotating {}...".format(row), verbose=verbose)
  if anno==True:
  to_anno_text = index
  elif type(anno) is str:
@@ -680,18 +1078,18 @@ def compare_effect(path1,

  if mode=="beta" or mode=="BETA" or mode=="Beta":
  if row["EFFECT_1"] < row["EFFECT_2_aligned"]:
- texts_l.append(plt.text(row["EFFECT_1"], row["EFFECT_2_aligned"],to_anno_text,ha="right",va="bottom"))
+ texts_l.append(plt.text(row["EFFECT_1"], row["EFFECT_2_aligned"],to_anno_text,ha="right",va="bottom", **anno_kwargs))
  else:
- texts_r.append(plt.text(row["EFFECT_1"], row["EFFECT_2_aligned"],to_anno_text,ha="left",va="top"))
+ texts_r.append(plt.text(row["EFFECT_1"], row["EFFECT_2_aligned"],to_anno_text,ha="left",va="top", **anno_kwargs))
  else:
  if row["OR_1"] < row["OR_2_aligned"]:
- texts_l.append(plt.text(row["OR_1"], row["OR_2_aligned"],to_anno_text, ha='right', va='bottom'))
+ texts_l.append(plt.text(row["OR_1"], row["OR_2_aligned"],to_anno_text, ha='right', va='bottom', **anno_kwargs))
  else:
- texts_r.append(plt.text(row["OR_1"], row["OR_2_aligned"],to_anno_text, ha='left', va='top'))
+ texts_r.append(plt.text(row["OR_1"], row["OR_2_aligned"],to_anno_text, ha='left', va='top', **anno_kwargs))
  if len(texts_l)>0:
- adjust_text(texts_l,autoalign =False,precision =0.001,lim=1000, ha="right",va="bottom", expand_text=(1,1.8) , expand_objects=(0.1,0.1), expand_points=(1.8,1.8) ,force_objects=(0.8,0.8) ,arrowprops=dict(arrowstyle='-|>', color='grey'),ax=ax)
+ adjust_text(texts_l,ax=ax,**adjust_text_kwargs_l)
  if len(texts_r)>0:
- adjust_text(texts_r,autoalign =False,precision =0.001,lim=1000, ha="left",va="top", expand_text=(1,1.8) , expand_objects=(0.1,0.1), expand_points=(1.8,1.8) ,force_objects =(0.8,0.8),arrowprops=dict(arrowstyle='-|>', color='grey'),ax=ax)
+ adjust_text(texts_r,ax=ax,**adjust_text_kwargs_r)
  elif type(anno) is dict:
  sig_list_toanno = sig_list_merged.dropna(axis=0)
  # if input is a dict
@@ -715,38 +1113,24 @@ def compare_effect(path1,
  for index, row in sig_list_toanno.iterrows():
  if mode=="beta" or mode=="BETA" or mode=="Beta":
  if row["EFFECT_1"] < row["EFFECT_2_aligned"]:
- texts_l.append(plt.text(row["EFFECT_1"], row["EFFECT_2_aligned"],anno[index],ha="right",va="bottom"))
+ texts_l.append(plt.text(row["EFFECT_1"], row["EFFECT_2_aligned"],anno[index],ha="right",va="bottom", **anno_kwargs))
  else:
- texts_r.append(plt.text(row["EFFECT_1"], row["EFFECT_2_aligned"],anno[index],ha="left",va="top"))
+ texts_r.append(plt.text(row["EFFECT_1"], row["EFFECT_2_aligned"],anno[index],ha="left",va="top", **anno_kwargs))
  else:
  if row["OR_1"] < row["OR_2_aligned"]:
- texts_l.append(plt.text(row["OR_1"], row["OR_2_aligned"],anno[index], ha='right', va='bottom'))
+ texts_l.append(plt.text(row["OR_1"], row["OR_2_aligned"],anno[index], ha='right', va='bottom', **anno_kwargs))
  else:
- texts_r.append(plt.text(row["OR_1"], row["OR_2_aligned"],anno[index], ha='left', va='top'))
+ texts_r.append(plt.text(row["OR_1"], row["OR_2_aligned"],anno[index], ha='left', va='top', **anno_kwargs))
  if len(texts_l)>0:
- adjust_text(texts_l,autoalign =False,precision =0.001,lim=1000, ha="right",va="bottom", expand_text=(1,1.8) , expand_objects=(0.1,0.1), expand_points=(1.8,1.8) ,force_objects=(0.8,0.8) ,arrowprops=dict(arrowstyle='-|>', color='grey'),ax=ax)
+ adjust_text(texts_l,ax=ax,**adjust_text_kwargs_l)
  if len(texts_r)>0:
- adjust_text(texts_r,autoalign =False,precision =0.001,lim=1000, ha="left",va="top", expand_text=(1,1.8) , expand_objects=(0.1,0.1), expand_points=(1.8,1.8) ,force_objects =(0.8,0.8),arrowprops=dict(arrowstyle='-|>', color='grey'),ax=ax)
- #################################################################################################################################
-
- # plot x=0,y=0, and a 45 degree line
- xl,xh=ax.get_xlim()
- yl,yh=ax.get_ylim()
-
- if mode=="beta" or mode=="BETA" or mode=="Beta":
- #if using beta
- ax.axhline(y=0, zorder=1,**helper_line_args)
- ax.axvline(x=0, zorder=1,**helper_line_args)
- else:
- #if using OR
- ax.axhline(y=1, zorder=1,**helper_line_args)
- ax.axvline(x=1, zorder=1,**helper_line_args)
-
- for spine in ['top', 'right']:
- ax.spines[spine].set_visible(False)
-
+ adjust_text(texts_r,ax=ax,**adjust_text_kwargs_r)
+ return ax
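This refactor routes the label-repulsion settings through the adjust_text_kwargs_l / adjust_text_kwargs_r dicts instead of hard-coding them at each call site, with labels split by which side of the identity line a point falls on. A standalone sketch of the same split (toy points; the grey '-|>' leader style mirrors the defaults replaced above):

    import matplotlib.pyplot as plt
    from adjustText import adjust_text

    fig, ax = plt.subplots()
    points = [(0.10, 0.15, "rs1"), (0.20, 0.12, "rs2")]  # toy data
    texts_l, texts_r = [], []
    for x, y, name in points:
        ax.scatter(x, y)
        # Points above the identity line get right/bottom anchoring,
        # the rest left/top, mirroring scatter_annotation.
        if x < y:
            texts_l.append(ax.text(x, y, name, ha="right", va="bottom"))
        else:
            texts_r.append(ax.text(x, y, name, ha="left", va="top"))
    for texts in (texts_l, texts_r):
        if texts:
            adjust_text(texts, ax=ax,
                        arrowprops=dict(arrowstyle="-|>", color="grey"))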

- ###regression line##############################################################################################################################
+
+ def confire_regression_line(is_reg, reg_box, sig_list_merged, ax, mode,xl,yl,xh,yh, null_beta, r_se,
+ is_45_helper_line,helper_line_args, font_kwargs,
+ log, verbose):
  if len(sig_list_merged)<3: is_reg=False
  if is_reg is True:
  if mode=="beta" or mode=="BETA" or mode=="Beta":
@@ -792,7 +1176,7 @@ def compare_effect(path1,
  pe="0"
  p_text="$p = " + p12 + " \\times 10^{"+pe+"}$"
  p_latex= f'{p_text}'
- ax.text(0.98,0.02,"$y =$ "+"{:.2f}".format(reg[1]) +" $+$ "+ "{:.2f}".format(reg[0])+" $x$, "+ p_latex + ", $r =$" +"{:.2f}".format(reg[2])+r_se_jackknife_string, va="bottom",ha="right",transform=ax.transAxes, bbox=reg_box, **fontargs)
+ ax.text(0.98,0.02,"$y =$ "+"{:.2f}".format(reg[1]) +" $+$ "+ "{:.2f}".format(reg[0])+" $x$, "+ p_latex + ", $r =$" +"{:.2f}".format(reg[2])+r_se_jackknife_string, va="bottom",ha="right",transform=ax.transAxes, bbox=reg_box, **font_kwargs)
  else:
  #if regression coefficient <0 : auxiliary line slope = -1
  if is_45_helper_line is True:
@@ -809,7 +1193,7 @@ def compare_effect(path1,
  pe="0"
  p_text="$p = " + p12 + " \\times 10^{"+pe+"}$"
  p_latex= f'{p_text}'
- ax.text(0.98,0.02,"$y =$ "+"{:.2f}".format(reg[1]) +" $-$ "+ "{:.2f}".format(abs(reg[0]))+" $x$, "+ p_latex + ", $r =$" +"{:.2f}".format(reg[2])+r_se_jackknife_string, va="bottom",ha="right",transform=ax.transAxes,bbox=reg_box,**fontargs)
+ ax.text(0.98,0.02,"$y =$ "+"{:.2f}".format(reg[1]) +" $-$ "+ "{:.2f}".format(abs(reg[0]))+" $x$, "+ p_latex + ", $r =$" +"{:.2f}".format(reg[2])+r_se_jackknife_string, va="bottom",ha="right",transform=ax.transAxes,bbox=reg_box,**font_kwargs)

  if mode=="beta" or mode=="BETA" or mode=="Beta":
  middle = sig_list_merged["EFFECT_1"].mean()
@@ -820,11 +1204,12 @@ def compare_effect(path1,
  ax.axline(xy1=(0,reg[1]),slope=reg[0],color="#cccccc",linestyle='--',zorder=1)
  else:
  ax.axline(xy1=(1,reg[0]+reg[1]),slope=reg[0],color="#cccccc",linestyle='--',zorder=1)
-
-
- ax.set_xlabel(xylabel_prefix+label[0],**fontargs)
- ax.set_ylabel(xylabel_prefix+label[1],**fontargs)
-
+ return ax
+
+
+ def configure_legend(fig, ax, legend_mode, is_q, is_q_mc, legend_elements, legend_pos, q_level,
+ font_kwargs,scatterargs,legend_args,
+ legend_title, legend_title2 ):
  legend_args_to_use ={
  "framealpha":1,
  "handlelength":0.7,
@@ -892,16 +1277,10 @@ def compare_effect(path1,
  label.set_ha('left')
  label.set_position((-8*width,0))

- ax.tick_params(axis='both', labelsize=fontargs["fontsize"])
- plt.setp(L.texts,**fontargs)
- plt.setp(L.get_title(),**fontargs)
- ##plot finished########################################################################################
- gc.collect()
-
- save_figure(fig, save, keyword="esc",save_args=save_args, log=log, verbose=verbose)
-
-
- return [sig_list_merged, fig,log]
+ ax.tick_params(axis='both', labelsize=font_kwargs["fontsize"])
+ plt.setp(L.texts,**font_kwargs)
+ plt.setp(L.get_title(),**font_kwargs)
+ return ax

  def reorderLegend(ax=None, order=None, add=None):
  handles, labels = ax.get_legend_handles_labels()
@@ -910,78 +1289,18 @@ def reorderLegend(ax=None, order=None, add=None):
  new_handles = [info[l] for l in order]
  return new_handles, order

- def test_q(df,beta1,se1,beta2,se2,q_level=0.05,is_q_mc=False, log=Log(), verbose=False):
- w1="Weight_1"
- w2="Weight_2"
- beta="BETA_FE"
- q="Q"
- pq="HetP"
- i2="I2"
- df[w1]=1/(df[se1])**2
- df[w2]=1/(df[se2])**2
- df[beta] =(df[w1]*df[beta1] + df[w2]*df[beta2])/(df[w1]+df[w2])
-
- # Cochran(1954)
- df[q] = df[w1]*(df[beta1]-df[beta])**2 + df[w2]*(df[beta2]-df[beta])**2
- df[pq] = ss.chi2.sf(df[q], 1)
- df["Edge_color"]="white"
-
- if is_q_mc=="fdr":
- log.write(" -FDR correction applied...", verbose=verbose)
- df[pq] = ss.false_discovery_control(df[pq])
- elif is_q_mc=="bon":
- log.write(" -Bonferroni correction applied...", verbose=verbose)
- df[pq] = df[pq] * len(df[pq])
-
- df.loc[df[pq]<q_level,"Edge_color"]="black"
- df.drop(columns=["Weight_1","Weight_2","BETA_FE"],inplace=True)
- # Huedo-Medina, T. B., Sánchez-Meca, J., Marín-Martínez, F., & Botella, J. (2006). Assessing heterogeneity in meta-analysis: Q statistic or I² index?. Psychological methods, 11(2), 193.
-
- # calculate I2
- df[i2] = (df[q] - 1)/df[q]
- df.loc[df[i2]<0,i2] = 0
-
- return df
-
- def jackknife_r(df,x="EFFECT_1",y="EFFECT_2_aligned"):
- """Jackknife estimation of se for rsq
-
- """
-
- # dropna
- df_nona = df.loc[:,[x,y]].dropna()
-
- # non-empty entries
- n=len(df)
-
- # assign row number
- df_nona["nrow"] = range(n)
+ def reorder_columns(sig_list_merged):
+ order=[ 'CHR', 'POS', 'GENENAME',
+ 'EA_1', 'NEA_1', 'EFFECT_1', 'SE_1', 'P_1', 'MLOG10P_1',
+ 'EA_2_aligned','NEA_2_aligned', 'EFFECT_2_aligned', 'SE_2','P_2','MLOG10P_2', 'EA_2', 'NEA_2', 'EFFECT_2',
+ 'indicator' ]

- # a list to store r2
- r_list=[]
+ new_order=[]
+ for i in order:
+ if i in sig_list_merged.columns:
+ new_order.append(i)
+ for i in sig_list_merged.columns:
+ if i not in new_order:
+ new_order.append(i)

- # estimate r
- for i in range(n):
- # exclude 1 record
- records_to_use = df_nona["nrow"]!=i
- # estimate r
- reg_jackknife = ss.linregress(df_nona.loc[records_to_use, x],df_nona.loc[records_to_use,y])
- # add r_i to list
- r_list.append(reg_jackknife[2])
-
- # convert list to array
- rs = np.array(r_list)
- # https://en.wikipedia.org/wiki/Jackknife_resampling
- r_se = np.sqrt( (n-1)/n * np.sum((rs - np.mean(rs))**2) )
- return r_se
-
- def drop_duplicate_and_na(df,snpid="SNPID",sort_by=False,log=Log(),verbose=True):
- length_before = len(df)
- if sort_by!=False:
- df.sort_values(by = sort_by, inplace=True)
- df.dropna(axis="index",subset=[snpid],inplace=True)
- df.drop_duplicates(subset=[snpid], keep='first', inplace=True)
- length_after= len(df)
- if length_before != length_after:
- log.write(" -Dropped {} duplicates or NAs...".format(length_before - length_after), verbose=verbose)
- return df
+ return sig_list_merged[new_order]
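reorder_columns is a stable reorder: columns found in the predefined list are pulled to the front in that canonical order, and any remaining columns keep their original relative order at the end. For example (toy frame, assumed column names):

    import pandas as pd

    # Toy merged table with columns in arbitrary order, plus one extra.
    toy = pd.DataFrame(columns=["SE_1", "CHR", "EXTRA", "POS", "EFFECT_1"])
    print(list(reorder_columns(toy).columns))
    # ['CHR', 'POS', 'EFFECT_1', 'SE_1', 'EXTRA']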