gwaslab 3.4.49__py3-none-any.whl → 3.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

@@ -4,16 +4,19 @@ import matplotlib.pyplot as plt
  import scipy.stats as ss
  import seaborn as sns
  import gc
+ import math
  import scipy.stats as ss
  from matplotlib.patches import Rectangle
  from adjustText import adjust_text
  from gwaslab.viz_aux_save_figure import save_figure
  from gwaslab.util_in_get_sig import getsig
+ from gwaslab.util_in_get_sig import annogene
  from gwaslab.g_Log import Log
  from gwaslab.util_in_correct_winnerscurse import wc_correct
  from gwaslab.util_in_correct_winnerscurse import wc_correct_test
  from gwaslab.g_Sumstats import Sumstats
-
+ from gwaslab.io_process_args import _merge_and_sync_dic
+ from gwaslab.io_process_args import _extract_kwargs
  #20220422
  def compare_effect(path1,
                     path2,
@@ -30,6 +33,7 @@ def compare_effect(path1,
                     anno_min1=0,
                     anno_min2=0,
                     anno_diff=0,
+                    anno_args=None,
                     scaled=False,
                     scaled1=False,
                     scaled2=False,
@@ -58,29 +62,41 @@ def compare_effect(path1,
                     plt_args=None,
                     xylabel_prefix="Per-allele effect size in ",
                     helper_line_args=None,
+                    adjust_text_kwargs = None,
+                    adjust_text_kwargs_l = None,
+                    adjust_text_kwargs_r = None,
+                    font_args=None,
                     fontargs=None,
+                    build="19",
                     r_or_r2="r",
-                    #
                     errargs=None,
                     legend_args=None,
                     sep=["\t","\t"],
                     log = Log(),
                     save=False,
                     save_args=None,
-                    verbose=False):
-
+                    verbose=False,
+                    **kwargs):
+
      #[snpid,p,ea,nea] ,[effect,se]
      #[snpid,p,ea,nea,chr,pos],[effect,se]
      #[snpid,p,ea,nea,chr,pos],[OR,OR_l,OR_h]
      if scaled == True:
          scaled1 = True
          scaled2 = True
+
+     if legend_title== r'$ P < 5 x 10^{-8}$ in:' and sig_level!=5e-8:
+
+         exponent = math.floor(math.log10(sig_level))
+         mantissa = sig_level / 10**exponent
+
+         legend_title = '$ P < {} x 10^{{{}}}$ in:'.format(mantissa, exponent)
+
      if is_q_mc=="fdr" or is_q_mc=="bon":
          is_q = True
-
      if is_q == True:
          if is_q_mc not in [False,"fdr","bon","non"]:
-             raise ValueError("Please select either fdr or bon or non for is_q_mc.")
+             raise ValueError('Please select either "fdr" or "bon" or "non"/False for is_q_mc.')
      if save_args is None:
          save_args = {"dpi":300,"facecolor":"white"}
      if reg_box is None:
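
The legend-title block added in the hunk above rewrites the default title whenever a non-default sig_level is passed, by splitting the threshold into mantissa and exponent. A minimal standalone sketch of the same computation, with a hypothetical threshold:

import math

sig_level = 1e-6                              # hypothetical non-default threshold
exponent = math.floor(math.log10(sig_level))  # -6
mantissa = sig_level / 10**exponent           # 1.0
legend_title = '$ P < {} x 10^{{{}}}$ in:'.format(mantissa, exponent)
# legend_title == '$ P < 1.0 x 10^{-6}$ in:'
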
@@ -89,6 +105,10 @@ def compare_effect(path1,
          sep = ["\t","\t"]
      if get_lead_args is None:
          get_lead_args = {}
+     if anno=="GENENAME":
+         get_lead_args["anno"]=True
+     if anno_args is None:
+         anno_args = {}
      if errargs is None:
          errargs={"ecolor":"#cccccc","elinewidth":1}
      if fontargs is None:
@@ -103,101 +123,90 @@ def compare_effect(path1,
      label = ["Sumstats_1","Sumstats_2","Both","None"]
      if anno_het ==True:
          is_q=True
-
-     log.write("Start to process the raw sumstats for plotting...")
-
-     ######### 1 check the value used to plot
-     if mode not in ["Beta","beta","BETA","OR","or"]:
-         raise ValueError("Please input Beta or OR")

-     if type(path1) is Sumstats:
-         log.write("Path1 is gwaslab Sumstats object...")
-         if cols_name_list_1 is None:
-             cols_name_list_1 = ["SNPID","P","EA","NEA","CHR","POS"]
-         if effect_cols_list_1 is None:
-             if mode=="beta":
-                 effect_cols_list_1 = ["BETA","SE"]
-             else:
-                 effect_cols_list_1 = ["OR","OR_95L","OR_95U"]
-     elif type(path1) is pd.DataFrame:
-         log.write("Path1 is pandas DataFrame object...")
+     adjust_text_kwargs_r_default = {"autoalign":False,"precision":0.001,"lim":1000,"ha":"left","va":"top","expand_text":(1,1.8),"expand_objects":(0.1,0.1),"expand_points":(1.8,1.8),"force_objects":(0.8,0.8),"arrowprops":dict(arrowstyle='-|>', color='grey')}
+     adjust_text_kwargs_l_default = {"autoalign":False,"precision":0.001,"lim":1000,"ha":"right","va":"bottom","expand_text":(1,1.8),"expand_objects":(0.1,0.1),"expand_points":(1.8,1.8),"force_objects":(0.8,0.8),"arrowprops":dict(arrowstyle='-|>', color='grey')}

-     if type(path2) is Sumstats:
-         log.write("Path2 is gwaslab Sumstats object...")
-         if cols_name_list_2 is None:
-             cols_name_list_2 = ["SNPID","P","EA","NEA","CHR","POS"]
-         if effect_cols_list_2 is None:
-             if mode=="beta":
-                 effect_cols_list_2 = ["BETA","SE"]
-             else:
-                 effect_cols_list_2 = ["OR","OR_95L","OR_95U"]
-     elif type(path2) is pd.DataFrame:
-         log.write("Path2 is pandas DataFrame object...")
-
-     ######### 2 extract snplist2
-     log.write(" -Loading "+label[1]+" SNP list in memory...")
-
-     if type(path2) is Sumstats:
-         sumstats = path2.data[[cols_name_list_2[0]]].copy()
-     elif type(path2) is pd.DataFrame:
-         sumstats = path2[[cols_name_list_2[0]]].copy()
+     if adjust_text_kwargs_l is None:
+         adjust_text_kwargs_l = adjust_text_kwargs_l_default
      else:
-         sumstats=pd.read_table(path2,sep=sep[1],usecols=[cols_name_list_2[0]])
-
-     common_snp_set=set(sumstats[cols_name_list_2[0]].values)
-
-     ######### 3 extract snplist1
-     if snplist is not None:
-         cols_to_extract = [cols_name_list_1[0],cols_name_list_1[1]]
+         for key, value in adjust_text_kwargs_l_default.items():
+             if key not in adjust_text_kwargs_l:
+                 adjust_text_kwargs_l[key] = value
+
+     if adjust_text_kwargs_r is None:
+         adjust_text_kwargs_r = adjust_text_kwargs_r_default
      else:
-         cols_to_extract = [cols_name_list_1[0],cols_name_list_1[1],cols_name_list_1[4],cols_name_list_1[5]]
-
-     ######### 4 load sumstats1
-     log.write(" -Loading sumstats for "+label[0]+":",",".join(cols_to_extract))
+         for key, value in adjust_text_kwargs_r_default.items():
+             if key not in adjust_text_kwargs_r:
+                 adjust_text_kwargs_r[key] = value

-     if type(path1) is Sumstats:
-         sumstats = path1.data[cols_to_extract].copy()
-     elif type(path1) is pd.DataFrame:
-         sumstats = path1[cols_to_extract].copy()
+     if adjust_text_kwargs is not None:
+         for key, value in adjust_text_kwargs.items():
+             adjust_text_kwargs_l[key] = value
+             adjust_text_kwargs_r[key] = value
      else:
-         sumstats = pd.read_table(path1,sep=sep[0],usecols=cols_to_extract)
-
-     gc.collect()
+         adjust_text_kwargs = {}

-     if scaled1==True:
-         sumstats[cols_name_list_1[1]] = np.power(10,-sumstats[cols_name_list_1[1]])
-     ######### 5 extract the common set
-     common_snp_set = common_snp_set.intersection(sumstats[cols_name_list_1[0]].values)
-     log.write(" -Counting variants available for both datasets:",len(common_snp_set)," variants...")
+
+     save_kwargs = _extract_kwargs("save", save_args, locals())
+     anno_kwargs = _extract_kwargs("anno", anno_args, locals())
+     err_kwargs = _extract_kwargs("err", errargs, locals())
+     plt_kwargs = _extract_kwargs("plt", plt_args, locals())
+     scatter_kwargs = _extract_kwargs("scatter", scatterargs, locals())
+     font_kwargs = _extract_kwargs("font",fontargs, locals())
+
+     log.write("Start to process the raw sumstats for plotting...", verbose=verbose)

-     ######### 6 rename the sumstats
-     rename_dict = { cols_name_list_1[0]:"SNPID",
-                     cols_name_list_1[1]:"P",
-                   }
+     # configure headers
+     cols_name_list_1,cols_name_list_2, effect_cols_list_1, effect_cols_list_2 = configure_headers(mode,
+                                                         path1,
+                                                         path2,
+                                                         cols_name_list_1,
+                                                         cols_name_list_2,
+                                                         effect_cols_list_1,
+                                                         effect_cols_list_2,
+                                                         scaled1,
+                                                         scaled2,
+                                                         log,
+                                                         verbose)

-     if snplist is None:
-         rename_dict[cols_name_list_1[4]]="CHR"
-         rename_dict[cols_name_list_1[5]]="POS"
+     # extract common variants / load sumstats 1
+     sumstats, common_snp_set = configure_common_snp_set(path1,path2,
+                                                         snplist,
+                                                         label,
+                                                         cols_name_list_1,
+                                                         cols_name_list_2,
+                                                         sep,
+                                                         scaled1,
+                                                         scaled2,
+                                                         log,verbose)

-     sumstats.rename(columns=rename_dict,inplace=True)
+     # rename sumstats headers -> keywords in gwaslab
+     sumstats = rename_sumtats(sumstats=sumstats,
+                               cols_name_list = cols_name_list_1,
+                               scaled=scaled1,
+                               snplist=snplist)

-     ######### 7 exctract only available variants from sumstats1
+     # exctract only available variants from sumstats1
      sumstats = sumstats.loc[sumstats["SNPID"].isin(common_snp_set),:]
+     log.write(" -Using only variants available for both datasets...", verbose=verbose)

-     log.write(" -Using only variants available for both datasets...")
      ######### 8 extact SNPs for comparison
-
-     if snplist is not None:
-         ######### 8.1 if a snplist is provided, use the snp list
-         log.write(" -Extract variants in the given list from "+label[0]+"...")
-         sig_list_1 = sumstats.loc[sumstats["SNPID"].isin(snplist),:].copy()
-     else:
-         ######### 8,2 otherwise use the sutomatically detected lead SNPs
-         log.write(" -Extract lead variants from "+label[0]+"...")
-         sig_list_1 = getsig(sumstats,"SNPID","CHR","POS","P", verbose=verbose,sig_level=sig_level,**get_lead_args)
-
-     if drop==True:
-         sig_list_1 = drop_duplicate_and_na(sig_list_1, sort_by="P", log=log ,verbose=verbose)
+     sig_list_1 = extract_snp_for_comparison(sumstats,
+                                             snplist,
+                                             label=label[0],
+                                             get_lead_args=get_lead_args,
+                                             build=build,
+                                             drop=drop,
+                                             anno=anno,
+                                             sig_level=sig_level,
+                                             scaled = scaled1,
+                                             log = log,
+                                             verbose = verbose)
+
+
+     ######### load sumstats1

      ######### 9 extract snplist2
      if snplist is not None:
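
The four adjust_text_kwargs parameters introduced above follow a fill-in-defaults pattern: each side (left/right) starts from its own default dict, user-supplied keys win, and a shared adjust_text_kwargs finally overrides both sides. A minimal standalone sketch of that merge logic (generic names, not the gwaslab API):

def fill_defaults(user, defaults):
    # user-supplied keys win; defaults fill whatever is missing
    if user is None:
        return dict(defaults)
    for key, value in defaults.items():
        if key not in user:
            user[key] = value
    return user

defaults = {"autoalign": False, "lim": 1000, "ha": "left"}
print(fill_defaults({"lim": 500}, defaults))
# {'lim': 500, 'autoalign': False, 'ha': 'left'}
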
@@ -205,158 +214,97 @@ def compare_effect(path1,
      else:
          cols_to_extract = [cols_name_list_2[0],cols_name_list_2[1],cols_name_list_2[4],cols_name_list_2[5]]

-     log.write(" -Loading sumstats for "+label[1]+":",",".join(cols_to_extract))
-
-     if type(path2) is Sumstats:
-         sumstats = path2.data[cols_to_extract].copy()
-     elif type(path2) is pd.DataFrame:
-         sumstats = path2[cols_to_extract].copy()
-     else:
-         sumstats = pd.read_table(path2,sep=sep[1],usecols=cols_to_extract)
-
+     sumstats = load_sumstats(path=path2,
+                              usecols=cols_to_extract,
+                              label=label[1],
+                              log=log,
+                              verbose= verbose,
+                              sep=sep[1])
      gc.collect()

-     if scaled2==True:
-         sumstats[cols_name_list_2[1]] = np.power(10,-sumstats[cols_name_list_2[1]])
-     ######### 10 rename sumstats2
-     rename_dict = { cols_name_list_2[0]:"SNPID",
-                     cols_name_list_2[1]:"P",
-                   }
-     if snplist is None:
-         rename_dict[cols_name_list_2[4]]="CHR"
-         rename_dict[cols_name_list_2[5]]="POS"
-     sumstats.rename(columns=rename_dict,inplace=True)
-
+     #if scaled2==True:
+     #    sumstats[cols_name_list_2[1]] = np.power(10,-sumstats[cols_name_list_2[1]])
+
+     sumstats = rename_sumtats(sumstats=sumstats,
+                               cols_name_list = cols_name_list_2,
+                               scaled=scaled2,
+                               snplist=snplist)
      ######### 11 exctract only overlapping variants from sumstats2
      sumstats = sumstats.loc[sumstats["SNPID"].isin(common_snp_set),:]
-
-     ######## 12 extact SNPs for comparison
-     if snplist is not None:
-         ######### 12.1 if a snplist is provided, use the snp list
-         log.write(" -Extract snps in the given list from "+label[1]+"...")
-         sig_list_2 = sumstats.loc[sumstats["SNPID"].isin(snplist),:].copy()
-     else:
-         log.write(" -Extract lead snps from "+label[1]+"...")
-         ######### 12.2 otherwise use the sutomatically detected lead SNPs
-         sig_list_2 = getsig(sumstats,"SNPID","CHR","POS","P",
-                             verbose=verbose,sig_level=sig_level,**get_lead_args)
-     if drop==True:
-         sig_list_2 = drop_duplicate_and_na(sig_list_2, sort_by="P", log=log ,verbose=verbose)
+     sig_list_2 = extract_snp_for_comparison(sumstats,
+                                             snplist,
+                                             label=label[1],
+                                             get_lead_args=get_lead_args,
+                                             build=build,
+                                             drop=drop,
+                                             anno=anno,
+                                             sig_level=sig_level,
+                                             scaled = scaled2,
+                                             log = log,
+                                             verbose = verbose)

      ######### 13 Merge two list using SNPID
-     ##############################################################################
-     log.write("Merging snps from "+label[0]+" and "+label[1]+"...")
-
-     sig_list_merged = pd.merge(sig_list_1,sig_list_2,left_on="SNPID",right_on="SNPID",how="outer",suffixes=('_1', '_2'))
-     # SNPID P_1 P_2
-     #0 rs117986209 0.142569 0.394455
-     #1 rs6704312 0.652104 0.143750
+     sig_list_merged = merge_list(sig_list_1,
+                                  sig_list_2,
+                                  anno = anno,
+                                  labels=label,
+                                  log=log,
+                                  verbose=verbose)

      ###############################################################################
-
-     ########## 14 Merging sumstats1
-
-     if mode=="beta" or mode=="BETA" or mode=="Beta":
-         #[snpid,p,ea,nea] ,[effect,se]
-         #[snpid,p,ea,nea,chr,pos],[effect,se]
-         #[snpid,p,ea,nea,chr,pos],[OR,OR_l,OR_h]
-         cols_to_extract = [cols_name_list_1[0],cols_name_list_1[1], cols_name_list_1[2],cols_name_list_1[3], effect_cols_list_1[0], effect_cols_list_1[1]]
-     else:
-         cols_to_extract = [cols_name_list_1[0],cols_name_list_1[1], cols_name_list_1[2],cols_name_list_1[3], effect_cols_list_1[0], effect_cols_list_1[1], effect_cols_list_1[2]]
-
-     if len(eaf)>0: cols_to_extract.append(eaf[0])
-     log.write(" -Extract statistics of selected variants from "+label[0]+" : ",",".join(cols_to_extract) )
-
-     if type(path1) is Sumstats:
-         sumstats = path1.data[cols_to_extract].copy()
-     elif type(path1) is pd.DataFrame:
-         sumstats = path1[cols_to_extract].copy()
-     else:
-         sumstats = pd.read_table(path1,sep=sep[0],usecols=cols_to_extract)
-
-     if scaled1==True:
-         sumstats[cols_name_list_1[1]] = np.power(10,-sumstats[cols_name_list_1[1]])
-
-     if mode=="beta" or mode=="BETA" or mode=="Beta":
-         rename_dict = { cols_name_list_1[0]:"SNPID",
-                         cols_name_list_1[1]:"P_1",
-                         cols_name_list_1[2]:"EA_1",
-                         cols_name_list_1[3]:"NEA_1",
-                         effect_cols_list_1[0]:"EFFECT_1",
-                         effect_cols_list_1[1]:"SE_1",
-                       }
-
-     else:
-         # if or
-         rename_dict = { cols_name_list_1[0]:"SNPID",
-                         cols_name_list_1[1]:"P_1",
-                         cols_name_list_1[2]:"EA_1",
-                         cols_name_list_1[3]:"NEA_1",
-                         effect_cols_list_1[0]:"OR_1",
-                         effect_cols_list_1[1]:"OR_L_1",
-                         effect_cols_list_1[2]:"OR_H_1"
-                       }
-     ## check if eaf column is provided.
-     if len(eaf)>0: rename_dict[eaf[0]]="EAF_1"
-     sumstats.rename(columns=rename_dict, inplace=True)
+     cols_to_extract = configure_cols_to_extract(mode=mode,
+                                                 cols_name_list = cols_name_list_1,
+                                                 effect_cols_list= effect_cols_list_1,
+                                                 eaf = eaf)
+     sumstats = load_sumstats(path=path1,
+                              usecols=cols_to_extract,
+                              label=label[0],
+                              log=log,
+                              verbose= verbose,
+                              sep=sep[0])

-     # drop na and duplicate
-     if drop==True:
-         sumstats = drop_duplicate_and_na(sumstats, sort_by="P_1", log=log , verbose=verbose)
-     sumstats.drop("P_1",axis=1,inplace=True)
+     #if scaled1==True:
+     #    sumstats[cols_name_list_1[1]] = np.power(10,-sumstats[cols_name_list_1[1]])
+     sumstats = rename_sumstats_full(mode, sumstats,
+                                     index=1,
+                                     cols_name_list = cols_name_list_1,
+                                     effect_cols_list = effect_cols_list_1,
+                                     eaf = eaf,
+                                     drop = drop,
+                                     scaled=scaled1,
+                                     log=log, verbose=verbose)

      log.write(" -Merging "+label[0]+" effect information...", verbose=verbose)
-
      sig_list_merged = pd.merge(sig_list_merged,sumstats,
                                 left_on="SNPID",right_on="SNPID",
                                 how="left")

      ############ 15 merging sumstats2
+     cols_to_extract = configure_cols_to_extract(mode=mode,
+                                                 cols_name_list = cols_name_list_2,
+                                                 effect_cols_list= effect_cols_list_2,
+                                                 eaf = eaf)

-     if mode=="beta" or mode=="BETA" or mode=="Beta":
-         cols_to_extract = [cols_name_list_2[0],cols_name_list_2[1],cols_name_list_2[2],cols_name_list_2[3], effect_cols_list_2[0], effect_cols_list_2[1]]
-     else:
-         # if or
-         cols_to_extract = [cols_name_list_2[0],cols_name_list_2[1],cols_name_list_2[2],cols_name_list_2[3], effect_cols_list_2[0], effect_cols_list_2[1], effect_cols_list_2[2]]
-     ## check if eaf column is provided.
-     if len(eaf)>0: cols_to_extract.append(eaf[1])
-
-     log.write(" -Extract statistics of selected variants from "+label[1]+" : ",",".join(cols_to_extract), verbose=verbose )
-     if type(path2) is Sumstats:
-         sumstats = path2.data[cols_to_extract].copy()
-     elif type(path2) is pd.DataFrame:
-         sumstats = path2[cols_to_extract].copy()
-     else:
-         sumstats = pd.read_table(path2,sep=sep[1],usecols=cols_to_extract)
+     sumstats = load_sumstats(path=path2,
+                              usecols=cols_to_extract,
+                              label=label[1],
+                              log=log,
+                              verbose= verbose,
+                              sep=sep[1])

-     if scaled2==True:
-         sumstats[cols_name_list_2[1]] = np.power(10,-sumstats[cols_name_list_2[1]])
+     #if scaled2==True:
+     #    sumstats[cols_name_list_2[1]] = np.power(10,-sumstats[cols_name_list_2[1]])

      gc.collect()

-     if mode=="beta" or mode=="BETA" or mode=="Beta":
-         rename_dict = { cols_name_list_2[0]:"SNPID",
-                         cols_name_list_2[1]:"P_2",
-                         cols_name_list_2[2]:"EA_2",
-                         cols_name_list_2[3]:"NEA_2",
-                         effect_cols_list_2[0]:"EFFECT_2",
-                         effect_cols_list_2[1]:"SE_2",
-                       }
-     else:
-         rename_dict = { cols_name_list_2[0]:"SNPID",
-                         cols_name_list_2[1]:"P_2",
-                         cols_name_list_2[2]:"EA_2",
-                         cols_name_list_2[3]:"NEA_2",
-                         effect_cols_list_2[0]:"OR_2",
-                         effect_cols_list_2[1]:"OR_L_2",
-                         effect_cols_list_2[2]:"OR_H_2"
-                       }
-     if len(eaf)>0: rename_dict[eaf[1]]="EAF_2"
-     sumstats.rename(columns=rename_dict, inplace=True)
-     # drop na and duplicate
-     if drop==True:
-         sumstats = drop_duplicate_and_na(sumstats, sort_by="P_2", log=log, verbose=verbose)
-     sumstats.drop("P_2",axis=1,inplace=True)
+     sumstats = rename_sumstats_full(mode, sumstats,
+                                     index=2,
+                                     cols_name_list = cols_name_list_2,
+                                     effect_cols_list = effect_cols_list_2,
+                                     eaf = eaf,
+                                     drop = drop,
+                                     scaled=scaled2,
+                                     log=log, verbose=verbose)

      log.write(" -Merging "+label[1]+" effect information...", verbose=verbose)
      sig_list_merged = pd.merge(sig_list_merged,sumstats,
@@ -366,130 +314,49 @@ def compare_effect(path1,
      sig_list_merged.set_index("SNPID",inplace=True)

      ################ 16 update sumstats1
-     log.write(" -Updating missing information for "+label[0]+" ...", verbose=verbose)
-     if type(path1) is Sumstats:
-         sumstats = path1.data[[cols_name_list_1[0],cols_name_list_1[1]]].copy()
-     elif type(path1) is pd.DataFrame:
-         sumstats = path1[[cols_name_list_1[0],cols_name_list_1[1]]].copy()
-     else:
-         sumstats = pd.read_table(path1,sep=sep[0],usecols=[cols_name_list_1[0],cols_name_list_1[1]])
-     if scaled1==True:
-         sumstats[cols_name_list_1[1]] = np.power(10,-sumstats[cols_name_list_1[1]])
-     sumstats.rename(columns={
-                     cols_name_list_1[0]:"SNPID",
-                     cols_name_list_1[1]:"P_1"
-                     },
-                     inplace=True)
-     # drop na and duplicate
-     if drop==True:
-         sumstats = drop_duplicate_and_na(sumstats, sort_by="P_1", log=log, verbose=verbose)
-
-     sumstats.set_index("SNPID",inplace=True)
-     sig_list_merged.update(sumstats)
+
+     sig_list_merged = update_stats(sig_list_merged = sig_list_merged,
+                                    path = path1,
+                                    cols_name_list = cols_name_list_1,
+                                    index=1,
+                                    sep=sep[0],
+                                    snplist = snplist,
+                                    label=label[0],
+                                    drop = drop,
+                                    scaled=scaled1,
+                                    log=log,
+                                    verbose = verbose)

      ################# 17 update sumstats2
-     log.write(" -Updating missing information for "+label[1]+" ...", verbose=verbose)
-     if type(path2) is Sumstats:
-         sumstats = path2.data[[cols_name_list_2[0],cols_name_list_2[1]]].copy()
-     elif type(path2) is pd.DataFrame:
-         sumstats = path2[[cols_name_list_2[0],cols_name_list_2[1]]].copy()
-     else:
-         sumstats = pd.read_table(path2,sep=sep[1],usecols=[cols_name_list_2[0],cols_name_list_2[1]])
+     sig_list_merged = update_stats(sig_list_merged = sig_list_merged,
+                                    path = path2,
+                                    cols_name_list = cols_name_list_2,
+                                    index=2,
+                                    sep=sep[1],
+                                    snplist = snplist,
+                                    label=label[1],
+                                    drop = drop,
+                                    scaled=scaled2,
+                                    log=log,
+                                    verbose = verbose)

-     if scaled2==True:
-         sumstats[cols_name_list_2[1]] = np.power(10,-sumstats[cols_name_list_2[1]])
-     sumstats.rename(columns={
-                     cols_name_list_2[0]:"SNPID",
-                     cols_name_list_2[1]:"P_2"
-                     },
-                     inplace=True)
-     # drop na and duplicate
-     if drop==True:
-         sumstats = drop_duplicate_and_na(sumstats, sort_by="P_2", log=log, verbose=verbose)
+     #if scaled1 ==True :
+     #    log.write(" -Sumstats -log10(P) values are being converted to P...", verbose=verbose)
+     #    sig_list_merged["P_1"] = np.power(10,-sig_list_merged["P_1"])
+     #if scaled2 ==True :
+     #    log.write(" -Sumstats -log10(P) values are being converted to P...", verbose=verbose)
+     #    sig_list_merged["P_2"] = np.power(10,-sig_list_merged["P_2"])

-     sumstats.set_index("SNPID",inplace=True)
-     sig_list_merged.update(sumstats)
-
-     if scaled1 ==True :
-         log.write(" -Sumstats -log10(P) values are being converted to P...", verbose=verbose)
-         sig_list_merged["P_1"] = np.power(10,-sig_list_merged["P_1"])
-     if scaled2 ==True :
-         log.write(" -Sumstats -log10(P) values are being converted to P...", verbose=verbose)
-         sig_list_merged["P_2"] = np.power(10,-sig_list_merged["P_2"])
-     ####
      #################################################################################
-     ############## 18 init indicator
-     log.write(" -Assigning indicator ...", verbose=verbose)
-     # 0-> 0
-     # 1 -> sig in sumstats1
-     # 2 -> sig in sumsatts2
-     # 3-> sig in both sumstats1 + sumstats2
-     sig_list_merged["indicator"] = 0
-     sig_list_merged.loc[sig_list_merged["P_1"]<sig_level,"indicator"]=1+sig_list_merged.loc[sig_list_merged["P_1"]<sig_level,"indicator"]
-     sig_list_merged.loc[sig_list_merged["P_2"]<sig_level,"indicator"]=2+sig_list_merged.loc[sig_list_merged["P_2"]<sig_level,"indicator"]
-
-     if snplist is None:
-         sig_list_merged["CHR"]=np.max(sig_list_merged[["CHR_1","CHR_2"]], axis=1).astype(int)
-         sig_list_merged["POS"]=np.max(sig_list_merged[["POS_1","POS_2"]], axis=1).astype(int)
-         sig_list_merged.drop(labels=['CHR_1', 'CHR_2','POS_1', 'POS_2'], axis=1,inplace=True)
+     sig_list_merged = assign_indicator(sig_list_merged, snplist, sig_level, scaled1, scaled2, log, verbose)

-     log.write(" -Aligning "+label[1]+" EA with "+label[0]+" EA ...", verbose=verbose)
-     ############### 19 align allele effect with sumstats 1
-     sig_list_merged["EA_1"]=sig_list_merged["EA_1"].astype("string")
-     sig_list_merged["EA_2"]=sig_list_merged["EA_2"].astype("string")
-     sig_list_merged["NEA_1"]=sig_list_merged["NEA_1"].astype("string")
-     sig_list_merged["NEA_2"]=sig_list_merged["NEA_2"].astype("string")
-     if mode=="beta" or mode=="BETA" or mode=="Beta":
-         # copy raw
-         sig_list_merged["EA_2_aligned"]=sig_list_merged["EA_2"]
-         sig_list_merged["NEA_2_aligned"]=sig_list_merged["NEA_2"]
-         sig_list_merged["EFFECT_2_aligned"]=sig_list_merged["EFFECT_2"]
-
-         #filp ea/nea and beta for sumstats2
-         sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"EA_2_aligned"]= sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"NEA_2"]
-         sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"NEA_2_aligned"]= sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"EA_2"]
-         sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"EFFECT_2_aligned"]= -sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"EFFECT_2"]
-     else:
-         #flip for OR or - +
+     sig_list_merged = align_alleles(sig_list_merged, label, mode, eaf, log, verbose)

-         sig_list_merged["EA_2_aligned"]=sig_list_merged["EA_2"]
-         sig_list_merged["NEA_2_aligned"]=sig_list_merged["NEA_2"]
-         sig_list_merged["OR_2_aligned"]=sig_list_merged["OR_2"]
-         sig_list_merged["OR_L_2_aligned"]=sig_list_merged["OR_L_2"]
-         sig_list_merged["OR_H_2_aligned"]=sig_list_merged["OR_H_2"]
+     sig_list_merged = check_allele_match(sig_list_merged, allele_match, label, log,verbose)

-         sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"EA_2_aligned"]= sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"NEA_2"]
-         sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"NEA_2_aligned"]= sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"EA_2"]
-         sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"OR_2_aligned"]= 1/sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"OR_2"]
-         sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"OR_H_2_aligned"]= 1/sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"OR_L_2"]
-         sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"OR_L_2_aligned"]= 1/sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"OR_H_2"]
-
-         sig_list_merged["BETA_1"]=np.log(sig_list_merged["OR_1"])
-         sig_list_merged["BETA_2_aligned"]=np.log(sig_list_merged["OR_2_aligned"])
-         sig_list_merged["SE_1"]=(np.log(sig_list_merged["OR_H_1"]) - np.log(sig_list_merged["OR_1"]))/ss.norm.ppf(0.975)
-         sig_list_merged["SE_2"]=(np.log(sig_list_merged["OR_H_2_aligned"]) - np.log(sig_list_merged["OR_2_aligned"]))/ss.norm.ppf(0.975)
-
-         sig_list_merged["OR_L_1_err"]=np.abs(sig_list_merged["OR_L_1"]-sig_list_merged["OR_1"])
-         sig_list_merged["OR_H_1_err"]=np.abs(sig_list_merged["OR_H_1"]-sig_list_merged["OR_1"])
-         sig_list_merged["OR_L_2_aligned_err"]=np.abs(sig_list_merged["OR_L_2_aligned"]-sig_list_merged["OR_2_aligned"])
-         sig_list_merged["OR_H_2_aligned_err"]=np.abs(sig_list_merged["OR_H_2_aligned"]-sig_list_merged["OR_2_aligned"])
-
-     if len(eaf)>0:
-         # flip eaf
-         sig_list_merged["EAF_2_aligned"]=sig_list_merged["EAF_2"]
-         sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"EAF_2_aligned"]= 1 -sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"EAF_2"]
-
-     # checking effect allele matching
-     nonmatch = np.nansum(sig_list_merged["EA_1"] != sig_list_merged["EA_2_aligned"])
-     log.write(" -Aligned all EAs in {} with EAs in {} ...".format(label[1],label[0]), verbose=verbose)
-     if nonmatch>0:
-         log.warning("Alleles for {} variants do not match...".format(nonmatch))
-     if allele_match==True:
-         if nonmatch>0:
-             sig_list_merged = sig_list_merged.loc[sig_list_merged["EA_1"] == sig_list_merged["EA_2_aligned"]]
-         else:
-             log.write(" -No variants with EA not matching...", verbose=verbose)
-     if fdr==True:
+     sig_list_merged = filter_by_maf(sig_list_merged, eaf, maf_level, log, verbose)
+
+     if fdr==True and scaled==False:
          log.write(" -Using FDR...", verbose=verbose)
          #sig_list_merged["P_1"] = fdrcorrection(sig_list_merged["P_1"])[1]
          #sig_list_merged["P_2"] = fdrcorrection(sig_list_merged["P_2"])[1]
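
For context, the align_alleles helper that replaces the inline block above (its definition appears further down in this diff) applies the usual harmonization rule: wherever the two studies report different effect alleles, EA/NEA are swapped and the effect is flipped (beta negated, OR inverted). A toy illustration of the beta case with a hypothetical merged table:

import pandas as pd

df = pd.DataFrame({"EA_1": ["A", "C"], "EA_2": ["A", "T"],
                   "NEA_2": ["G", "C"], "EFFECT_2": [0.10, 0.25]})

flip = df["EA_1"] != df["EA_2"]                     # rows needing alignment
df["EA_2_aligned"] = df["EA_2"].where(~flip, df["NEA_2"])
df["EFFECT_2_aligned"] = df["EFFECT_2"].where(~flip, -df["EFFECT_2"])
# second row: EAs C/T disagree -> aligned EA becomes C, beta 0.25 becomes -0.25
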
@@ -498,64 +365,28 @@ def compare_effect(path1,

      ####################################################################################################################################
      ## winner's curse correction using aligned beta
-     if mode=="beta":
-         if wc_correction == "all":
-             log.write(" -Correcting BETA for winner's curse with threshold at {} for all variants...".format(sig_level), verbose=verbose)
-             sig_list_merged["EFFECT_1_RAW"] = sig_list_merged["EFFECT_1"].copy()
-             sig_list_merged["EFFECT_2_aligned_RAW"] = sig_list_merged["EFFECT_2_aligned"].copy()
-
-             log.write(" -Correcting BETA for {} variants in sumstats1...".format(sum(~sig_list_merged["EFFECT_1"].isna())), verbose=verbose)
-             sig_list_merged["EFFECT_1"] = sig_list_merged[["EFFECT_1_RAW","SE_1"]].apply(lambda x: wc_correct(x[0],x[1],sig_level),axis=1)
-
-             log.write(" -Correcting BETA for {} variants in sumstats2...".format(sum(~sig_list_merged["EFFECT_2_aligned"].isna())), verbose=verbose)
-             sig_list_merged["EFFECT_2_aligned"] = sig_list_merged[["EFFECT_2_aligned_RAW","SE_2"]].apply(lambda x: wc_correct(x[0],x[1],sig_level),axis=1)
-
-         elif wc_correction == "sig" :
-             log.write(" - Correcting BETA for winner's curse with threshold at {} for significant variants...".format(sig_level), verbose=verbose)
-             sig_list_merged["EFFECT_1_RAW"] = sig_list_merged["EFFECT_1"].copy()
-             sig_list_merged["EFFECT_2_aligned_RAW"] = sig_list_merged["EFFECT_2_aligned"].copy()
-             log.write(" -Correcting BETA for {} variants in sumstats1...".format(sum(sig_list_merged["P_1"]<sig_level)), verbose=verbose)
-             sig_list_merged.loc[sig_list_merged["P_1"]<sig_level, "EFFECT_1"] = sig_list_merged.loc[sig_list_merged["P_1"]<sig_level, ["EFFECT_1_RAW","SE_1"]].apply(lambda x: wc_correct_test(x[0],x[1],sig_level),axis=1)
-             log.write(" -Correcting BETA for {} variants in sumstats2...".format(sum(sig_list_merged["P_2"]<sig_level)), verbose=verbose)
-             sig_list_merged.loc[sig_list_merged["P_2"]<sig_level, "EFFECT_2_aligned"] = sig_list_merged.loc[sig_list_merged["P_2"]<sig_level, ["EFFECT_2_aligned_RAW","SE_2"]].apply(lambda x: wc_correct_test(x[0],x[1],sig_level),axis=1)
-
-         elif wc_correction == "sumstats1" :
-             log.write(" - Correcting BETA for winner's curse with threshold at {} for significant variants in sumstats1...".format(sig_level), verbose=verbose)
-             sig_list_merged["EFFECT_1_RAW"] = sig_list_merged["EFFECT_1"].copy()
-             log.write(" -Correcting BETA for {} variants in sumstats1...".format(sum(sig_list_merged["P_1"]<sig_level)), verbose=verbose)
-             sig_list_merged.loc[sig_list_merged["P_1"]<sig_level, "EFFECT_1"] = sig_list_merged.loc[sig_list_merged["P_1"]<sig_level, ["EFFECT_1_RAW","SE_1"]].apply(lambda x: wc_correct_test(x[0],x[1],sig_level),axis=1)
-
-         elif wc_correction == "sumstats2" :
-             log.write(" - Correcting BETA for winner's curse with threshold at {} for significant variants in sumstats2...".format(sig_level), verbose=verbose)
-             sig_list_merged["EFFECT_2_aligned_RAW"] = sig_list_merged["EFFECT_2_aligned"].copy()
-             log.write(" -Correcting BETA for {} variants in sumstats2...".format(sum(sig_list_merged["P_2"]<sig_level)), verbose=verbose)
-             sig_list_merged.loc[sig_list_merged["P_2"]<sig_level, "EFFECT_2_aligned"] = sig_list_merged.loc[sig_list_merged["P_2"]<sig_level, ["EFFECT_2_aligned_RAW","SE_2"]].apply(lambda x: wc_correct_test(x[0],x[1],sig_level),axis=1)
+     sig_list_merged = winnerscurse_correction(sig_list_merged, mode, wc_correction, sig_level,scaled1, scaled2, log, verbose)

      ########################## Het test############################################################
      ## heterogeneity test
-     if (is_q is True):
+     if (is_q == True):
          log.write(" -Calculating Cochran's Q statistics and peform chisq test...", verbose=verbose)
          if mode=="beta" or mode=="BETA" or mode=="Beta":
              sig_list_merged = test_q(sig_list_merged,"EFFECT_1","SE_1","EFFECT_2_aligned","SE_2",q_level=q_level,is_q_mc=is_q_mc, log=log, verbose=verbose)
          else:
              sig_list_merged = test_q(sig_list_merged,"BETA_1","SE_1","BETA_2_aligned","SE_2",q_level=q_level,is_q_mc=is_q_mc, log=log, verbose=verbose)

+         # heterogeneity summary
+         log.write(" -Significant het:" ,len(sig_list_merged.loc[sig_list_merged["HetP"]<0.05,:]), verbose=verbose)
+         log.write(" -All sig:" ,len(sig_list_merged), verbose=verbose)
+         log.write(" -Het rate:" ,len(sig_list_merged.loc[sig_list_merged["HetP"]<0.05,:])/len(sig_list_merged), verbose=verbose)
+
      ######################### save ###############################################################
      ## save the merged data
      save_path = label[0]+"_"+label[1]+"_beta_sig_list_merged.tsv"
      log.write(" -Saving the merged data to:",save_path, verbose=verbose)
-     sig_list_merged.to_csv(save_path,"\t")
-
-     ########################## maf_threshold#############################################################
-     if (len(eaf)>0) and (maf_level is not None):
-         both_eaf_clear = (sig_list_merged["EAF_1"]>maf_level)&(sig_list_merged["EAF_1"]<1-maf_level)&(sig_list_merged["EAF_2"]>maf_level)&(sig_list_merged["EAF_2"]<1-maf_level)
-         log.write(" -Exclude "+str(len(sig_list_merged) -sum(both_eaf_clear))+ " variants with maf <",maf_level, verbose=verbose)
-         sig_list_merged = sig_list_merged.loc[both_eaf_clear,:]
-     # heterogeneity summary
-     if (is_q is True):
-         log.write(" -Significant het:" ,len(sig_list_merged.loc[sig_list_merged["HetP"]<0.05,:]), verbose=verbose)
-         log.write(" -All sig:" ,len(sig_list_merged), verbose=verbose)
-         log.write(" -Het rate:" ,len(sig_list_merged.loc[sig_list_merged["HetP"]<0.05,:])/len(sig_list_merged), verbose=verbose)
+     sig_list_merged = reorder_columns(sig_list_merged)
+     sig_list_merged.to_csv(save_path,sep="\t")

      # extract group
      if include_all==True:
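
As a reference for the het-test step above: for two studies, Cochran's Q reduces to an inverse-variance-weighted squared deviation with one degree of freedom. A standalone sketch of that statistic (the diff only shows the call to gwaslab's test_q, not its body, so this is the textbook form rather than the package's exact implementation):

import scipy.stats as ss

def cochran_q_two_studies(beta1, se1, beta2, se2):
    # inverse-variance weights and pooled effect
    w1, w2 = 1.0 / se1**2, 1.0 / se2**2
    pooled = (w1 * beta1 + w2 * beta2) / (w1 + w2)
    q = w1 * (beta1 - pooled) ** 2 + w2 * (beta2 - pooled) ** 2
    p = ss.chi2.sf(q, df=1)  # k - 1 = 1 degree of freedom for k = 2 studies
    return q, p

print(cochran_q_two_studies(0.12, 0.02, 0.05, 0.03))  # Q ~ 3.77, p ~ 0.05
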
@@ -582,62 +413,640 @@ def compare_effect(path1,
582
413
  log.write("Creating the scatter plot for effect sizes comparison...", verbose=verbose)
583
414
  #plt.style.use("ggplot")
584
415
  sns.set_style("ticks")
585
- fig,ax = plt.subplots(**plt_args)
416
+ fig,ax = plt.subplots(**plt_kwargs)
586
417
  legend_elements=[]
587
418
  if mode=="beta" or mode=="BETA" or mode=="Beta":
588
419
  if len(sum0)>0:
589
420
  ax.errorbar(sum0["EFFECT_1"],sum0["EFFECT_2_aligned"], xerr=sum0["SE_1"],yerr=sum0["SE_2"],
590
- linewidth=0,zorder=1,**errargs)
421
+ linewidth=0,zorder=1,**err_kwargs)
591
422
 
592
- ax.scatter(sum0["EFFECT_1"],sum0["EFFECT_2_aligned"],label=label[3],zorder=2,color="#cccccc",edgecolors=sum0["Edge_color"],marker=".",**scatterargs)
423
+ ax.scatter(sum0["EFFECT_1"],sum0["EFFECT_2_aligned"],label=label[3],zorder=2,color="#cccccc",edgecolors=sum0["Edge_color"],marker=".",**scatter_kwargs)
593
424
  #legend_elements.append(mpatches.Circle(facecolor='#cccccc', edgecolor='white', label=label[3]))
594
425
  legend_elements.append(label[3])
595
426
  if len(sum1only)>0:
596
427
  ax.errorbar(sum1only["EFFECT_1"],sum1only["EFFECT_2_aligned"], xerr=sum1only["SE_1"],yerr=sum1only["SE_2"],
597
- linewidth=0,zorder=1,**errargs)
598
- ax.scatter(sum1only["EFFECT_1"],sum1only["EFFECT_2_aligned"],label=label[0],zorder=2,color="#e6320e",edgecolors=sum1only["Edge_color"],marker="^",**scatterargs)
428
+ linewidth=0,zorder=1,**err_kwargs)
429
+ ax.scatter(sum1only["EFFECT_1"],sum1only["EFFECT_2_aligned"],label=label[0],zorder=2,color="#e6320e",edgecolors=sum1only["Edge_color"],marker="^",**scatter_kwargs)
599
430
  #legend_elements.append(mpatches.Patch(facecolor='#e6320e', edgecolor='white', label=label[0]))
600
431
  legend_elements.append(label[0])
601
432
  if len(sum2only)>0:
602
433
  ax.errorbar(sum2only["EFFECT_1"],sum2only["EFFECT_2_aligned"], xerr=sum2only["SE_1"],yerr=sum2only["SE_2"],
603
- linewidth=0,zorder=1,**errargs)
604
- ax.scatter(sum2only["EFFECT_1"],sum2only["EFFECT_2_aligned"],label=label[1],zorder=2,color="#41e620",edgecolors=sum2only["Edge_color"],marker="o",**scatterargs)
434
+ linewidth=0,zorder=1,**err_kwargs)
435
+ ax.scatter(sum2only["EFFECT_1"],sum2only["EFFECT_2_aligned"],label=label[1],zorder=2,color="#41e620",edgecolors=sum2only["Edge_color"],marker="o",**scatter_kwargs)
605
436
  #legend_elements.append(mpatches.Circle(facecolor='#41e620', edgecolor='white', label=label[1]))
606
437
  legend_elements.append(label[1])
607
438
  if len(both)>0:
608
439
  ax.errorbar(both["EFFECT_1"],both["EFFECT_2_aligned"], xerr=both["SE_1"],yerr=both["SE_2"],
609
- linewidth=0,zorder=1,**errargs)
610
- ax.scatter(both["EFFECT_1"],both["EFFECT_2_aligned"],label=label[2],zorder=2,color="#205be6",edgecolors=both["Edge_color"],marker="s",**scatterargs)
440
+ linewidth=0,zorder=1,**err_kwargs)
441
+ ax.scatter(both["EFFECT_1"],both["EFFECT_2_aligned"],label=label[2],zorder=2,color="#205be6",edgecolors=both["Edge_color"],marker="s",**scatter_kwargs)
611
442
  #legend_elements.append(mpatches.Patch(facecolor='#205be6', edgecolor='white', label=label[2]))
612
443
  legend_elements.append(label[2])
613
444
  else:
614
445
  ## if OR
615
446
  if len(sum0)>0:
616
447
  ax.errorbar(sum0["OR_1"],sum0["OR_2_aligned"], xerr=sum0[["OR_L_1_err","OR_H_1_err"]].T,yerr=sum0[["OR_L_2_aligned_err","OR_H_2_aligned_err"]].T,
617
- linewidth=0,zorder=1,**errargs)
618
- ax.scatter(sum0["OR_1"],sum0["OR_2_aligned"],label=label[3],zorder=2,color="#cccccc",edgecolors=sum0["Edge_color"],marker=".",**scatterargs)
448
+ linewidth=0,zorder=1,**err_kwargs)
449
+ ax.scatter(sum0["OR_1"],sum0["OR_2_aligned"],label=label[3],zorder=2,color="#cccccc",edgecolors=sum0["Edge_color"],marker=".",**scatter_kwargs)
619
450
  legend_elements.append(label[3])
620
451
  if len(sum1only)>0:
621
452
  ax.errorbar(sum1only["OR_1"],sum1only["OR_2_aligned"], xerr=sum1only[["OR_L_1_err","OR_H_1_err"]].T,yerr=sum1only[["OR_L_2_aligned_err","OR_H_2_aligned_err"]].T,
622
- linewidth=0,zorder=1,**errargs)
623
- ax.scatter(sum1only["OR_1"],sum1only["OR_2_aligned"],label=label[0],zorder=2,color="#e6320e",edgecolors=sum1only["Edge_color"],marker="^",**scatterargs)
453
+ linewidth=0,zorder=1,**err_kwargs)
454
+ ax.scatter(sum1only["OR_1"],sum1only["OR_2_aligned"],label=label[0],zorder=2,color="#e6320e",edgecolors=sum1only["Edge_color"],marker="^",**scatter_kwargs)
624
455
  legend_elements.append(label[0])
625
456
  if len(sum2only)>0:
626
457
  ax.errorbar(sum2only["OR_1"],sum2only["OR_2_aligned"], xerr=sum2only[["OR_L_1_err","OR_H_1_err"]].T,yerr=sum2only[["OR_L_2_aligned_err","OR_H_2_aligned_err"]].T,
627
- linewidth=0,zorder=1,**errargs)
628
- ax.scatter(sum2only["OR_1"],sum2only["OR_2_aligned"],label=label[1],zorder=2,color="#41e620",edgecolors=sum2only["Edge_color"],marker="o",**scatterargs)
458
+ linewidth=0,zorder=1,**err_kwargs)
459
+ ax.scatter(sum2only["OR_1"],sum2only["OR_2_aligned"],label=label[1],zorder=2,color="#41e620",edgecolors=sum2only["Edge_color"],marker="o",**scatter_kwargs)
629
460
  legend_elements.append(label[1])
630
461
  if len(both)>0:
631
462
  ax.errorbar(both["OR_1"],both["OR_2_aligned"], xerr=both[["OR_L_1_err","OR_H_1_err"]].T,yerr=both[["OR_L_2_aligned_err","OR_H_2_aligned_err"]].T,
632
- linewidth=0,zorder=1,**errargs)
633
- ax.scatter(both["OR_1"],both["OR_2_aligned"],label=label[2],zorder=2,color="#205be6",edgecolors=both["Edge_color"],marker="s",**scatterargs)
463
+ linewidth=0,zorder=1,**err_kwargs)
464
+ ax.scatter(both["OR_1"],both["OR_2_aligned"],label=label[2],zorder=2,color="#205be6",edgecolors=both["Edge_color"],marker="s",**scatter_kwargs)
634
465
  legend_elements.append(label[2])
635
466
  ## annotation #################################################################################################################
636
- if anno==True:
637
- sig_list_toanno = sig_list_merged.dropna(axis=0)
638
- if is_q==True and anno_het == True:
639
- sig_list_toanno = sig_list_toanno.loc[sig_list_toanno["Edge_color"]=="black",:]
467
+ ax = scatter_annotation(ax, sig_list_merged,anno, anno_het, is_q, mode,
468
+ anno_min,anno_min1,anno_min2,anno_diff,anno_kwargs,adjust_text_kwargs_l,adjust_text_kwargs_r,
469
+ log,verbose
470
+ )
471
+ #################################################################################################################################
472
+
473
+ # plot x=0,y=0, and a 45 degree line
474
+ xl,xh=ax.get_xlim()
475
+ yl,yh=ax.get_ylim()
476
+
477
+ if mode=="beta" or mode=="BETA" or mode=="Beta":
478
+ #if using beta
479
+ ax.axhline(y=0, zorder=1,**helper_line_args)
480
+ ax.axvline(x=0, zorder=1,**helper_line_args)
481
+ else:
482
+ #if using OR
483
+ ax.axhline(y=1, zorder=1,**helper_line_args)
484
+ ax.axvline(x=1, zorder=1,**helper_line_args)
485
+
486
+ for spine in ['top', 'right']:
487
+ ax.spines[spine].set_visible(False)
488
+
489
+ ###regression line##############################################################################################################################
490
+ ax = confire_regression_line(is_reg,reg_box, sig_list_merged, ax, mode,xl,yl,xh,yh, null_beta, r_se,
491
+ is_45_helper_line,helper_line_args, font_kwargs,
492
+ log, verbose)
640
493
 
494
+
495
+ ax.set_xlabel(xylabel_prefix+label[0],**font_kwargs)
496
+ ax.set_ylabel(xylabel_prefix+label[1],**font_kwargs)
497
+
498
+ ax = configure_legend(fig, ax, legend_mode, is_q, is_q_mc, legend_elements, legend_pos, q_level,
499
+ font_kwargs,scatterargs,legend_args,
500
+ legend_title, legend_title2 )
501
+ ##plot finished########################################################################################
502
+ gc.collect()
503
+
504
+ save_figure(fig, save, keyword="esc",save_args=save_kwargs, log=log, verbose=verbose)
505
+
506
+ sig_list_merged = reorder_columns(sig_list_merged)
507
+
508
+ return [sig_list_merged, fig,log]
509
+
510
+ ###############################################################################################
511
+ ###############################################################################################
512
+ ###############################################################################################
513
+ ###############################################################################################
514
+ ###############################################################################################
515
+ ###############################################################################################
516
+ ###############################################################################################
517
+ ###############################################################################################
518
+ ###############################################################################################
519
+ ###############################################################################################
520
+ ###############################################################################################
521
+ ###############################################################################################
522
+ ###############################################################################################
523
+
524
+ def load_sumstats(path, usecols, label, log, verbose, sep):
525
+ if type(usecols) is not list:
526
+ usecols = [usecols]
527
+
528
+ log.write(" -Loading sumstats for {} : {}".format(label,",".join(usecols)), verbose=verbose)
529
+ #log.write(" -Loading {} SNP list in memory...".format(label), verbose=verbose)
530
+
531
+ if type(path) is Sumstats:
532
+ sumstats = path.data.loc[:,usecols].copy()
533
+ elif type(path) is pd.DataFrame:
534
+ sumstats = path.loc[:,usecols].copy()
535
+ else:
536
+ sumstats=pd.read_table(path,sep=sep,usecols=usecols)
537
+ return sumstats
538
+
539
+ def configure_headers(mode,
540
+ path1,
541
+ path2,
542
+ cols_name_list_1,
543
+ cols_name_list_2,
544
+ effect_cols_list_1,
545
+ effect_cols_list_2,
546
+ scaled1,
547
+ scaled2,
548
+ log,
549
+ verbose):
550
+
551
+ if mode not in ["Beta","beta","BETA","OR","or"]:
552
+ raise ValueError("Please input Beta or OR")
553
+
554
+ if type(path1) is Sumstats:
555
+ log.write("Path1 is gwaslab Sumstats object...", verbose=verbose)
556
+ if cols_name_list_1 is None:
557
+ cols_name_list_1 = ["SNPID","P","EA","NEA","CHR","POS"]
558
+ if scaled1==True:
559
+ cols_name_list_1 = ["SNPID","MLOG10P","EA","NEA","CHR","POS"]
560
+ if effect_cols_list_1 is None:
561
+ if mode=="beta":
562
+ effect_cols_list_1 = ["BETA","SE"]
563
+ else:
564
+ effect_cols_list_1 = ["OR","OR_95L","OR_95U"]
565
+ elif type(path1) is pd.DataFrame:
566
+ log.write("Path1 is pandas DataFrame object...", verbose=verbose)
567
+
568
+ if type(path2) is Sumstats:
569
+ log.write("Path2 is gwaslab Sumstats object...", verbose=verbose)
570
+ if cols_name_list_2 is None:
571
+ cols_name_list_2 = ["SNPID","P","EA","NEA","CHR","POS"]
572
+ if scaled2==True:
573
+ cols_name_list_2 = ["SNPID","MLOG10P","EA","NEA","CHR","POS"]
574
+ if effect_cols_list_2 is None:
575
+ if mode=="beta":
576
+ effect_cols_list_2 = ["BETA","SE"]
577
+ else:
578
+ effect_cols_list_2 = ["OR","OR_95L","OR_95U"]
579
+ elif type(path2) is pd.DataFrame:
580
+ log.write("Path2 is pandas DataFrame object...", verbose=verbose)
581
+
582
+ return cols_name_list_1,cols_name_list_2, effect_cols_list_1, effect_cols_list_2
583
+
584
+ def configure_common_snp_set(path1,path2,
585
+ snplist,
586
+ label,
587
+ cols_name_list_1,cols_name_list_2,
588
+ sep,
589
+ scaled1,
590
+ scaled2,
591
+ log,verbose):
592
+
593
+ ######### load sumstats2
594
+ sumstats = load_sumstats(path=path2,
595
+ usecols=cols_name_list_2[0],
596
+ label=label[1],
597
+ log=log,
598
+ verbose= verbose,
599
+ sep=sep[1])
600
+
601
+ common_snp_set=set(sumstats[cols_name_list_2[0]].values)
602
+
603
+ ######### extract snplist1
604
+ if snplist is not None:
605
+ #use only SNPID, P
606
+ cols_to_extract = [cols_name_list_1[0],cols_name_list_1[1]]
607
+ else:
608
+ # use SNPID, P, chr pos
609
+ cols_to_extract = [cols_name_list_1[0],cols_name_list_1[1],cols_name_list_1[4],cols_name_list_1[5]]
610
+
611
+ ######### load sumstats1
612
+ sumstats = load_sumstats(path=path1,
613
+ usecols=cols_to_extract,
614
+ label=label[0],
615
+ log=log,
616
+ verbose= verbose,
617
+ sep=sep[0])
618
+
619
+ gc.collect()
620
+
621
+ #if scaled1==True:
622
+ # sumstats[cols_name_list_1[1]] = np.power(10,-sumstats[cols_name_list_1[1]])
623
+ ######### 5 extract the common set
624
+
625
+ common_snp_set = common_snp_set.intersection(sumstats[cols_name_list_1[0]].values)
626
+
627
+ log.write(" -Counting variants available for both datasets:",len(common_snp_set)," variants...", verbose=verbose)
628
+
629
+ return sumstats, common_snp_set
630
+
631
+ def rename_sumtats(sumstats, cols_name_list, snplist, scaled,suffix=""):
632
+ ######### 6 rename the sumstats
633
+ rename_dict = { cols_name_list[0]:"SNPID",
634
+ cols_name_list[1]:"P{}".format(suffix),
635
+ }
636
+ if scaled==True:
637
+ rename_dict[cols_name_list[1]] = "MLOG10P{}".format(suffix)
638
+
639
+ if snplist is None:
640
+ rename_dict[cols_name_list[4]]="CHR"
641
+ rename_dict[cols_name_list[5]]="POS"
642
+
643
+ sumstats = sumstats.rename(columns=rename_dict)
644
+ return sumstats
645
+
646
+
647
+ def extract_snp_for_comparison(sumstats, snplist, label,
648
+ get_lead_args, build, drop, anno,
649
+ sig_level,scaled, log, verbose):
650
+ ######### 8 extact SNPs for comparison
651
+ if snplist is not None:
652
+ ######### 8.1 if a snplist is provided, use the snp list
653
+ log.write(" -Extract variants in the given list from "+label+"...")
654
+ sig_list = sumstats.loc[sumstats["SNPID"].isin(snplist),:].copy()
655
+ if anno=="GENENAME":
656
+ sig_list = annogene(sig_list,"SNPID","CHR","POS", build=build, verbose=verbose, **get_lead_args)
657
+ else:
658
+ ######### 8,2 otherwise use the automatically detected lead SNPs
659
+ log.write(" -Extract lead variants from "+label +"...", verbose=verbose)
660
+ sig_list = getsig(sumstats,"SNPID","CHR","POS","P","MLOG10P", build=build, verbose=verbose,sig_level=sig_level,**get_lead_args)
661
+
662
+ if drop==True:
663
+ if scaled==True:
664
+ sig_list = drop_duplicate_and_na(sig_list, sort_by="MLOG10P",ascending=False, log=log , verbose=verbose)
665
+ else:
666
+ sig_list = drop_duplicate_and_na(sig_list, sort_by="P", ascending=True, log=log , verbose=verbose)
667
+
668
+ return sig_list
669
+
670
+ def merge_list(sig_list_1, sig_list_2, anno,labels,log, verbose):
671
+
672
+ log.write("Merging snps from "+labels[0]+" and "+labels[1]+"...", verbose=verbose)
673
+
674
+ if anno == "GENENAME":
675
+ if "GENE" not in sig_list_1.columns:
676
+ sig_list_1["GENE"]=pd.NA
677
+ sig_list_1["LOCATION"]=pd.NA
678
+ if "GENE" not in sig_list_2.columns:
679
+ sig_list_2["GENE"]=pd.NA
680
+ sig_list_2["LOCATION"]=pd.NA
681
+
682
+ sig_list_merged = pd.merge(sig_list_1,sig_list_2,left_on="SNPID",right_on="SNPID",how="outer",suffixes=('_1', '_2'))
683
+
684
+ if anno == "GENENAME":
685
+ sig_list_merged.loc[sig_list_merged["SNPID"].isin((sig_list_1["SNPID"])),"GENENAME"] = sig_list_merged.loc[sig_list_merged["SNPID"].isin((sig_list_1["SNPID"])),"GENE_1"]
686
+ sig_list_merged.loc[~sig_list_merged["SNPID"].isin((sig_list_1["SNPID"])),"GENENAME"] = sig_list_merged.loc[~sig_list_merged["SNPID"].isin((sig_list_1["SNPID"])),"GENE_2"]
687
+ sig_list_merged = sig_list_merged.drop(columns=["GENE_1","GENE_2","LOCATION_1","LOCATION_2"])
688
+ # SNPID P_1 P_2
689
+ #0 rs117986209 0.142569 0.394455
690
+ #1 rs6704312 0.652104 0.143750
691
+ return sig_list_merged
692
+
693
+ def configure_cols_to_extract(mode,
694
+ cols_name_list,
695
+ effect_cols_list,
696
+ eaf):
697
+
698
+ if mode=="beta" or mode=="BETA" or mode=="Beta":
699
+ #[snpid,p,ea,nea] ,[effect,se]
700
+ #[snpid,p,ea,nea,chr,pos],[effect,se]
701
+ #[snpid,p,ea,nea,chr,pos],[OR,OR_l,OR_h]
702
+ cols_to_extract = [cols_name_list[0],cols_name_list[1], cols_name_list[2],cols_name_list[3], effect_cols_list[0], effect_cols_list[1]]
703
+ else:
704
+ cols_to_extract = [cols_name_list[0],cols_name_list[1], cols_name_list[2],cols_name_list[3], effect_cols_list[0], effect_cols_list[1], effect_cols_list[2]]
705
+
706
+ if len(eaf)>0:
707
+ cols_to_extract.append(eaf[0])
708
+
709
+ return cols_to_extract
710
+
711
+ def rename_sumstats_full(mode, sumstats, cols_name_list, effect_cols_list, eaf, drop, index, scaled, log, verbose):
712
+ if mode=="beta" or mode=="BETA" or mode=="Beta":
713
+ rename_dict = { cols_name_list[0]:"SNPID",
714
+ cols_name_list[1]:"P_{}".format(index),
715
+ cols_name_list[2]:"EA_{}".format(index),
716
+ cols_name_list[3]:"NEA_{}".format(index),
717
+ effect_cols_list[0]:"EFFECT_{}".format(index),
718
+ effect_cols_list[1]:"SE_{}".format(index)}
719
+
720
+
721
+ else:
722
+ # if or
723
+ rename_dict = { cols_name_list[0]:"SNPID",
724
+ cols_name_list[1]:"P_{}".format(index),
725
+ cols_name_list[2]:"EA_{}".format(index),
726
+ cols_name_list[3]:"NEA_{}".format(index),
727
+ effect_cols_list[0]:"OR_{}".format(index),
728
+ effect_cols_list[1]:"OR_L_{}".format(index),
729
+ effect_cols_list[2]:"OR_H_{}".format(index)}
730
+ if scaled==True:
731
+ rename_dict[cols_name_list[1]]="MLOG10P_{}".format(index)
732
+ ## check if eaf column is provided.
733
+ if len(eaf)>0:
734
+ rename_dict[eaf[index-1]]="EAF_{}".format(index)
735
+ sumstats = sumstats.rename(columns=rename_dict)
736
+
737
+ # drop na and duplicate
738
+ if drop==True:
739
+ if scaled==True:
740
+ sumstats = drop_duplicate_and_na(sumstats, sort_by="MLOG10P_{}".format(index),ascending=False, log=log , verbose=verbose)
741
+ else:
742
+ sumstats = drop_duplicate_and_na(sumstats, sort_by="P_{}".format(index), ascending=True, log=log , verbose=verbose)
743
+
744
+ if scaled==True:
745
+ sumstats.drop("MLOG10P_{}".format(index),axis=1,inplace=True)
746
+ else:
747
+ sumstats.drop("P_{}".format(index),axis=1,inplace=True)
748
+ return sumstats
749
+
750
+ def update_stats(sig_list_merged,
751
+ path,
752
+ cols_name_list,
753
+ sep,
754
+ snplist,
755
+ label,
756
+ drop,
757
+ index,
758
+ scaled,
759
+ log,
760
+ verbose):
761
+
762
+ log.write(" -Updating missing information for "+label+" ...", verbose=verbose)
763
+ cols_to_extract = [cols_name_list[0], cols_name_list[1]]
764
+
765
+ sumstats = load_sumstats(path=path,
766
+ usecols=cols_to_extract,
767
+ label=label,
768
+ log=log,
769
+ verbose= verbose,
770
+ sep=sep)
771
+ #if scaled1==True:
772
+ # sumstats[cols_name_list_1[1]] = np.power(10,-sumstats[cols_name_list_1[1]])
773
+
774
+ sumstats = rename_sumtats(sumstats = sumstats,
775
+ cols_name_list = cols_name_list,
776
+ snplist = snplist,
777
+ scaled=scaled,
778
+ suffix="_{}".format(index))
779
+ # drop na and duplicate
780
+ if drop==True:
781
+ if scaled==True:
782
+ sumstats = drop_duplicate_and_na(sumstats, sort_by="MLOG10P_{}".format(index),ascending=False, log=log , verbose=verbose)
783
+ else:
784
+ sumstats = drop_duplicate_and_na(sumstats, sort_by="P_{}".format(index), ascending=True, log=log , verbose=verbose)
785
+
786
+
787
+ sumstats = sumstats.set_index("SNPID")
788
+ sig_list_merged.update(sumstats)
789
+
790
+ return sig_list_merged
791
+
792
+
793
+ def assign_indicator(sig_list_merged, snplist, sig_level, scaled1, scaled2, log, verbose):
794
+ ############## 18 init indicator
795
+ log.write(" -Assigning indicator ...", verbose=verbose)
796
+ # 0-> 0
797
+ # 1 -> sig in sumstats1
798
+ # 2 -> sig in sumsatts2
799
+ # 3-> sig in both sumstats1 + sumstats2
800
+ sig_list_merged["indicator"] = 0
801
+
802
+ if scaled1==True:
803
+ sig_list_merged.loc[sig_list_merged["MLOG10P_1"]>-np.log10(sig_level),"indicator"]=1+sig_list_merged.loc[sig_list_merged["MLOG10P_1"]>-np.log10(sig_level),"indicator"]
804
+ else:
805
+ sig_list_merged.loc[sig_list_merged["P_1"]<sig_level,"indicator"]=1+sig_list_merged.loc[sig_list_merged["P_1"]<sig_level,"indicator"]
806
+
807
+ if scaled2==True:
808
+ sig_list_merged.loc[sig_list_merged["MLOG10P_2"]>-np.log10(sig_level),"indicator"]=2+sig_list_merged.loc[sig_list_merged["MLOG10P_2"]>-np.log10(sig_level),"indicator"]
809
+ else:
810
+ sig_list_merged.loc[sig_list_merged["P_2"]<sig_level,"indicator"]=2+sig_list_merged.loc[sig_list_merged["P_2"]<sig_level,"indicator"]
811
+
812
+ if snplist is None:
813
+ sig_list_merged["CHR"]=np.max(sig_list_merged[["CHR_1","CHR_2"]], axis=1).astype(int)
814
+ sig_list_merged["POS"]=np.max(sig_list_merged[["POS_1","POS_2"]], axis=1).astype(int)
815
+ sig_list_merged.drop(labels=['CHR_1', 'CHR_2','POS_1', 'POS_2'], axis=1,inplace=True)
816
+ return sig_list_merged
817
+
+ def align_alleles(sig_list_merged, label, mode, eaf, log, verbose):
+     log.write(" -Aligning "+label[1]+" EA with "+label[0]+" EA ...", verbose=verbose)
+     ############### 19 align allele effects with sumstats 1
+     sig_list_merged["EA_1"] = sig_list_merged["EA_1"].astype("string")
+     sig_list_merged["EA_2"] = sig_list_merged["EA_2"].astype("string")
+     sig_list_merged["NEA_1"] = sig_list_merged["NEA_1"].astype("string")
+     sig_list_merged["NEA_2"] = sig_list_merged["NEA_2"].astype("string")
+     if mode=="beta" or mode=="BETA" or mode=="Beta":
+         # copy the raw values
+         sig_list_merged["EA_2_aligned"] = sig_list_merged["EA_2"]
+         sig_list_merged["NEA_2_aligned"] = sig_list_merged["NEA_2"]
+         sig_list_merged["EFFECT_2_aligned"] = sig_list_merged["EFFECT_2"]
+
+         # flip EA/NEA and BETA for sumstats2 where the effect alleles differ
+         sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"EA_2_aligned"] = sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"NEA_2"]
+         sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"NEA_2_aligned"] = sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"EA_2"]
+         sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"EFFECT_2_aligned"] = -sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"EFFECT_2"]
+     else:
+         # flip for OR: take the reciprocal; the low/high CI bounds swap as well
+         sig_list_merged["EA_2_aligned"] = sig_list_merged["EA_2"]
+         sig_list_merged["NEA_2_aligned"] = sig_list_merged["NEA_2"]
+         sig_list_merged["OR_2_aligned"] = sig_list_merged["OR_2"]
+         sig_list_merged["OR_L_2_aligned"] = sig_list_merged["OR_L_2"]
+         sig_list_merged["OR_H_2_aligned"] = sig_list_merged["OR_H_2"]
+
+         sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"EA_2_aligned"] = sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"NEA_2"]
+         sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"NEA_2_aligned"] = sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"EA_2"]
+         sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"OR_2_aligned"] = 1/sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"OR_2"]
+         sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"OR_H_2_aligned"] = 1/sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"OR_L_2"]
+         sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"OR_L_2_aligned"] = 1/sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"OR_H_2"]
+
+         sig_list_merged["BETA_1"] = np.log(sig_list_merged["OR_1"])
+         sig_list_merged["BETA_2_aligned"] = np.log(sig_list_merged["OR_2_aligned"])
+         sig_list_merged["SE_1"] = (np.log(sig_list_merged["OR_H_1"]) - np.log(sig_list_merged["OR_1"]))/ss.norm.ppf(0.975)
+         sig_list_merged["SE_2"] = (np.log(sig_list_merged["OR_H_2_aligned"]) - np.log(sig_list_merged["OR_2_aligned"]))/ss.norm.ppf(0.975)
+
+         sig_list_merged["OR_L_1_err"] = np.abs(sig_list_merged["OR_L_1"]-sig_list_merged["OR_1"])
+         sig_list_merged["OR_H_1_err"] = np.abs(sig_list_merged["OR_H_1"]-sig_list_merged["OR_1"])
+         sig_list_merged["OR_L_2_aligned_err"] = np.abs(sig_list_merged["OR_L_2_aligned"]-sig_list_merged["OR_2_aligned"])
+         sig_list_merged["OR_H_2_aligned_err"] = np.abs(sig_list_merged["OR_H_2_aligned"]-sig_list_merged["OR_2_aligned"])
+
+     if len(eaf)>0:
+         # flip EAF as well
+         sig_list_merged["EAF_2_aligned"] = sig_list_merged["EAF_2"]
+         sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"EAF_2_aligned"] = 1 - sig_list_merged.loc[sig_list_merged["EA_1"]!=sig_list_merged["EA_2"],"EAF_2"]
+     return sig_list_merged
+
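The alignment rule reduces to: if the two effect alleles disagree, swap EA/NEA in sumstats2 and flip the effect, negating BETA or taking the reciprocal OR (in which case the low and high confidence bounds also swap). A self-contained sketch of that rule with pandas .where (toy data; column names mirror the code above):

    import pandas as pd

    df = pd.DataFrame({"EA_1": ["A", "A"], "EA_2": ["A", "G"], "NEA_2": ["G", "A"],
                       "BETA_2": [0.10, 0.10], "OR_2": [1.20, 1.20]})
    flip = df["EA_1"] != df["EA_2"]

    df["EA_2_aligned"] = df["EA_2"].where(~flip, df["NEA_2"])
    df["NEA_2_aligned"] = df["NEA_2"].where(~flip, df["EA_2"])
    df["BETA_2_aligned"] = df["BETA_2"].where(~flip, -df["BETA_2"])
    df["OR_2_aligned"] = df["OR_2"].where(~flip, 1 / df["OR_2"])
    # row 1 becomes EA=A, BETA=-0.10, OR~0.833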
+ #########################################################################################################################
+ #########################################################################################################################
+ #########################################################################################################################
+ #########################################################################################################################
+ #########################################################################################################################
+ #########################################################################################################################
+ #########################################################################################################################
+ #########################################################################################################################
+ #########################################################################################################################
+
+ def check_allele_match(sig_list_merged, allele_match, label, log, verbose):
+     # check effect allele matching after alignment
+     nonmatch = np.nansum(sig_list_merged["EA_1"] != sig_list_merged["EA_2_aligned"])
+     log.write(" -Aligned all EAs in {} with EAs in {} ...".format(label[1],label[0]), verbose=verbose)
+     if nonmatch>0:
+         log.warning("Alleles for {} variants do not match...".format(nonmatch))
+     if allele_match==True:
+         if nonmatch>0:
+             sig_list_merged = sig_list_merged.loc[sig_list_merged["EA_1"] == sig_list_merged["EA_2_aligned"]]
+         else:
+             log.write(" -No variants with mismatched EA ...", verbose=verbose)
+     return sig_list_merged
+
+ def winnerscurse_correction(sig_list_merged, mode, wc_correction, sig_level, scaled1, scaled2, log, verbose):
+     if mode=="beta":
+         if scaled1==True:
+             match1 = sig_list_merged["MLOG10P_1"] > -np.log10(sig_level)
+         else:
+             match1 = sig_list_merged["P_1"] < sig_level
+         if scaled2==True:
+             match2 = sig_list_merged["MLOG10P_2"] > -np.log10(sig_level)
+         else:
+             match2 = sig_list_merged["P_2"] < sig_level
+
+         if wc_correction == "all":
+             log.write(" -Correcting BETA for winner's curse with threshold at {} for all variants...".format(sig_level), verbose=verbose)
+             sig_list_merged["EFFECT_1_RAW"] = sig_list_merged["EFFECT_1"].copy()
+             sig_list_merged["EFFECT_2_aligned_RAW"] = sig_list_merged["EFFECT_2_aligned"].copy()
+
+             log.write(" -Correcting BETA for {} variants in sumstats1...".format(sum(~sig_list_merged["EFFECT_1"].isna())), verbose=verbose)
+             sig_list_merged["EFFECT_1"] = sig_list_merged[["EFFECT_1_RAW","SE_1"]].apply(lambda x: wc_correct(x[0],x[1],sig_level), axis=1)
+
+             log.write(" -Correcting BETA for {} variants in sumstats2...".format(sum(~sig_list_merged["EFFECT_2_aligned"].isna())), verbose=verbose)
+             sig_list_merged["EFFECT_2_aligned"] = sig_list_merged[["EFFECT_2_aligned_RAW","SE_2"]].apply(lambda x: wc_correct(x[0],x[1],sig_level), axis=1)
+
+         elif wc_correction == "sig":
+             log.write(" -Correcting BETA for winner's curse with threshold at {} for significant variants...".format(sig_level), verbose=verbose)
+             sig_list_merged["EFFECT_1_RAW"] = sig_list_merged["EFFECT_1"].copy()
+             sig_list_merged["EFFECT_2_aligned_RAW"] = sig_list_merged["EFFECT_2_aligned"].copy()
+             log.write(" -Correcting BETA for {} variants in sumstats1...".format(sum(match1)), verbose=verbose)
+             sig_list_merged.loc[match1, "EFFECT_1"] = sig_list_merged.loc[match1, ["EFFECT_1_RAW","SE_1"]].apply(lambda x: wc_correct_test(x[0],x[1],sig_level), axis=1)
+             log.write(" -Correcting BETA for {} variants in sumstats2...".format(sum(match2)), verbose=verbose)
+             sig_list_merged.loc[match2, "EFFECT_2_aligned"] = sig_list_merged.loc[match2, ["EFFECT_2_aligned_RAW","SE_2"]].apply(lambda x: wc_correct_test(x[0],x[1],sig_level), axis=1)
+
+         elif wc_correction == "sumstats1":
+             log.write(" -Correcting BETA for winner's curse with threshold at {} for significant variants in sumstats1...".format(sig_level), verbose=verbose)
+             sig_list_merged["EFFECT_1_RAW"] = sig_list_merged["EFFECT_1"].copy()
+             log.write(" -Correcting BETA for {} variants in sumstats1...".format(sum(match1)), verbose=verbose)
+             sig_list_merged.loc[match1, "EFFECT_1"] = sig_list_merged.loc[match1, ["EFFECT_1_RAW","SE_1"]].apply(lambda x: wc_correct_test(x[0],x[1],sig_level), axis=1)
+
+         elif wc_correction == "sumstats2":
+             log.write(" -Correcting BETA for winner's curse with threshold at {} for significant variants in sumstats2...".format(sig_level), verbose=verbose)
+             sig_list_merged["EFFECT_2_aligned_RAW"] = sig_list_merged["EFFECT_2_aligned"].copy()
+             log.write(" -Correcting BETA for {} variants in sumstats2...".format(sum(match2)), verbose=verbose)
+             sig_list_merged.loc[match2, "EFFECT_2_aligned"] = sig_list_merged.loc[match2, ["EFFECT_2_aligned_RAW","SE_2"]].apply(lambda x: wc_correct_test(x[0],x[1],sig_level), axis=1)
+     return sig_list_merged
+
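wc_correct and wc_correct_test are imported from gwaslab.util_in_correct_winnerscurse, and their exact algorithm is not shown in this diff. As background, one common conditional-expectation approach debiases a significant estimate by inverting E[beta_hat | |beta_hat/SE| > z]. The hypothetical sketch below illustrates that idea only; it is not necessarily what wc_correct implements:

    import scipy.stats as ss
    from scipy.optimize import brentq

    def wc_correct_sketch(beta_hat, se, sig_level=5e-8):
        # selection threshold on |beta/se| implied by the p-value cutoff
        z = ss.norm.ppf(1 - sig_level / 2)
        x = beta_hat / se  # observed z-score

        def cond_mean(mu):
            # E[X | |X| > z] for X ~ N(mu, 1), conditioned on the selection event
            p = ss.norm.cdf(mu - z) + ss.norm.cdf(-mu - z)
            return mu + (ss.norm.pdf(z - mu) - ss.norm.pdf(z + mu)) / p

        # solve cond_mean(mu) = x for the debiased mean, then rescale by se
        mu = brentq(lambda m: cond_mean(m) - x, -abs(x) - 10, abs(x) + 10)
        return mu * se

    print(wc_correct_sketch(0.12, 0.02))  # shrinks the naive estimate toward zero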
+ def filter_by_maf(sig_list_merged, eaf, maf_level, log, verbose):
+     if (len(eaf)>0) and (maf_level is not None):
+         both_eaf_clear = (sig_list_merged["EAF_1"]>maf_level)&(sig_list_merged["EAF_1"]<1-maf_level)&(sig_list_merged["EAF_2"]>maf_level)&(sig_list_merged["EAF_2"]<1-maf_level)
+         log.write(" -Excluded "+str(len(sig_list_merged)-sum(both_eaf_clear))+" variants with MAF <", maf_level, verbose=verbose)
+         sig_list_merged = sig_list_merged.loc[both_eaf_clear,:]
+     return sig_list_merged
+
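The mask keeps a variant only when both effect-allele frequencies fall strictly inside (maf_level, 1 - maf_level), so the filter is symmetric around 0.5. For example:

    import pandas as pd

    df = pd.DataFrame({"EAF_1": [0.005, 0.30, 0.999], "EAF_2": [0.20, 0.40, 0.50]})
    maf_level = 0.01
    keep = ((df["EAF_1"] > maf_level) & (df["EAF_1"] < 1 - maf_level) &
            (df["EAF_2"] > maf_level) & (df["EAF_2"] < 1 - maf_level))
    print(df[keep])  # only the middle row survives; the others are too rare on one side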
+ def test_q(df, beta1, se1, beta2, se2, q_level=0.05, is_q_mc=False, log=Log(), verbose=False):
+     w1="Weight_1"
+     w2="Weight_2"
+     beta="BETA_FE"
+     q="Q"
+     pq="HetP"
+     rawpq="RAW_HetP"
+     i2="I2"
+     df[w1] = 1/(df[se1])**2
+     df[w2] = 1/(df[se2])**2
+     df[beta] = (df[w1]*df[beta1] + df[w2]*df[beta2])/(df[w1]+df[w2])
+
+     # Cochran (1954)
+     df[q] = df[w1]*(df[beta1]-df[beta])**2 + df[w2]*(df[beta2]-df[beta])**2
+     df[pq] = ss.chi2.sf(df[q], 1)
+     df["Edge_color"]="white"
+
+     if is_q_mc=="fdr":
+         log.write(" -FDR correction applied...", verbose=verbose)
+         df[rawpq] = df[pq]
+         df[pq] = ss.false_discovery_control(df[pq])
+     elif is_q_mc=="bon":
+         log.write(" -Bonferroni correction applied...", verbose=verbose)
+         df[rawpq] = df[pq]
+         df[pq] = df[pq] * len(df[pq])
+
+     df.loc[df[pq]<q_level,"Edge_color"]="black"
+     df.drop(columns=["Weight_1","Weight_2","BETA_FE"], inplace=True)
+     # Huedo-Medina, T. B., Sánchez-Meca, J., Marín-Martínez, F., & Botella, J. (2006). Assessing heterogeneity in meta-analysis: Q statistic or I² index? Psychological Methods, 11(2), 193.
+
+     # calculate I2
+     df[i2] = (df[q] - 1)/df[q]
+     df.loc[df[i2]<0, i2] = 0
+
+     return df
+
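With two studies, Cochran's Q has one degree of freedom and I² = (Q - 1)/Q, floored at zero. A worked single-variant example of the same algebra (made-up effect sizes):

    import scipy.stats as ss

    beta1, se1 = 0.10, 0.02
    beta2, se2 = 0.03, 0.02

    w1, w2 = 1 / se1**2, 1 / se2**2
    beta_fe = (w1 * beta1 + w2 * beta2) / (w1 + w2)            # fixed-effect mean: 0.065
    q = w1 * (beta1 - beta_fe)**2 + w2 * (beta2 - beta_fe)**2  # Cochran's Q = 6.125
    het_p = ss.chi2.sf(q, 1)                                   # ~0.013
    i2 = max(0.0, (q - 1) / q)                                 # ~0.84
    # at q_level=0.05 this variant would be drawn with a black edge

Note that scipy.stats.false_discovery_control, used for the "fdr" option, requires SciPy 1.11 or later.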
+ def jackknife_r(df, x="EFFECT_1", y="EFFECT_2_aligned"):
+     """Jackknife estimation of the standard error of r."""
+
+     # drop rows with missing values in either column
+     df_nona = df.loc[:,[x,y]].dropna()
+
+     # number of non-missing entries
+     n = len(df_nona)
+
+     # assign row numbers
+     df_nona["nrow"] = range(n)
+
+     # a list to store the leave-one-out estimates of r
+     r_list=[]
+
+     # estimate r
+     for i in range(n):
+         # exclude one record
+         records_to_use = df_nona["nrow"]!=i
+         # estimate r without it
+         reg_jackknife = ss.linregress(df_nona.loc[records_to_use, x], df_nona.loc[records_to_use, y])
+         # add r_i to the list
+         r_list.append(reg_jackknife[2])
+
+     # convert list to array
+     rs = np.array(r_list)
+     # https://en.wikipedia.org/wiki/Jackknife_resampling
+     r_se = np.sqrt( (n-1)/n * np.sum((rs - np.mean(rs))**2) )
+     return r_se
+
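A quick usage sketch of the helper on correlated toy data (assuming jackknife_r above is in scope); the jackknife SE should shrink roughly as 1/sqrt(n):

    import numpy as np
    import pandas as pd
    import scipy.stats as ss

    rng = np.random.default_rng(0)
    x = rng.normal(size=200)
    toy = pd.DataFrame({"EFFECT_1": x,
                        "EFFECT_2_aligned": 0.8 * x + rng.normal(scale=0.5, size=200)})

    r = ss.linregress(toy["EFFECT_1"], toy["EFFECT_2_aligned"]).rvalue
    print(r, jackknife_r(toy))  # point estimate of r and its leave-one-out SE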
+ def drop_duplicate_and_na(df, snpid="SNPID", sort_by=False, log=Log(), ascending=True, verbose=True):
+
+     length_before = len(df)
+
+     if sort_by!=False:
+         df.sort_values(by=sort_by, ascending=ascending, inplace=True)
+
+     df.dropna(axis="index", subset=[snpid], inplace=True)
+     df.drop_duplicates(subset=[snpid], keep='first', inplace=True)
+
+     length_after = len(df)
+     if length_before != length_after:
+         log.write(" -Dropped {} duplicates or NAs...".format(length_before - length_after), verbose=verbose)
+     return df
+
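Because duplicates are resolved with keep='first' after sorting, the sort key decides which record survives: sorting P ascending keeps the most significant copy, while sorting MLOG10P descending does the same for scaled input. For example (assuming the helper above is in scope):

    import pandas as pd

    toy = pd.DataFrame({"SNPID": ["rs1", "rs1", "rs2", None],
                        "P": [1e-6, 1e-9, 0.01, 0.5]})
    deduped = drop_duplicate_and_na(toy, sort_by="P", ascending=True)
    print(deduped["SNPID"].tolist())  # ['rs1', 'rs2']: keeps rs1 at P=1e-9, drops the NA row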
+ #########################################################################################################################
+ #########################################################################################################################
+ #########################################################################################################################
+ #########################################################################################################################
+ #########################################################################################################################
+ #########################################################################################################################
+ #########################################################################################################################
+ #########################################################################################################################
+ #########################################################################################################################
+
+ def scatter_annotation(ax, sig_list_merged, anno, anno_het, is_q, mode,
+                        anno_min, anno_min1, anno_min2, anno_diff, anno_kwargs,
+                        adjust_text_kwargs_l, adjust_text_kwargs_r,
+                        log, verbose):
+     if anno==True or anno=="GENENAME":
+         sig_list_toanno = sig_list_merged.dropna(axis=0)
+         if is_q==True and anno_het == True:
+             sig_list_toanno = sig_list_toanno.loc[sig_list_toanno["Edge_color"]=="black",:]
+
          if mode=="beta":
              sig_list_toanno = sig_list_toanno.loc[sig_list_toanno["EFFECT_1"].abs() >=anno_min1 ,:]
              sig_list_toanno = sig_list_toanno.loc[sig_list_toanno["EFFECT_2_aligned"].abs() >=anno_min2 ,:]
@@ -651,22 +1060,38 @@ def compare_effect(path1,
 
          texts_l=[]
          texts_r=[]
+
+         if anno==True:
+             log.write("Annotating variants using {}".format("SNPID"), verbose=verbose)
+         elif anno=="GENENAME":
+             log.write("Annotating variants using {}".format("GENENAME"), verbose=verbose)
+
          for index, row in sig_list_toanno.iterrows():
+             #log.write("Annotating {}...".format(row), verbose=verbose)
+             if anno==True:
+                 to_anno_text = index
+             elif type(anno) is str:
+                 if not pd.isna(row[anno]):
+                     to_anno_text = row[anno]
+                 else:
+                     to_anno_text = index
+
              if mode=="beta" or mode=="BETA" or mode=="Beta":
                  if row["EFFECT_1"] < row["EFFECT_2_aligned"]:
-                     texts_l.append(plt.text(row["EFFECT_1"], row["EFFECT_2_aligned"],index,ha="right",va="bottom"))
+                     texts_l.append(plt.text(row["EFFECT_1"], row["EFFECT_2_aligned"],to_anno_text,ha="right",va="bottom", **anno_kwargs))
                  else:
-                     texts_r.append(plt.text(row["EFFECT_1"], row["EFFECT_2_aligned"],index,ha="left",va="top"))
+                     texts_r.append(plt.text(row["EFFECT_1"], row["EFFECT_2_aligned"],to_anno_text,ha="left",va="top", **anno_kwargs))
              else:
                  if row["OR_1"] < row["OR_2_aligned"]:
-                     texts_l.append(plt.text(row["OR_1"], row["OR_2_aligned"],index, ha='right', va='bottom'))
+                     texts_l.append(plt.text(row["OR_1"], row["OR_2_aligned"],to_anno_text, ha='right', va='bottom', **anno_kwargs))
                  else:
-                     texts_r.append(plt.text(row["OR_1"], row["OR_2_aligned"],index, ha='left', va='top'))
-
-         adjust_text(texts_l,autoalign =False,precision =0.001,lim=1000, ha="right",va="bottom", expand_text=(1,1.8) , expand_objects=(0.1,0.1), expand_points=(1.8,1.8) ,force_objects=(0.8,0.8) ,arrowprops=dict(arrowstyle='-|>', color='grey'),ax=ax)
-         adjust_text(texts_r,autoalign =False,precision =0.001,lim=1000, ha="left",va="top", expand_text=(1,1.8) , expand_objects=(0.1,0.1), expand_points=(1.8,1.8) ,force_objects =(0.8,0.8),arrowprops=dict(arrowstyle='-|>', color='grey'),ax=ax)
-
+                     texts_r.append(plt.text(row["OR_1"], row["OR_2_aligned"],to_anno_text, ha='left', va='top', **anno_kwargs))
+         if len(texts_l)>0:
+             adjust_text(texts_l, ax=ax, **adjust_text_kwargs_l)
+         if len(texts_r)>0:
+             adjust_text(texts_r, ax=ax, **adjust_text_kwargs_r)
      elif type(anno) is dict:
+         sig_list_toanno = sig_list_merged.dropna(axis=0)
          # if input is a dict
          sig_list_toanno = sig_list_toanno.loc[sig_list_toanno.index.isin(list(anno.keys())),:]
          if is_q==True and anno_het == True:
@@ -688,37 +1113,24 @@ def compare_effect(path1,
          for index, row in sig_list_toanno.iterrows():
              if mode=="beta" or mode=="BETA" or mode=="Beta":
                  if row["EFFECT_1"] < row["EFFECT_2_aligned"]:
-                     texts_l.append(plt.text(row["EFFECT_1"], row["EFFECT_2_aligned"],anno[index],ha="right",va="bottom"))
+                     texts_l.append(plt.text(row["EFFECT_1"], row["EFFECT_2_aligned"],anno[index],ha="right",va="bottom", **anno_kwargs))
              else:
-                     texts_r.append(plt.text(row["EFFECT_1"], row["EFFECT_2_aligned"],anno[index],ha="left",va="top"))
+                     texts_r.append(plt.text(row["EFFECT_1"], row["EFFECT_2_aligned"],anno[index],ha="left",va="top", **anno_kwargs))
              else:
                  if row["OR_1"] < row["OR_2_aligned"]:
-                     texts_l.append(plt.text(row["OR_1"], row["OR_2_aligned"],anno[index], ha='right', va='bottom'))
+                     texts_l.append(plt.text(row["OR_1"], row["OR_2_aligned"],anno[index], ha='right', va='bottom', **anno_kwargs))
                  else:
-                     texts_r.append(plt.text(row["OR_1"], row["OR_2_aligned"],anno[index], ha='left', va='top'))
+                     texts_r.append(plt.text(row["OR_1"], row["OR_2_aligned"],anno[index], ha='left', va='top', **anno_kwargs))
+         if len(texts_l)>0:
+             adjust_text(texts_l, ax=ax, **adjust_text_kwargs_l)
+         if len(texts_r)>0:
+             adjust_text(texts_r, ax=ax, **adjust_text_kwargs_r)
+     return ax
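The refactor delegates label repulsion to the adjustText package through adjust_text_kwargs_l / adjust_text_kwargs_r instead of the hard-coded arguments removed below. A minimal standalone illustration of the call pattern (toy data; the kwargs shown are examples, not gwaslab defaults):

    import matplotlib.pyplot as plt
    from adjustText import adjust_text

    fig, ax = plt.subplots()
    points = [(0.10, 0.12, "rs1"), (0.11, 0.13, "rs2"), (0.40, 0.35, "rs3")]
    ax.scatter([p[0] for p in points], [p[1] for p in points])

    # collect the labels first, then let adjust_text push overlapping ones apart
    texts = [ax.text(px, py, name) for px, py, name in points]
    adjust_text(texts, ax=ax, arrowprops=dict(arrowstyle="-|>", color="grey"))
    fig.savefig("toy_labels.png")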
 
-         adjust_text(texts_l,autoalign =False,precision =0.001,lim=1000, ha="right",va="bottom", expand_text=(1,1.8) , expand_objects=(0.1,0.1), expand_points=(1.8,1.8) ,force_objects=(0.8,0.8) ,arrowprops=dict(arrowstyle='-|>', color='grey'),ax=ax)
-         adjust_text(texts_r,autoalign =False,precision =0.001,lim=1000, ha="left",va="top", expand_text=(1,1.8) , expand_objects=(0.1,0.1), expand_points=(1.8,1.8) ,force_objects =(0.8,0.8),arrowprops=dict(arrowstyle='-|>', color='grey'),ax=ax)
-     #################################################################################################################################
-
-     # plot x=0, y=0, and a 45-degree line
-     xl,xh=ax.get_xlim()
-     yl,yh=ax.get_ylim()
-
-     if mode=="beta" or mode=="BETA" or mode=="Beta":
-         # if using BETA
-         ax.axhline(y=0, zorder=1,**helper_line_args)
-         ax.axvline(x=0, zorder=1,**helper_line_args)
-     else:
-         # if using OR
-         ax.axhline(y=1, zorder=1,**helper_line_args)
-         ax.axvline(x=1, zorder=1,**helper_line_args)
-
-     for spine in ['top', 'right']:
-         ax.spines[spine].set_visible(False)
-
 
-     ###regression line##############################################################################################################
+ def confire_regression_line(is_reg, reg_box, sig_list_merged, ax, mode, xl, yl, xh, yh, null_beta, r_se,
+                             is_45_helper_line, helper_line_args, font_kwargs,
+                             log, verbose):
      if len(sig_list_merged)<3: is_reg=False
      if is_reg is True:
          if mode=="beta" or mode=="BETA" or mode=="Beta":
@@ -764,7 +1176,7 @@ def compare_effect(path1,
              pe="0"
              p_text="$p = " + p12 + " \\times 10^{"+pe+"}$"
              p_latex= f'{p_text}'
-             ax.text(0.98,0.02,"$y =$ "+"{:.2f}".format(reg[1]) +" $+$ "+ "{:.2f}".format(reg[0])+" $x$, "+ p_latex + ", $r =$" +"{:.2f}".format(reg[2])+r_se_jackknife_string, va="bottom",ha="right",transform=ax.transAxes, bbox=reg_box, **fontargs)
+             ax.text(0.98,0.02,"$y =$ "+"{:.2f}".format(reg[1]) +" $+$ "+ "{:.2f}".format(reg[0])+" $x$, "+ p_latex + ", $r =$" +"{:.2f}".format(reg[2])+r_se_jackknife_string, va="bottom",ha="right",transform=ax.transAxes, bbox=reg_box, **font_kwargs)
          else:
              # if the regression coefficient is < 0, the auxiliary line slope is -1
              if is_45_helper_line is True:
@@ -781,7 +1193,7 @@ def compare_effect(path1,
              pe="0"
              p_text="$p = " + p12 + " \\times 10^{"+pe+"}$"
              p_latex= f'{p_text}'
-             ax.text(0.98,0.02,"$y =$ "+"{:.2f}".format(reg[1]) +" $-$ "+ "{:.2f}".format(abs(reg[0]))+" $x$, "+ p_latex + ", $r =$" +"{:.2f}".format(reg[2])+r_se_jackknife_string, va="bottom",ha="right",transform=ax.transAxes,bbox=reg_box,**fontargs)
+             ax.text(0.98,0.02,"$y =$ "+"{:.2f}".format(reg[1]) +" $-$ "+ "{:.2f}".format(abs(reg[0]))+" $x$, "+ p_latex + ", $r =$" +"{:.2f}".format(reg[2])+r_se_jackknife_string, va="bottom",ha="right",transform=ax.transAxes,bbox=reg_box,**font_kwargs)
 
          if mode=="beta" or mode=="BETA" or mode=="Beta":
              middle = sig_list_merged["EFFECT_1"].mean()
@@ -792,11 +1204,12 @@
              ax.axline(xy1=(0,reg[1]),slope=reg[0],color="#cccccc",linestyle='--',zorder=1)
          else:
              ax.axline(xy1=(1,reg[0]+reg[1]),slope=reg[0],color="#cccccc",linestyle='--',zorder=1)
-
-
-     ax.set_xlabel(xylabel_prefix+label[0],**fontargs)
-     ax.set_ylabel(xylabel_prefix+label[1],**fontargs)
-
+     return ax
+
+
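The regression annotation above comes from scipy.stats.linregress, with the p-value split into a mantissa/exponent pair for matplotlib mathtext. A compact sketch of both steps on toy data:

    import math
    import scipy.stats as ss

    x = [0.01, 0.05, 0.10, 0.20, 0.25]
    y = [0.02, 0.04, 0.12, 0.18, 0.27]
    reg = ss.linregress(x, y)  # slope, intercept, rvalue, pvalue, stderr

    exponent = math.floor(math.log10(reg.pvalue))
    mantissa = reg.pvalue / 10**exponent
    p_label = "$p = {:.2f} \\times 10^{{{}}}$".format(mantissa, exponent)
    print("y = {:.2f} + {:.2f} x, r = {:.2f},".format(reg.intercept, reg.slope, reg.rvalue), p_label)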
+ def configure_legend(fig, ax, legend_mode, is_q, is_q_mc, legend_elements, legend_pos, q_level,
+                      font_kwargs, scatterargs, legend_args,
+                      legend_title, legend_title2):
      legend_args_to_use ={
          "framealpha":1,
          "handlelength":0.7,
@@ -864,16 +1277,10 @@ def compare_effect(path1,
              label.set_ha('left')
              label.set_position((-8*width,0))
 
-     ax.tick_params(axis='both', labelsize=fontargs["fontsize"])
-     plt.setp(L.texts,**fontargs)
-     plt.setp(L.get_title(),**fontargs)
-     ##plot finished########################################################################################
-     gc.collect()
-
-     save_figure(fig, save, keyword="esc",save_args=save_args, log=log, verbose=verbose)
-
-
-     return [sig_list_merged, fig,log]
+     ax.tick_params(axis='both', labelsize=font_kwargs["fontsize"])
+     plt.setp(L.texts,**font_kwargs)
+     plt.setp(L.get_title(),**font_kwargs)
+     return ax
 
  def reorderLegend(ax=None, order=None, add=None):
      handles, labels = ax.get_legend_handles_labels()
@@ -882,78 +1289,18 @@ def reorderLegend(ax=None, order=None, add=None):
      new_handles = [info[l] for l in order]
      return new_handles, order
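reorderLegend rebuilds the handle list from a label-to-handle mapping, which is the standard matplotlib idiom for reordering legend entries:

    import matplotlib.pyplot as plt

    fig, ax = plt.subplots()
    ax.plot([0, 1], [0, 1], label="Both")
    ax.plot([0, 1], [1, 0], label="Sumstats_1")

    handles, labels = ax.get_legend_handles_labels()
    info = dict(zip(labels, handles))
    order = ["Sumstats_1", "Both"]  # desired legend order
    ax.legend([info[l] for l in order], order)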
 
- def test_q(df,beta1,se1,beta2,se2,q_level=0.05,is_q_mc=False, log=Log(), verbose=False):
-     w1="Weight_1"
-     w2="Weight_2"
-     beta="BETA_FE"
-     q="Q"
-     pq="HetP"
-     i2="I2"
-     df[w1]=1/(df[se1])**2
-     df[w2]=1/(df[se2])**2
-     df[beta] =(df[w1]*df[beta1] + df[w2]*df[beta2])/(df[w1]+df[w2])
+ def reorder_columns(sig_list_merged):
+     order=[ 'CHR', 'POS', 'GENENAME',
+             'EA_1', 'NEA_1', 'EFFECT_1', 'SE_1', 'P_1', 'MLOG10P_1',
+             'EA_2_aligned', 'NEA_2_aligned', 'EFFECT_2_aligned', 'SE_2', 'P_2', 'MLOG10P_2', 'EA_2', 'NEA_2', 'EFFECT_2',
+             'indicator' ]
 
-     # Cochran(1954)
-     df[q] = df[w1]*(df[beta1]-df[beta])**2 + df[w2]*(df[beta2]-df[beta])**2
-     df[pq] = ss.chi2.sf(df[q], 1)
-     df["Edge_color"]="white"
-
-     if is_q_mc=="fdr":
-         log.write(" -FDR correction applied...", verbose=verbose)
-         df[pq] = ss.false_discovery_control(df[pq])
-     elif is_q_mc=="bon":
-         log.write(" -Bonferroni correction applied...", verbose=verbose)
-         df[pq] = df[pq] * len(df[pq])
-
-     df.loc[df[pq]<q_level,"Edge_color"]="black"
-     df.drop(columns=["Weight_1","Weight_2","BETA_FE"],inplace=True)
-     # Huedo-Medina, T. B., Sánchez-Meca, J., Marín-Martínez, F., & Botella, J. (2006). Assessing heterogeneity in meta-analysis: Q statistic or I² index? Psychological Methods, 11(2), 193.
-
-     # calculate I2
-     df[i2] = (df[q] - 1)/df[q]
-     df.loc[df[i2]<0,i2] = 0
-
-     return df
-
- def jackknife_r(df,x="EFFECT_1",y="EFFECT_2_aligned"):
-     """Jackknife estimation of se for rsq
-
-     """
-
-     # dropna
-     df_nona = df.loc[:,[x,y]].dropna()
-
-     # non-empty entries
-     n=len(df)
-
-     # assign row number
-     df_nona["nrow"] = range(n)
-
-     # a list to store r2
-     r_list=[]
+     new_order=[]
+     for i in order:
+         if i in sig_list_merged.columns:
+             new_order.append(i)
+     for i in sig_list_merged.columns:
+         if i not in new_order:
+             new_order.append(i)
 
-     # estimate r
-     for i in range(n):
-         # exclude 1 record
-         records_to_use = df_nona["nrow"]!=i
-         # estimate r
-         reg_jackknife = ss.linregress(df_nona.loc[records_to_use, x],df_nona.loc[records_to_use,y])
-         # add r_i to list
-         r_list.append(reg_jackknife[2])
-
-     # convert list to array
-     rs = np.array(r_list)
-     # https://en.wikipedia.org/wiki/Jackknife_resampling
-     r_se = np.sqrt( (n-1)/n * np.sum((rs - np.mean(rs))**2) )
-     return r_se
-
- def drop_duplicate_and_na(df,snpid="SNPID",sort_by=False,log=Log(),verbose=True):
-     length_before = len(df)
-     if sort_by!=False:
-         df.sort_values(by = sort_by, inplace=True)
-     df.dropna(axis="index",subset=[snpid],inplace=True)
-     df.drop_duplicates(subset=[snpid], keep='first', inplace=True)
-     length_after= len(df)
-     if length_before != length_after:
-         log.write(" -Dropped {} duplicates or NAs...".format(length_before - length_after), verbose=verbose)
-     return df
+     return sig_list_merged[new_order]
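The two loops implement a stable reorder: preferred columns first, when present, then every remaining column in its original order. For example (assuming reorder_columns is in scope):

    import pandas as pd

    df = pd.DataFrame(columns=["EFFECT_1", "CHR", "extra", "POS"])
    print(list(reorder_columns(df).columns))  # ['CHR', 'POS', 'EFFECT_1', 'extra']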