gwaslab 3.4.35__py3-none-any.whl → 3.4.37__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gwaslab might be problematic. Click here for more details.
- gwaslab/__init__.py +1 -1
- gwaslab/bd_common_data.py +4 -2
- gwaslab/g_Sumstats.py +56 -33
- gwaslab/g_meta.py +13 -3
- gwaslab/g_version.py +2 -2
- gwaslab/hm_harmonize_sumstats.py +43 -18
- gwaslab/io_preformat_input.py +9 -0
- gwaslab/qc_check_datatype.py +14 -0
- gwaslab/qc_fix_sumstats.py +278 -119
- gwaslab/util_ex_process_h5.py +26 -17
- gwaslab/util_in_fill_data.py +50 -12
- gwaslab/viz_aux_quickfix.py +53 -52
- gwaslab/viz_plot_compare_effect.py +27 -8
- gwaslab/viz_plot_forestplot.py +1 -1
- gwaslab/viz_plot_mqqplot.py +127 -48
- gwaslab/viz_plot_regionalplot.py +20 -9
- gwaslab/viz_plot_rg_heatmap.py +8 -4
- {gwaslab-3.4.35.dist-info → gwaslab-3.4.37.dist-info}/METADATA +5 -6
- {gwaslab-3.4.35.dist-info → gwaslab-3.4.37.dist-info}/RECORD +22 -22
- {gwaslab-3.4.35.dist-info → gwaslab-3.4.37.dist-info}/LICENSE +0 -0
- {gwaslab-3.4.35.dist-info → gwaslab-3.4.37.dist-info}/WHEEL +0 -0
- {gwaslab-3.4.35.dist-info → gwaslab-3.4.37.dist-info}/top_level.txt +0 -0
gwaslab/util_ex_process_h5.py
CHANGED
|
@@ -3,31 +3,40 @@ import os
|
|
|
3
3
|
import numpy as np
|
|
4
4
|
from gwaslab.g_Log import Log
|
|
5
5
|
|
|
6
|
-
def
|
|
6
|
+
def process_vcf_to_hfd5(vcf,
|
|
7
|
+
directory=None,
|
|
8
|
+
chr_dict=None,
|
|
9
|
+
group_size=20000000,
|
|
10
|
+
complevel=9,
|
|
11
|
+
chunksize=20000000,
|
|
12
|
+
log=Log(),
|
|
13
|
+
verbose=True):
|
|
14
|
+
|
|
7
15
|
#load vcf
|
|
8
|
-
log.write("Start
|
|
9
|
-
log.write(" -Reference VCF path:{}".format(vcf))
|
|
10
|
-
log.write(" -Output group size:{}".format(group_size))
|
|
11
|
-
log.write(" -Compression level:{}".format(complevel))
|
|
12
|
-
log.write(" -Loading chunksize:{}".format(chunksize))
|
|
16
|
+
log.write("Start to process VCF file to HDF5:", verbose=verbose)
|
|
17
|
+
log.write(" -Reference VCF path:{}".format(vcf), verbose=verbose)
|
|
18
|
+
log.write(" -Output group size:{}".format(group_size), verbose=verbose)
|
|
19
|
+
log.write(" -Compression level:{}".format(complevel), verbose=verbose)
|
|
20
|
+
log.write(" -Loading chunksize:{}".format(chunksize), verbose=verbose)
|
|
13
21
|
|
|
22
|
+
vcf_file_name = os.path.basename(vcf)
|
|
23
|
+
vcf_dir_path = os.path.dirname(vcf)
|
|
24
|
+
|
|
14
25
|
if directory is None:
|
|
15
|
-
directory=
|
|
16
|
-
|
|
26
|
+
directory = vcf_dir_path
|
|
17
27
|
elif directory[-1] == "/":
|
|
18
28
|
directory = directory.rstrip('/')
|
|
19
29
|
|
|
20
|
-
h5_path = "{}/rsID_CHR_POS_groups_{}.h5".format(directory,int(group_size))
|
|
21
|
-
log_path = "{}/rsID_CHR_POS_groups_{}.log".format(directory,int(group_size))
|
|
22
|
-
log.write(" -HDF5 Output path: {}".format(h5_path))
|
|
23
|
-
log.write(" -Log output path: {}".format(log_path))
|
|
30
|
+
h5_path = "{}/{}.rsID_CHR_POS_groups_{}.h5".format(directory,vcf_file_name,int(group_size))
|
|
31
|
+
log_path = "{}/{}.rsID_CHR_POS_groups_{}.log".format(directory,vcf_file_name, int(group_size))
|
|
32
|
+
log.write(" -HDF5 Output path: {}".format(h5_path), verbose=verbose)
|
|
33
|
+
log.write(" -Log output path: {}".format(log_path), verbose=verbose)
|
|
24
34
|
df = pd.read_table(vcf,comment="#",usecols=[0,1,2],header=None,chunksize=chunksize)
|
|
25
35
|
|
|
26
|
-
|
|
27
|
-
log.write(" -Processing chunk: ",end="")
|
|
36
|
+
log.write(" -Processing chunk: ",end="", verbose=verbose)
|
|
28
37
|
|
|
29
38
|
for index,chunk in enumerate(df):
|
|
30
|
-
log.write(index,end=" ",show_time=False)
|
|
39
|
+
log.write(index,end=" ",show_time=False, verbose=verbose)
|
|
31
40
|
chunk = chunk.rename(columns={0:"CHR",1:"POS",2:"rsn"})
|
|
32
41
|
if chr_dict is not None:
|
|
33
42
|
chunk["CHR"] = chunk["CHR"].map(chr_dict)
|
|
@@ -47,5 +56,5 @@ def process_ref_vcf(vcf, directory=None, chr_dict=None, group_size=20000000,comp
|
|
|
47
56
|
dropna=True,
|
|
48
57
|
format="table",
|
|
49
58
|
complevel=complevel)
|
|
50
|
-
log.write("Processing finished!")
|
|
51
|
-
log.save(log_path, verbose=
|
|
59
|
+
log.write("Processing finished!", verbose=verbose)
|
|
60
|
+
log.save(log_path, verbose=verbose)
|
gwaslab/util_in_fill_data.py
CHANGED
|
@@ -4,13 +4,13 @@ import scipy.stats as ss
|
|
|
4
4
|
from scipy import stats
|
|
5
5
|
from gwaslab.g_Log import Log
|
|
6
6
|
import gc
|
|
7
|
-
from gwaslab.qc_fix_sumstats import sortcolumn
|
|
7
|
+
#from gwaslab.qc_fix_sumstats import sortcolumn
|
|
8
8
|
from gwaslab.g_version import _get_version
|
|
9
9
|
from gwaslab.qc_check_datatype import check_datatype
|
|
10
10
|
|
|
11
11
|
def filldata(
|
|
12
12
|
sumstats,
|
|
13
|
-
to_fill=
|
|
13
|
+
to_fill=None,
|
|
14
14
|
df=None,
|
|
15
15
|
overwrite=False,
|
|
16
16
|
verbose=True,
|
|
@@ -38,7 +38,7 @@ def filldata(
|
|
|
38
38
|
for i in skip_cols:
|
|
39
39
|
to_fill.remove(i)
|
|
40
40
|
if verbose: log.write(" -Skipping columns: ",skip_cols)
|
|
41
|
-
if len(set(to_fill) & set(["OR","
|
|
41
|
+
if len(set(to_fill) & set(["OR","OR_95L","OR_95U","BETA","SE","P","Z","CHISQ","MLOG10P","MAF"]))==0:
|
|
42
42
|
log.write(" -No available columns to fill. Skipping.", verbose=verbose)
|
|
43
43
|
log.write("Finished filling data using existing columns.", verbose=verbose)
|
|
44
44
|
return sumstats
|
|
@@ -46,7 +46,7 @@ def filldata(
|
|
|
46
46
|
fill_iteratively(sumstats,to_fill,log,only_sig,df,extreme,verbose,sig_level)
|
|
47
47
|
|
|
48
48
|
# ###################################################################################
|
|
49
|
-
sumstats = sortcolumn(sumstats, verbose=verbose, log=log)
|
|
49
|
+
#sumstats = sortcolumn(sumstats, verbose=verbose, log=log)
|
|
50
50
|
gc.collect()
|
|
51
51
|
if verbose: log.write("Finished filling data using existing columns.")
|
|
52
52
|
return sumstats
|
|
@@ -224,12 +224,12 @@ def fill_extreme_mlog10(sumstats, z):
|
|
|
224
224
|
return sumstats
|
|
225
225
|
|
|
226
226
|
####################################################################################################################
|
|
227
|
-
def fill_iteratively(sumstats,
|
|
227
|
+
def fill_iteratively(sumstats,raw_to_fill,log,only_sig,df,extreme,verbose,sig_level):
|
|
228
|
+
to_fill = raw_to_fill.copy()
|
|
228
229
|
if verbose: log.write(" - Filling Columns iteratively...")
|
|
229
|
-
|
|
230
|
-
previous_count=0
|
|
230
|
+
|
|
231
231
|
filled_count=0
|
|
232
|
-
for i in range(len(to_fill)):
|
|
232
|
+
for i in range(len(to_fill)+1):
|
|
233
233
|
# beta to or ####################################################################################################
|
|
234
234
|
if "OR" in to_fill:
|
|
235
235
|
status, filled_count = fill_or(sumstats,log,verbose=verbose,filled_count=filled_count)
|
|
@@ -269,9 +269,47 @@ def fill_iteratively(sumstats,to_fill,log,only_sig,df,extreme,verbose,sig_level)
|
|
|
269
269
|
else:
|
|
270
270
|
status,filled_count = fill_mlog10p(sumstats,log,verbose=verbose)
|
|
271
271
|
if status == 1 : to_fill.remove("MLOG10P")
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
if previous_count == filled_count:
|
|
272
|
+
|
|
273
|
+
if filled_count == 0:
|
|
275
274
|
break
|
|
276
275
|
|
|
277
|
-
|
|
276
|
+
###Base functions########################################################################################
|
|
277
|
+
|
|
278
|
+
def _convert_betase_to_z(beta, se):
|
|
279
|
+
return beta/se
|
|
280
|
+
|
|
281
|
+
def _convert_betase_to_p(beta, se):
|
|
282
|
+
z = _convert_betase_to_z(beta, se)
|
|
283
|
+
p = _convert_z_to_p(z)
|
|
284
|
+
return p
|
|
285
|
+
|
|
286
|
+
def _convert_betase_to_mlog10p(beta, se):
|
|
287
|
+
z = _convert_betase_to_z(beta, se)
|
|
288
|
+
mlog10p = _convert_z_to_mlog10p(z)
|
|
289
|
+
return mlog10p
|
|
290
|
+
|
|
291
|
+
def _convert_p_to_chisq(p):
|
|
292
|
+
return ss.chi2.isf(p, 1)
|
|
293
|
+
|
|
294
|
+
def _convert_z_to_chisq(z):
|
|
295
|
+
return (z)**2
|
|
296
|
+
|
|
297
|
+
def _convert_z_to_p(z):
|
|
298
|
+
return ss.chi2.sf(z**2,1)
|
|
299
|
+
|
|
300
|
+
def _convert_z_to_mlog10p(z):
|
|
301
|
+
log_pvalue = np.log(2) + ss.norm.logsf(np.abs(z)) #two-sided
|
|
302
|
+
mlog10p = log_pvalue/np.log(10)
|
|
303
|
+
return -mlog10p
|
|
304
|
+
|
|
305
|
+
def _conver_chisq_to_p(chisq):
|
|
306
|
+
return ss.chi2.sf(chisq,1)
|
|
307
|
+
|
|
308
|
+
def _convert_mlog10p_to_p(mlog10p):
|
|
309
|
+
return np.power(10, -mlog10p)
|
|
310
|
+
|
|
311
|
+
def _convert_or_to_beta(OR):
|
|
312
|
+
return np.log(OR)
|
|
313
|
+
|
|
314
|
+
def _convert_beta_to_or(beta):
|
|
315
|
+
return np.exp(beta)
|
gwaslab/viz_aux_quickfix.py
CHANGED
|
@@ -159,63 +159,64 @@ def _quick_assign_i(sumstats, chrom="CHR",pos="POS"):
|
|
|
159
159
|
return sumstats, chrom_df
|
|
160
160
|
|
|
161
161
|
def _quick_assign_i_with_rank(sumstats, chrpad, use_rank=False, chrom="CHR",pos="POS",drop_chr_start=False,_posdiccul=None):
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
162
|
+
# align all variants on a single axis (i)
|
|
163
|
+
sumstats = sumstats.sort_values([chrom,pos])
|
|
164
|
+
if use_rank is True:
|
|
165
|
+
sumstats["_POS_RANK"] = sumstats.groupby(chrom)[pos].rank("dense", ascending=True)
|
|
166
|
+
pos="_POS_RANK"
|
|
167
|
+
sumstats["_ID"]=range(len(sumstats))
|
|
168
|
+
sumstats=sumstats.set_index("_ID")
|
|
169
|
+
|
|
170
|
+
#create a df , groupby by chromosomes , and get the maximum position
|
|
171
|
+
if use_rank is True:
|
|
172
|
+
posdic = sumstats.groupby(chrom)["_POS_RANK"].max()
|
|
173
|
+
else:
|
|
174
|
+
posdic = sumstats.groupby(chrom)[pos].max()
|
|
175
|
+
|
|
176
|
+
if _posdiccul is None:
|
|
177
|
+
# convert to dictionary
|
|
178
|
+
posdiccul = dict(posdic)
|
|
174
179
|
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
posdiccul
|
|
178
|
-
|
|
179
|
-
|
|
180
|
+
# fill empty chr with 0
|
|
181
|
+
for i in range(0,sumstats[chrom].max()+1):
|
|
182
|
+
if i in posdiccul:
|
|
183
|
+
continue
|
|
184
|
+
else:
|
|
185
|
+
posdiccul[i]=0
|
|
186
|
+
|
|
187
|
+
# cumulative sum dictionary
|
|
188
|
+
for i in range(1,sumstats[chrom].max()+1):
|
|
189
|
+
posdiccul[i]= posdiccul[i-1] + posdiccul[i] + sumstats[pos].max()*chrpad
|
|
190
|
+
else:
|
|
191
|
+
posdiccul = _posdiccul
|
|
192
|
+
|
|
193
|
+
# convert base pair postion to x axis position using the cumulative sum dictionary
|
|
194
|
+
sumstats["_ADD"]=sumstats[chrom].apply(lambda x : posdiccul[int(x)-1])
|
|
195
|
+
|
|
196
|
+
if drop_chr_start==True:
|
|
197
|
+
posdic_min = sumstats.groupby(chrom)[pos].min()
|
|
198
|
+
posdiccul_min= dict(posdic_min)
|
|
180
199
|
for i in range(0,sumstats[chrom].max()+1):
|
|
181
|
-
if i in
|
|
200
|
+
if i in posdiccul_min:
|
|
182
201
|
continue
|
|
183
202
|
else:
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
# cumulative sum dictionary
|
|
203
|
+
posdiccul_min[i]=0
|
|
187
204
|
for i in range(1,sumstats[chrom].max()+1):
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
posdiccul = _posdiccul
|
|
191
|
-
|
|
192
|
-
# convert base pair postion to x axis position using the cumulative sum dictionary
|
|
193
|
-
sumstats["_ADD"]=sumstats[chrom].apply(lambda x : posdiccul[int(x)-1])
|
|
194
|
-
|
|
195
|
-
if drop_chr_start==True:
|
|
196
|
-
posdic_min = sumstats.groupby(chrom)[pos].min()
|
|
197
|
-
posdiccul_min= dict(posdic_min)
|
|
198
|
-
for i in range(0,sumstats[chrom].max()+1):
|
|
199
|
-
if i in posdiccul_min:
|
|
200
|
-
continue
|
|
201
|
-
else:
|
|
202
|
-
posdiccul_min[i]=0
|
|
203
|
-
for i in range(1,sumstats[chrom].max()+1):
|
|
204
|
-
posdiccul_min[i]= posdiccul_min[i-1] + posdiccul_min[i]
|
|
205
|
-
sumstats["_ADD"]=sumstats["_ADD"] - sumstats[chrom].apply(lambda x : posdiccul_min[int(x)])
|
|
206
|
-
|
|
207
|
-
if use_rank is True:
|
|
208
|
-
sumstats["i"]=sumstats["_POS_RANK"]+sumstats["_ADD"]
|
|
209
|
-
else:
|
|
210
|
-
sumstats["i"]=sumstats[pos]+sumstats["_ADD"]
|
|
205
|
+
posdiccul_min[i]= posdiccul_min[i-1] + posdiccul_min[i]
|
|
206
|
+
sumstats["_ADD"]=sumstats["_ADD"] - sumstats[chrom].apply(lambda x : posdiccul_min[int(x)])
|
|
211
207
|
|
|
208
|
+
if use_rank is True:
|
|
209
|
+
sumstats["i"]=sumstats["_POS_RANK"]+sumstats["_ADD"]
|
|
210
|
+
else:
|
|
211
|
+
sumstats["i"]=sumstats[pos]+sumstats["_ADD"]
|
|
212
|
+
|
|
212
213
|
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
214
|
+
#for plot, get the chr text tick position
|
|
215
|
+
chrom_df=sumstats.groupby(chrom)['i'].agg(lambda x: (x.min()+x.max())/2)
|
|
216
|
+
#sumstats["i"] = sumstats["i"]+((sumstats[chrom].map(dict(chrom_df)).astype("int")))*0.02
|
|
217
|
+
#sumstats["i"] = sumstats["i"].astype("Int64")
|
|
218
|
+
sumstats["i"] = np.floor(pd.to_numeric(sumstats["i"], errors='coerce')).astype('Int64')
|
|
219
|
+
return sumstats, chrom_df
|
|
219
220
|
|
|
220
221
|
def _quick_assign_marker_relative_size(series, sig_level = 5e-8, suggestive_sig_level=5e-6, lower_level=5e-4):
|
|
221
222
|
size_series = series.copy()
|
|
@@ -286,9 +287,9 @@ def _cut(series, mode,cutfactor,cut,skip, ylabels, cut_log, verbose,lines_to_plo
|
|
|
286
287
|
maxy = series.max()
|
|
287
288
|
series = series.copy()
|
|
288
289
|
if "b" not in mode:
|
|
289
|
-
if verbose: log.write(" -Maximum -log10(P)
|
|
290
|
+
if verbose: log.write(" -Maximum -log10(P) value is "+str(maxy) +" .")
|
|
290
291
|
elif "b" in mode:
|
|
291
|
-
if verbose: log.write(" -Maximum DENSITY
|
|
292
|
+
if verbose: log.write(" -Maximum DENSITY value is "+str(maxy) +" .")
|
|
292
293
|
|
|
293
294
|
maxticker=int(np.round(series.max(skipna=True)))
|
|
294
295
|
|
|
@@ -4,7 +4,7 @@ import matplotlib.pyplot as plt
|
|
|
4
4
|
import scipy.stats as ss
|
|
5
5
|
import seaborn as sns
|
|
6
6
|
import gc
|
|
7
|
-
|
|
7
|
+
import scipy.stats as ss
|
|
8
8
|
from matplotlib.patches import Rectangle
|
|
9
9
|
from adjustText import adjust_text
|
|
10
10
|
from gwaslab.viz_aux_save_figure import save_figure
|
|
@@ -36,6 +36,7 @@ def compare_effect(path1,
|
|
|
36
36
|
wc_correction=False,
|
|
37
37
|
null_beta=0,
|
|
38
38
|
is_q=False,
|
|
39
|
+
is_q_mc = False,
|
|
39
40
|
include_all=True,
|
|
40
41
|
q_level=0.05,
|
|
41
42
|
sig_level=5e-8,
|
|
@@ -485,8 +486,10 @@ def compare_effect(path1,
|
|
|
485
486
|
if verbose: log.write(" -No variants with EA not matching...")
|
|
486
487
|
if fdr==True:
|
|
487
488
|
if verbose: log.write(" -Using FDR...")
|
|
488
|
-
sig_list_merged["P_1"] = fdrcorrection(sig_list_merged["P_1"])[1]
|
|
489
|
-
sig_list_merged["P_2"] = fdrcorrection(sig_list_merged["P_2"])[1]
|
|
489
|
+
#sig_list_merged["P_1"] = fdrcorrection(sig_list_merged["P_1"])[1]
|
|
490
|
+
#sig_list_merged["P_2"] = fdrcorrection(sig_list_merged["P_2"])[1]
|
|
491
|
+
sig_list_merged["P_1"] =ss.false_discovery_control(sig_list_merged["P_1"])
|
|
492
|
+
sig_list_merged["P_2"] =ss.false_discovery_control(sig_list_merged["P_2"])
|
|
490
493
|
|
|
491
494
|
####################################################################################################################################
|
|
492
495
|
## winner's curse correction using aligned beta
|
|
@@ -528,9 +531,10 @@ def compare_effect(path1,
|
|
|
528
531
|
if (is_q is True):
|
|
529
532
|
if verbose: log.write(" -Calculating Cochran's Q statistics and peform chisq test...")
|
|
530
533
|
if mode=="beta" or mode=="BETA" or mode=="Beta":
|
|
531
|
-
sig_list_merged = test_q(sig_list_merged,"EFFECT_1","SE_1","EFFECT_2_aligned","SE_2",q_level=q_level)
|
|
534
|
+
sig_list_merged = test_q(sig_list_merged,"EFFECT_1","SE_1","EFFECT_2_aligned","SE_2",q_level=q_level,is_q_mc=is_q_mc, log=log, verbose=verbose)
|
|
532
535
|
else:
|
|
533
|
-
sig_list_merged = test_q(sig_list_merged,"BETA_1","SE_1","BETA_2_aligned","SE_2",q_level=q_level)
|
|
536
|
+
sig_list_merged = test_q(sig_list_merged,"BETA_1","SE_1","BETA_2_aligned","SE_2",q_level=q_level,is_q_mc=is_q_mc, log=log, verbose=verbose)
|
|
537
|
+
|
|
534
538
|
######################### save ###############################################################
|
|
535
539
|
## save the merged data
|
|
536
540
|
save_path = label[0]+"_"+label[1]+"_beta_sig_list_merged.tsv"
|
|
@@ -804,8 +808,15 @@ def compare_effect(path1,
|
|
|
804
808
|
if legend_mode == "full" and is_q==True :
|
|
805
809
|
title_proxy = Rectangle((0,0), 0, 0, color='w',label=legend_title)
|
|
806
810
|
title_proxy2 = Rectangle((0,0), 0, 0, color='w',label=legend_title2)
|
|
807
|
-
|
|
808
|
-
|
|
811
|
+
if is_q_mc=="fdr":
|
|
812
|
+
het_label_sig = r"$FDR_{het} < $" + "${}$".format(q_level)
|
|
813
|
+
het_label_sig2 = r"$FDR_{het} > $" + "${}$".format(q_level)
|
|
814
|
+
elif is_q_mc=="bon":
|
|
815
|
+
het_label_sig = r"$P_{het,bon} < $" + "${}$".format(q_level)
|
|
816
|
+
het_label_sig2 = r"$P_{het,bon} > $" + "${}$".format(q_level)
|
|
817
|
+
else:
|
|
818
|
+
het_label_sig = r"$P_{het} < $" + "${}$".format(q_level)
|
|
819
|
+
het_label_sig2 = r"$P_{het} > $" + "${}$".format(q_level)
|
|
809
820
|
het_sig = Rectangle((0,0), 0, 0, facecolor='#cccccc',edgecolor="black", linewidth=1, label=het_label_sig)
|
|
810
821
|
het_nonsig = Rectangle((0,0), 0, 0, facecolor='#cccccc',edgecolor="white",linewidth=1, label=het_label_sig2)
|
|
811
822
|
|
|
@@ -874,7 +885,7 @@ def reorderLegend(ax=None, order=None, add=None):
|
|
|
874
885
|
new_handles = [info[l] for l in order]
|
|
875
886
|
return new_handles, order
|
|
876
887
|
|
|
877
|
-
def test_q(df,beta1,se1,beta2,se2,q_level=0.05):
|
|
888
|
+
def test_q(df,beta1,se1,beta2,se2,q_level=0.05,is_q_mc=False, log=Log(), verbose=False):
|
|
878
889
|
w1="Weight_1"
|
|
879
890
|
w2="Weight_2"
|
|
880
891
|
beta="BETA_FE"
|
|
@@ -889,6 +900,14 @@ def test_q(df,beta1,se1,beta2,se2,q_level=0.05):
|
|
|
889
900
|
df[q] = df[w1]*(df[beta1]-df[beta])**2 + df[w2]*(df[beta2]-df[beta])**2
|
|
890
901
|
df[pq] = ss.chi2.sf(df[q], 1)
|
|
891
902
|
df["Edge_color"]="white"
|
|
903
|
+
|
|
904
|
+
if is_q_mc=="fdr":
|
|
905
|
+
if verbose: log.write(" -FDR correction applied...")
|
|
906
|
+
df[pq] = ss.false_discovery_control(df[pq])
|
|
907
|
+
elif is_q_mc=="bon":
|
|
908
|
+
if verbose: log.write(" -Bonferroni correction applied...")
|
|
909
|
+
df[pq] = df[pq] * len(df[pq])
|
|
910
|
+
|
|
892
911
|
df.loc[df[pq]<q_level,"Edge_color"]="black"
|
|
893
912
|
df.drop(columns=["Weight_1","Weight_2","BETA_FE"],inplace=True)
|
|
894
913
|
# Huedo-Medina, T. B., Sánchez-Meca, J., Marín-Martínez, F., & Botella, J. (2006). Assessing heterogeneity in meta-analysis: Q statistic or I² index?. Psychological methods, 11(2), 193.
|
gwaslab/viz_plot_forestplot.py
CHANGED
|
@@ -2,7 +2,7 @@ import pandas as pd
|
|
|
2
2
|
import numpy as np
|
|
3
3
|
import matplotlib.pyplot as plt
|
|
4
4
|
from scipy import stats, optimize
|
|
5
|
-
from statsmodels.stats.meta_analysis import combine_effects
|
|
5
|
+
#from statsmodels.stats.meta_analysis import combine_effects
|
|
6
6
|
from matplotlib.patches import Polygon
|
|
7
7
|
from matplotlib.collections import PatchCollection
|
|
8
8
|
# plot_forest
|