gwaslab 3.4.37__py3-none-any.whl → 3.4.39__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gwaslab might be problematic. Click here for more details.
- gwaslab/bd_common_data.py +6 -3
- gwaslab/bd_download.py +9 -9
- gwaslab/bd_get_hapmap3.py +43 -9
- gwaslab/data/formatbook.json +722 -721
- gwaslab/g_Log.py +22 -5
- gwaslab/g_Sumstats.py +110 -163
- gwaslab/g_SumstatsPair.py +76 -25
- gwaslab/g_SumstatsT.py +2 -2
- gwaslab/g_Sumstats_summary.py +3 -3
- gwaslab/g_version.py +10 -10
- gwaslab/hm_casting.py +36 -17
- gwaslab/hm_harmonize_sumstats.py +354 -221
- gwaslab/hm_rsid_to_chrpos.py +1 -1
- gwaslab/io_preformat_input.py +49 -43
- gwaslab/io_read_ldsc.py +49 -1
- gwaslab/io_to_formats.py +428 -295
- gwaslab/ldsc_irwls.py +198 -0
- gwaslab/ldsc_jackknife.py +514 -0
- gwaslab/ldsc_ldscore.py +417 -0
- gwaslab/ldsc_parse.py +294 -0
- gwaslab/ldsc_regressions.py +747 -0
- gwaslab/ldsc_sumstats.py +629 -0
- gwaslab/qc_check_datatype.py +3 -3
- gwaslab/qc_fix_sumstats.py +891 -778
- gwaslab/util_ex_calculate_ldmatrix.py +31 -13
- gwaslab/util_ex_gwascatalog.py +25 -25
- gwaslab/util_ex_ldproxyfinder.py +10 -10
- gwaslab/util_ex_ldsc.py +189 -0
- gwaslab/util_ex_process_ref.py +3 -3
- gwaslab/util_ex_run_coloc.py +26 -4
- gwaslab/util_in_calculate_gc.py +6 -6
- gwaslab/util_in_calculate_power.py +42 -43
- gwaslab/util_in_convert_h2.py +8 -8
- gwaslab/util_in_fill_data.py +30 -30
- gwaslab/util_in_filter_value.py +201 -74
- gwaslab/util_in_get_density.py +10 -10
- gwaslab/util_in_get_sig.py +445 -71
- gwaslab/viz_aux_annotate_plot.py +12 -12
- gwaslab/viz_aux_quickfix.py +42 -37
- gwaslab/viz_aux_reposition_text.py +10 -7
- gwaslab/viz_aux_save_figure.py +18 -8
- gwaslab/viz_plot_compare_af.py +32 -33
- gwaslab/viz_plot_compare_effect.py +63 -71
- gwaslab/viz_plot_miamiplot2.py +34 -26
- gwaslab/viz_plot_mqqplot.py +126 -75
- gwaslab/viz_plot_qqplot.py +11 -8
- gwaslab/viz_plot_regionalplot.py +36 -33
- gwaslab/viz_plot_rg_heatmap.py +28 -26
- gwaslab/viz_plot_stackedregional.py +40 -21
- gwaslab/viz_plot_trumpetplot.py +65 -61
- gwaslab-3.4.39.dist-info/LICENSE +674 -0
- {gwaslab-3.4.37.dist-info → gwaslab-3.4.39.dist-info}/METADATA +5 -4
- gwaslab-3.4.39.dist-info/RECORD +80 -0
- gwaslab-3.4.37.dist-info/RECORD +0 -72
- /gwaslab-3.4.37.dist-info/LICENSE → /gwaslab-3.4.39.dist-info/LICENSE_before_v3.4.39 +0 -0
- {gwaslab-3.4.37.dist-info → gwaslab-3.4.39.dist-info}/WHEEL +0 -0
- {gwaslab-3.4.37.dist-info → gwaslab-3.4.39.dist-info}/top_level.txt +0 -0
|
@@ -99,14 +99,14 @@ def compare_effect(path1,
|
|
|
99
99
|
if anno_het ==True:
|
|
100
100
|
is_q=True
|
|
101
101
|
|
|
102
|
-
|
|
102
|
+
log.write("Start to process the raw sumstats for plotting...")
|
|
103
103
|
|
|
104
104
|
######### 1 check the value used to plot
|
|
105
105
|
if mode not in ["Beta","beta","BETA","OR","or"]:
|
|
106
106
|
raise ValueError("Please input Beta or OR")
|
|
107
107
|
|
|
108
108
|
if type(path1) is Sumstats:
|
|
109
|
-
|
|
109
|
+
log.write("Path1 is gwaslab Sumstats object...")
|
|
110
110
|
if cols_name_list_1 is None:
|
|
111
111
|
cols_name_list_1 = ["SNPID","P","EA","NEA","CHR","POS"]
|
|
112
112
|
if effect_cols_list_1 is None:
|
|
@@ -115,10 +115,10 @@ def compare_effect(path1,
|
|
|
115
115
|
else:
|
|
116
116
|
effect_cols_list_1 = ["OR","OR_95L","OR_95U"]
|
|
117
117
|
elif type(path1) is pd.DataFrame:
|
|
118
|
-
|
|
118
|
+
log.write("Path1 is pandas DataFrame object...")
|
|
119
119
|
|
|
120
120
|
if type(path2) is Sumstats:
|
|
121
|
-
|
|
121
|
+
log.write("Path2 is gwaslab Sumstats object...")
|
|
122
122
|
if cols_name_list_2 is None:
|
|
123
123
|
cols_name_list_2 = ["SNPID","P","EA","NEA","CHR","POS"]
|
|
124
124
|
if effect_cols_list_2 is None:
|
|
@@ -127,10 +127,10 @@ def compare_effect(path1,
|
|
|
127
127
|
else:
|
|
128
128
|
effect_cols_list_2 = ["OR","OR_95L","OR_95U"]
|
|
129
129
|
elif type(path2) is pd.DataFrame:
|
|
130
|
-
|
|
130
|
+
log.write("Path2 is pandas DataFrame object...")
|
|
131
131
|
|
|
132
132
|
######### 2 extract snplist2
|
|
133
|
-
|
|
133
|
+
log.write(" -Loading "+label[1]+" SNP list in memory...")
|
|
134
134
|
|
|
135
135
|
if type(path2) is Sumstats:
|
|
136
136
|
sumstats = path2.data[[cols_name_list_2[0]]].copy()
|
|
@@ -148,7 +148,7 @@ def compare_effect(path1,
|
|
|
148
148
|
cols_to_extract = [cols_name_list_1[0],cols_name_list_1[1],cols_name_list_1[4],cols_name_list_1[5]]
|
|
149
149
|
|
|
150
150
|
######### 4 load sumstats1
|
|
151
|
-
|
|
151
|
+
log.write(" -Loading sumstats for "+label[0]+":",",".join(cols_to_extract))
|
|
152
152
|
|
|
153
153
|
if type(path1) is Sumstats:
|
|
154
154
|
sumstats = path1.data[cols_to_extract].copy()
|
|
@@ -163,7 +163,7 @@ def compare_effect(path1,
|
|
|
163
163
|
sumstats[cols_name_list_1[1]] = np.power(10,-sumstats[cols_name_list_1[1]])
|
|
164
164
|
######### 5 extract the common set
|
|
165
165
|
common_snp_set = common_snp_set.intersection(sumstats[cols_name_list_1[0]].values)
|
|
166
|
-
|
|
166
|
+
log.write(" -Counting variants available for both datasets:",len(common_snp_set)," variants...")
|
|
167
167
|
|
|
168
168
|
######### 6 rename the sumstats
|
|
169
169
|
rename_dict = { cols_name_list_1[0]:"SNPID",
|
|
@@ -179,16 +179,16 @@ def compare_effect(path1,
|
|
|
179
179
|
######### 7 extract only available variants from sumstats1
|
|
180
180
|
sumstats = sumstats.loc[sumstats["SNPID"].isin(common_snp_set),:]
|
|
181
181
|
|
|
182
|
-
|
|
182
|
+
log.write(" -Using only variants available for both datasets...")
|
|
183
183
|
######### 8 extract SNPs for comparison
|
|
184
184
|
|
|
185
185
|
if snplist is not None:
|
|
186
186
|
######### 8.1 if a snplist is provided, use the snp list
|
|
187
|
-
|
|
187
|
+
log.write(" -Extract variants in the given list from "+label[0]+"...")
|
|
188
188
|
sig_list_1 = sumstats.loc[sumstats["SNPID"].isin(snplist),:].copy()
|
|
189
189
|
else:
|
|
190
190
|
######### 8.2 otherwise use the automatically detected lead SNPs
|
|
191
|
-
|
|
191
|
+
log.write(" -Extract lead variants from "+label[0]+"...")
|
|
192
192
|
sig_list_1 = getsig(sumstats,"SNPID","CHR","POS","P", verbose=verbose,sig_level=sig_level,**get_lead_args)
|
|
193
193
|
|
|
194
194
|
if drop==True:
|
|
@@ -200,7 +200,7 @@ def compare_effect(path1,
|
|
|
200
200
|
else:
|
|
201
201
|
cols_to_extract = [cols_name_list_2[0],cols_name_list_2[1],cols_name_list_2[4],cols_name_list_2[5]]
|
|
202
202
|
|
|
203
|
-
|
|
203
|
+
log.write(" -Loading sumstats for "+label[1]+":",",".join(cols_to_extract))
|
|
204
204
|
|
|
205
205
|
if type(path2) is Sumstats:
|
|
206
206
|
sumstats = path2.data[cols_to_extract].copy()
|
|
@@ -228,10 +228,10 @@ def compare_effect(path1,
|
|
|
228
228
|
######## 12 extract SNPs for comparison
|
|
229
229
|
if snplist is not None:
|
|
230
230
|
######### 12.1 if a snplist is provided, use the snp list
|
|
231
|
-
|
|
231
|
+
log.write(" -Extract snps in the given list from "+label[1]+"...")
|
|
232
232
|
sig_list_2 = sumstats.loc[sumstats["SNPID"].isin(snplist),:].copy()
|
|
233
233
|
else:
|
|
234
|
-
|
|
234
|
+
log.write(" -Extract lead snps from "+label[1]+"...")
|
|
235
235
|
######### 12.2 otherwise use the automatically detected lead SNPs
|
|
236
236
|
sig_list_2 = getsig(sumstats,"SNPID","CHR","POS","P",
|
|
237
237
|
verbose=verbose,sig_level=sig_level,**get_lead_args)
|
|
@@ -240,7 +240,7 @@ def compare_effect(path1,
|
|
|
240
240
|
|
|
241
241
|
######### 13 Merge two list using SNPID
|
|
242
242
|
##############################################################################
|
|
243
|
-
|
|
243
|
+
log.write("Merging snps from "+label[0]+" and "+label[1]+"...")
|
|
244
244
|
|
|
245
245
|
sig_list_merged = pd.merge(sig_list_1,sig_list_2,left_on="SNPID",right_on="SNPID",how="outer",suffixes=('_1', '_2'))
|
|
246
246
|
# SNPID P_1 P_2
|
|
@@ -260,7 +260,7 @@ def compare_effect(path1,
|
|
|
260
260
|
cols_to_extract = [cols_name_list_1[0],cols_name_list_1[1], cols_name_list_1[2],cols_name_list_1[3], effect_cols_list_1[0], effect_cols_list_1[1], effect_cols_list_1[2]]
|
|
261
261
|
|
|
262
262
|
if len(eaf)>0: cols_to_extract.append(eaf[0])
|
|
263
|
-
|
|
263
|
+
log.write(" -Extract statistics of selected variants from "+label[0]+" : ",",".join(cols_to_extract) )
|
|
264
264
|
|
|
265
265
|
if type(path1) is Sumstats:
|
|
266
266
|
sumstats = path1.data[cols_to_extract].copy()
|
|
@@ -300,7 +300,7 @@ def compare_effect(path1,
|
|
|
300
300
|
sumstats = drop_duplicate_and_na(sumstats, sort_by="P_1", log=log , verbose=verbose)
|
|
301
301
|
sumstats.drop("P_1",axis=1,inplace=True)
|
|
302
302
|
|
|
303
|
-
|
|
303
|
+
log.write(" -Merging "+label[0]+" effect information...", verbose=verbose)
|
|
304
304
|
|
|
305
305
|
sig_list_merged = pd.merge(sig_list_merged,sumstats,
|
|
306
306
|
left_on="SNPID",right_on="SNPID",
|
|
@@ -316,7 +316,7 @@ def compare_effect(path1,
|
|
|
316
316
|
## check if eaf column is provided.
|
|
317
317
|
if len(eaf)>0: cols_to_extract.append(eaf[1])
|
|
318
318
|
|
|
319
|
-
|
|
319
|
+
log.write(" -Extract statistics of selected variants from "+label[1]+" : ",",".join(cols_to_extract), verbose=verbose )
|
|
320
320
|
if type(path2) is Sumstats:
|
|
321
321
|
sumstats = path2.data[cols_to_extract].copy()
|
|
322
322
|
elif type(path2) is pd.DataFrame:
|
|
@@ -353,7 +353,7 @@ def compare_effect(path1,
|
|
|
353
353
|
sumstats = drop_duplicate_and_na(sumstats, sort_by="P_2", log=log, verbose=verbose)
|
|
354
354
|
sumstats.drop("P_2",axis=1,inplace=True)
|
|
355
355
|
|
|
356
|
-
|
|
356
|
+
log.write(" -Merging "+label[1]+" effect information...", verbose=verbose)
|
|
357
357
|
sig_list_merged = pd.merge(sig_list_merged,sumstats,
|
|
358
358
|
left_on="SNPID",right_on="SNPID",
|
|
359
359
|
how="left")
|
|
@@ -361,7 +361,7 @@ def compare_effect(path1,
|
|
|
361
361
|
sig_list_merged.set_index("SNPID",inplace=True)
|
|
362
362
|
|
|
363
363
|
################ 16 update sumstats1
|
|
364
|
-
|
|
364
|
+
log.write(" -Updating missing information for "+label[0]+" ...", verbose=verbose)
|
|
365
365
|
if type(path1) is Sumstats:
|
|
366
366
|
sumstats = path1.data[[cols_name_list_1[0],cols_name_list_1[1]]].copy()
|
|
367
367
|
elif type(path1) is pd.DataFrame:
|
|
@@ -383,7 +383,7 @@ def compare_effect(path1,
|
|
|
383
383
|
sig_list_merged.update(sumstats)
|
|
384
384
|
|
|
385
385
|
################# 17 update sumstats2
|
|
386
|
-
|
|
386
|
+
log.write(" -Updating missing information for "+label[1]+" ...", verbose=verbose)
|
|
387
387
|
if type(path2) is Sumstats:
|
|
388
388
|
sumstats = path2.data[[cols_name_list_2[0],cols_name_list_2[1]]].copy()
|
|
389
389
|
elif type(path2) is pd.DataFrame:
|
|
@@ -406,15 +406,15 @@ def compare_effect(path1,
|
|
|
406
406
|
sig_list_merged.update(sumstats)
|
|
407
407
|
|
|
408
408
|
if scaled1 ==True :
|
|
409
|
-
|
|
409
|
+
log.write(" -Sumstats -log10(P) values are being converted to P...", verbose=verbose)
|
|
410
410
|
sig_list_merged["P_1"] = np.power(10,-sig_list_merged["P_1"])
|
|
411
411
|
if scaled2 ==True :
|
|
412
|
-
|
|
412
|
+
log.write(" -Sumstats -log10(P) values are being converted to P...", verbose=verbose)
|
|
413
413
|
sig_list_merged["P_2"] = np.power(10,-sig_list_merged["P_2"])
|
|
414
414
|
####
|
|
415
415
|
#################################################################################
|
|
416
416
|
############## 18 init indicator
|
|
417
|
-
|
|
417
|
+
log.write(" -Assigning indicator ...", verbose=verbose)
|
|
418
418
|
# 0-> 0
|
|
419
419
|
# 1 -> sig in sumstats1
|
|
420
420
|
# 2 -> sig in sumstats2
|
|
@@ -428,7 +428,7 @@ def compare_effect(path1,
|
|
|
428
428
|
sig_list_merged["POS"]=np.max(sig_list_merged[["POS_1","POS_2"]], axis=1).astype(int)
|
|
429
429
|
sig_list_merged.drop(labels=['CHR_1', 'CHR_2','POS_1', 'POS_2'], axis=1,inplace=True)
|
|
430
430
|
|
|
431
|
-
|
|
431
|
+
log.write(" -Aligning "+label[1]+" EA with "+label[0]+" EA ...", verbose=verbose)
|
|
432
432
|
############### 19 align allele effect with sumstats 1
|
|
433
433
|
sig_list_merged["EA_1"]=sig_list_merged["EA_1"].astype("string")
|
|
434
434
|
sig_list_merged["EA_2"]=sig_list_merged["EA_2"].astype("string")
|
|
@@ -476,16 +476,16 @@ def compare_effect(path1,
|
|
|
476
476
|
|
|
477
477
|
# checking effect allele matching
|
|
478
478
|
nonmatch = np.nansum(sig_list_merged["EA_1"] != sig_list_merged["EA_2_aligned"])
|
|
479
|
-
|
|
479
|
+
log.write(" -Aligned all EAs in {} with EAs in {} ...".format(label[1],label[0]), verbose=verbose)
|
|
480
480
|
if nonmatch>0:
|
|
481
|
-
|
|
481
|
+
log.warning("Alleles for {} variants do not match...".format(nonmatch))
|
|
482
482
|
if allele_match==True:
|
|
483
483
|
if nonmatch>0:
|
|
484
484
|
sig_list_merged = sig_list_merged.loc[sig_list_merged["EA_1"] == sig_list_merged["EA_2_aligned"]]
|
|
485
485
|
else:
|
|
486
|
-
|
|
486
|
+
log.write(" -No variants with EA not matching...", verbose=verbose)
|
|
487
487
|
if fdr==True:
|
|
488
|
-
|
|
488
|
+
log.write(" -Using FDR...", verbose=verbose)
|
|
489
489
|
#sig_list_merged["P_1"] = fdrcorrection(sig_list_merged["P_1"])[1]
|
|
490
490
|
#sig_list_merged["P_2"] = fdrcorrection(sig_list_merged["P_2"])[1]
|
|
491
491
|
sig_list_merged["P_1"] =ss.false_discovery_control(sig_list_merged["P_1"])
|
|
@@ -495,41 +495,41 @@ def compare_effect(path1,
|
|
|
495
495
|
## winner's curse correction using aligned beta
|
|
496
496
|
if mode=="beta":
|
|
497
497
|
if wc_correction == "all":
|
|
498
|
-
|
|
498
|
+
log.write(" -Correcting BETA for winner's curse with threshold at {} for all variants...".format(sig_level), verbose=verbose)
|
|
499
499
|
sig_list_merged["EFFECT_1_RAW"] = sig_list_merged["EFFECT_1"].copy()
|
|
500
500
|
sig_list_merged["EFFECT_2_aligned_RAW"] = sig_list_merged["EFFECT_2_aligned"].copy()
|
|
501
501
|
|
|
502
|
-
|
|
502
|
+
log.write(" -Correcting BETA for {} variants in sumstats1...".format(sum(~sig_list_merged["EFFECT_1"].isna())), verbose=verbose)
|
|
503
503
|
sig_list_merged["EFFECT_1"] = sig_list_merged[["EFFECT_1_RAW","SE_1"]].apply(lambda x: wc_correct(x[0],x[1],sig_level),axis=1)
|
|
504
504
|
|
|
505
|
-
|
|
505
|
+
log.write(" -Correcting BETA for {} variants in sumstats2...".format(sum(~sig_list_merged["EFFECT_2_aligned"].isna())), verbose=verbose)
|
|
506
506
|
sig_list_merged["EFFECT_2_aligned"] = sig_list_merged[["EFFECT_2_aligned_RAW","SE_2"]].apply(lambda x: wc_correct(x[0],x[1],sig_level),axis=1)
|
|
507
507
|
|
|
508
508
|
elif wc_correction == "sig" :
|
|
509
|
-
|
|
509
|
+
log.write(" - Correcting BETA for winner's curse with threshold at {} for significant variants...".format(sig_level), verbose=verbose)
|
|
510
510
|
sig_list_merged["EFFECT_1_RAW"] = sig_list_merged["EFFECT_1"].copy()
|
|
511
511
|
sig_list_merged["EFFECT_2_aligned_RAW"] = sig_list_merged["EFFECT_2_aligned"].copy()
|
|
512
|
-
|
|
512
|
+
log.write(" -Correcting BETA for {} variants in sumstats1...".format(sum(sig_list_merged["P_1"]<sig_level)), verbose=verbose)
|
|
513
513
|
sig_list_merged.loc[sig_list_merged["P_1"]<sig_level, "EFFECT_1"] = sig_list_merged.loc[sig_list_merged["P_1"]<sig_level, ["EFFECT_1_RAW","SE_1"]].apply(lambda x: wc_correct_test(x[0],x[1],sig_level),axis=1)
|
|
514
|
-
|
|
514
|
+
log.write(" -Correcting BETA for {} variants in sumstats2...".format(sum(sig_list_merged["P_2"]<sig_level)), verbose=verbose)
|
|
515
515
|
sig_list_merged.loc[sig_list_merged["P_2"]<sig_level, "EFFECT_2_aligned"] = sig_list_merged.loc[sig_list_merged["P_2"]<sig_level, ["EFFECT_2_aligned_RAW","SE_2"]].apply(lambda x: wc_correct_test(x[0],x[1],sig_level),axis=1)
|
|
516
516
|
|
|
517
517
|
elif wc_correction == "sumstats1" :
|
|
518
|
-
|
|
518
|
+
log.write(" - Correcting BETA for winner's curse with threshold at {} for significant variants in sumstats1...".format(sig_level), verbose=verbose)
|
|
519
519
|
sig_list_merged["EFFECT_1_RAW"] = sig_list_merged["EFFECT_1"].copy()
|
|
520
|
-
|
|
520
|
+
log.write(" -Correcting BETA for {} variants in sumstats1...".format(sum(sig_list_merged["P_1"]<sig_level)), verbose=verbose)
|
|
521
521
|
sig_list_merged.loc[sig_list_merged["P_1"]<sig_level, "EFFECT_1"] = sig_list_merged.loc[sig_list_merged["P_1"]<sig_level, ["EFFECT_1_RAW","SE_1"]].apply(lambda x: wc_correct_test(x[0],x[1],sig_level),axis=1)
|
|
522
522
|
|
|
523
523
|
elif wc_correction == "sumstats2" :
|
|
524
|
-
|
|
524
|
+
log.write(" - Correcting BETA for winner's curse with threshold at {} for significant variants in sumstats2...".format(sig_level), verbose=verbose)
|
|
525
525
|
sig_list_merged["EFFECT_2_aligned_RAW"] = sig_list_merged["EFFECT_2_aligned"].copy()
|
|
526
|
-
|
|
526
|
+
log.write(" -Correcting BETA for {} variants in sumstats2...".format(sum(sig_list_merged["P_2"]<sig_level)), verbose=verbose)
|
|
527
527
|
sig_list_merged.loc[sig_list_merged["P_2"]<sig_level, "EFFECT_2_aligned"] = sig_list_merged.loc[sig_list_merged["P_2"]<sig_level, ["EFFECT_2_aligned_RAW","SE_2"]].apply(lambda x: wc_correct_test(x[0],x[1],sig_level),axis=1)
|
|
528
528
|
|
|
529
529
|
########################## Het test############################################################
|
|
530
530
|
## heterogeneity test
|
|
531
531
|
if (is_q is True):
|
|
532
|
-
|
|
532
|
+
log.write(" -Calculating Cochran's Q statistics and peform chisq test...", verbose=verbose)
|
|
533
533
|
if mode=="beta" or mode=="BETA" or mode=="Beta":
|
|
534
534
|
sig_list_merged = test_q(sig_list_merged,"EFFECT_1","SE_1","EFFECT_2_aligned","SE_2",q_level=q_level,is_q_mc=is_q_mc, log=log, verbose=verbose)
|
|
535
535
|
else:
|
|
@@ -538,19 +538,19 @@ def compare_effect(path1,
|
|
|
538
538
|
######################### save ###############################################################
|
|
539
539
|
## save the merged data
|
|
540
540
|
save_path = label[0]+"_"+label[1]+"_beta_sig_list_merged.tsv"
|
|
541
|
-
|
|
541
|
+
log.write(" -Saving the merged data to:",save_path, verbose=verbose)
|
|
542
542
|
sig_list_merged.to_csv(save_path,"\t")
|
|
543
543
|
|
|
544
544
|
########################## maf_threshold#############################################################
|
|
545
545
|
if (len(eaf)>0) and (maf_level is not None):
|
|
546
546
|
both_eaf_clear = (sig_list_merged["EAF_1"]>maf_level)&(sig_list_merged["EAF_1"]<1-maf_level)&(sig_list_merged["EAF_2"]>maf_level)&(sig_list_merged["EAF_2"]<1-maf_level)
|
|
547
|
-
|
|
547
|
+
log.write(" -Exclude "+str(len(sig_list_merged) -sum(both_eaf_clear))+ " variants with maf <",maf_level, verbose=verbose)
|
|
548
548
|
sig_list_merged = sig_list_merged.loc[both_eaf_clear,:]
|
|
549
549
|
# heterogeneity summary
|
|
550
550
|
if (is_q is True):
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
551
|
+
log.write(" -Significant het:" ,len(sig_list_merged.loc[sig_list_merged["HetP"]<0.05,:]), verbose=verbose)
|
|
552
|
+
log.write(" -All sig:" ,len(sig_list_merged), verbose=verbose)
|
|
553
|
+
log.write(" -Het rate:" ,len(sig_list_merged.loc[sig_list_merged["HetP"]<0.05,:])/len(sig_list_merged), verbose=verbose)
|
|
554
554
|
|
|
555
555
|
# extract group
|
|
556
556
|
if include_all==True:
|
|
@@ -568,13 +568,13 @@ def compare_effect(path1,
|
|
|
568
568
|
sum2only["Edge_color"]="none"
|
|
569
569
|
both["Edge_color"]="none"
|
|
570
570
|
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
571
|
+
log.write(" -Identified "+str(len(sum0)) + " variants which are not significant in " + label[3]+".", verbose=verbose)
|
|
572
|
+
log.write(" -Identified "+str(len(sum1only)) + " variants which are only significant in " + label[0]+".", verbose=verbose)
|
|
573
|
+
log.write(" -Identified "+str(len(sum2only)) + " variants which are only significant in " + label[1]+".", verbose=verbose)
|
|
574
|
+
log.write(" -Identified "+str(len(both)) + " variants which are significant in " + label[2] + ".", verbose=verbose)
|
|
575
575
|
|
|
576
576
|
##plot########################################################################################
|
|
577
|
-
|
|
577
|
+
log.write("Creating the scatter plot for effect sizes comparison...", verbose=verbose)
|
|
578
578
|
#plt.style.use("ggplot")
|
|
579
579
|
sns.set_style("ticks")
|
|
580
580
|
fig,ax = plt.subplots(**plt_args)
|
|
@@ -721,7 +721,7 @@ def compare_effect(path1,
|
|
|
721
721
|
|
|
722
722
|
# estimate se for r
|
|
723
723
|
if r_se==True:
|
|
724
|
-
|
|
724
|
+
log.write(" -Estimating SE for rsq using Jackknife method.", verbose=verbose)
|
|
725
725
|
r_se_jackknife = jackknife_r(sig_list_merged)
|
|
726
726
|
r_se_jackknife_string = " ({:.2f})".format(r_se_jackknife)
|
|
727
727
|
else:
|
|
@@ -731,19 +731,19 @@ def compare_effect(path1,
|
|
|
731
731
|
r_se_jackknife_string= ""
|
|
732
732
|
|
|
733
733
|
#### calculate p values based on selected value , default = 0
|
|
734
|
-
|
|
734
|
+
log.write(" -Calculating p values based on given null slope :",null_beta, verbose=verbose)
|
|
735
735
|
t_score = (reg[0]-null_beta) / reg[4]
|
|
736
736
|
degree = len(sig_list_merged.dropna())-2
|
|
737
737
|
p = reg[3]
|
|
738
738
|
#ss.t.sf(abs(t_score), df=degree)*2
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
#
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
739
|
+
log.write(" -Beta = ", reg[0], verbose=verbose)
|
|
740
|
+
log.write(" -Beta_se = ", reg[4], verbose=verbose)
|
|
741
|
+
#log.write(" -H0 beta = ", null_beta, ", recalculated p = ", "{:.2e}".format(p), verbose=verbose)
|
|
742
|
+
log.write(" -H0 beta = 0",", default p = ", "{:.2e}".format(reg[3]), verbose=verbose)
|
|
743
|
+
log.write(" -Peason correlation coefficient = ", "{:.2f}".format(reg[2]), verbose=verbose)
|
|
744
|
+
log.write(" -r2 = ", "{:.2f}".format(reg[2]**2), verbose=verbose)
|
|
745
745
|
if r_se==True:
|
|
746
|
-
|
|
746
|
+
log.write(" -R se (jackknife) = {:.2e}".format(r_se_jackknife), verbose=verbose)
|
|
747
747
|
|
|
748
748
|
if reg[0] > 0:
|
|
749
749
|
#if regression coefficient >0 : auxiliary line slope = 1
|
|
@@ -866,15 +866,7 @@ def compare_effect(path1,
|
|
|
866
866
|
gc.collect()
|
|
867
867
|
|
|
868
868
|
save_figure(fig, save, keyword="esc",save_args=save_args, log=log, verbose=verbose)
|
|
869
|
-
|
|
870
|
-
#if save:
|
|
871
|
-
# if verbose: log.write("Saving plot:")
|
|
872
|
-
# if save==True:
|
|
873
|
-
# fig.savefig("./{}_{}_effect_comparison_plot.png".format(label[0],label[1]),bbox_inches="tight",**save_args)
|
|
874
|
-
# log.write(" -Saved to "+ "./{}_{}_effect_comparison_plot.png".format(label[0],label[1]) + " successfully!" )
|
|
875
|
-
# else:
|
|
876
|
-
# fig.savefig(save,bbox_inches="tight",**save_args)
|
|
877
|
-
# log.write(" -Saved to "+ save + " successfully!" )
|
|
869
|
+
|
|
878
870
|
|
|
879
871
|
return [sig_list_merged, fig,log]
|
|
880
872
|
|
|
@@ -902,10 +894,10 @@ def test_q(df,beta1,se1,beta2,se2,q_level=0.05,is_q_mc=False, log=Log(), verbose
|
|
|
902
894
|
df["Edge_color"]="white"
|
|
903
895
|
|
|
904
896
|
if is_q_mc=="fdr":
|
|
905
|
-
|
|
897
|
+
log.write(" -FDR correction applied...", verbose=verbose)
|
|
906
898
|
df[pq] = ss.false_discovery_control(df[pq])
|
|
907
899
|
elif is_q_mc=="bon":
|
|
908
|
-
|
|
900
|
+
log.write(" -Bonferroni correction applied...", verbose=verbose)
|
|
909
901
|
df[pq] = df[pq] * len(df[pq])
|
|
910
902
|
|
|
911
903
|
df.loc[df[pq]<q_level,"Edge_color"]="black"
|
|
@@ -958,5 +950,5 @@ def drop_duplicate_and_na(df,snpid="SNPID",sort_by=False,log=Log(),verbose=True)
|
|
|
958
950
|
df.drop_duplicates(subset=[snpid], keep='first', inplace=True)
|
|
959
951
|
length_after= len(df)
|
|
960
952
|
if length_before != length_after:
|
|
961
|
-
|
|
953
|
+
log.write(" -Dropped {} duplicates or NAs...".format(length_before - length_after), verbose=verbose)
|
|
962
954
|
return df
|
gwaslab/viz_plot_miamiplot2.py
CHANGED
|
@@ -38,6 +38,7 @@ from gwaslab.g_Sumstats import Sumstats
|
|
|
38
38
|
from gwaslab.viz_aux_save_figure import save_figure
|
|
39
39
|
from gwaslab.viz_plot_mqqplot import mqqplot
|
|
40
40
|
from gwaslab.g_version import _get_version
|
|
41
|
+
|
|
41
42
|
def plot_miami2(
|
|
42
43
|
path1=None,
|
|
43
44
|
path2=None,
|
|
@@ -86,7 +87,7 @@ def plot_miami2(
|
|
|
86
87
|
log=Log(),
|
|
87
88
|
**mqq_args
|
|
88
89
|
):
|
|
89
|
-
|
|
90
|
+
log.write("Start to create miami plot {}:".format(_get_version()), verbose=verbose)
|
|
90
91
|
## figuring arguments ###########################################################################################################
|
|
91
92
|
# figure columns to use
|
|
92
93
|
if scaled == True:
|
|
@@ -98,6 +99,7 @@ def plot_miami2(
|
|
|
98
99
|
cols = ["CHR","POS","MLOG10P"]
|
|
99
100
|
else:
|
|
100
101
|
cols = ["CHR","POS","P"]
|
|
102
|
+
|
|
101
103
|
if cols1 is None:
|
|
102
104
|
cols1 = cols.copy()
|
|
103
105
|
if cols2 is None:
|
|
@@ -151,9 +153,8 @@ def plot_miami2(
|
|
|
151
153
|
fig_args, scatter_args = _figure_args_for_vector_plot(save, fig_args, scatter_args)
|
|
152
154
|
|
|
153
155
|
# add suffix if ids are the same
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
if verbose: log.write("Start to plot miami plot {}:".format(_get_version()))
|
|
156
|
+
id1_1, id2_2, mqq_args1, mqq_args2 = _solve_id_contradictory(id0, id1, id2, mqq_args1, mqq_args2)
|
|
157
|
+
|
|
157
158
|
if dpi!=100:
|
|
158
159
|
fig_args["dpi"] = dpi
|
|
159
160
|
if xtickpad is None:
|
|
@@ -176,6 +177,7 @@ def plot_miami2(
|
|
|
176
177
|
titles_pad_adjusted[0]= 1 + titles_pad[0]
|
|
177
178
|
if "anno2" in mqq_args.keys():
|
|
178
179
|
titles_pad_adjusted[1]= - titles_pad[1]
|
|
180
|
+
|
|
179
181
|
if merged_sumstats is None:
|
|
180
182
|
## load sumstats1 ###########################################################################################################
|
|
181
183
|
sumstats1 = _figure_type_load_sumstats(name="Sumstats1",
|
|
@@ -198,8 +200,8 @@ def plot_miami2(
|
|
|
198
200
|
else:
|
|
199
201
|
cols1[2] += suffixes[0]
|
|
200
202
|
cols2[2] += suffixes[1]
|
|
201
|
-
sumstats1 = merged_sumstats
|
|
202
|
-
sumstats2 = merged_sumstats
|
|
203
|
+
sumstats1 = merged_sumstats[cols1].copy()
|
|
204
|
+
sumstats2 = merged_sumstats[cols2].copy()
|
|
203
205
|
|
|
204
206
|
## rename and quick fix ###########################################################################################################
|
|
205
207
|
renaming_dict1 = {cols1[0]:"CHR",cols1[1]:"POS",cols1[2]:"P"}
|
|
@@ -217,7 +219,7 @@ def plot_miami2(
|
|
|
217
219
|
## create merge index ###########################################################################################################
|
|
218
220
|
sumstats1 = _quick_add_tchrpos(sumstats1,large_number=large_number, dropchrpos=False, verbose=verbose, log=log)
|
|
219
221
|
sumstats2 = _quick_add_tchrpos(sumstats2,large_number=large_number, dropchrpos=False, verbose=verbose, log=log)
|
|
220
|
-
|
|
222
|
+
log.write(" -Merging sumstats using chr and pos...",verbose=verbose)
|
|
221
223
|
|
|
222
224
|
###### merge #####################################################################################################
|
|
223
225
|
merged_sumstats = _quick_merge_sumstats(sumstats1=sumstats1,sumstats2=sumstats2)
|
|
@@ -231,7 +233,9 @@ def plot_miami2(
|
|
|
231
233
|
drop_chr_start=False)
|
|
232
234
|
|
|
233
235
|
# P_1 scaled_P_1 P_2 scaled_P_2 TCHR+POS CHR POS
|
|
234
|
-
|
|
236
|
+
log.write(" -Columns in merged sumstats: {}".format(",".join(merged_sumstats.columns)), verbose=verbose)
|
|
237
|
+
|
|
238
|
+
|
|
235
239
|
del(sumstats1)
|
|
236
240
|
del(sumstats2)
|
|
237
241
|
garbage_collect.collect()
|
|
@@ -243,13 +247,14 @@ def plot_miami2(
|
|
|
243
247
|
plt.subplots_adjust(hspace=region_hspace)
|
|
244
248
|
else:
|
|
245
249
|
fig, ax1, ax5 = figax
|
|
246
|
-
|
|
250
|
+
|
|
251
|
+
log.write("Start to create Manhattan plot for sumstats1...", verbose=verbose)
|
|
247
252
|
fig,log = mqqplot(merged_sumstats,
|
|
248
253
|
chrom="CHR",
|
|
249
254
|
pos="POS",
|
|
250
255
|
p="P_1",
|
|
251
256
|
mlog10p="scaled_P_1",
|
|
252
|
-
snpid=
|
|
257
|
+
snpid=id1_1,
|
|
253
258
|
scaled=scaled1,
|
|
254
259
|
log=log,
|
|
255
260
|
mode=mode,
|
|
@@ -260,15 +265,16 @@ def plot_miami2(
|
|
|
260
265
|
_if_quick_qc=False,
|
|
261
266
|
**mqq_args1
|
|
262
267
|
)
|
|
268
|
+
log.write("Finished creating Manhattan plot for sumstats1".format(_get_version()), verbose=verbose)
|
|
263
269
|
|
|
264
|
-
|
|
270
|
+
log.write("Start to create Manhattan plot for sumstats2...", verbose=verbose)
|
|
265
271
|
fig,log = mqqplot(merged_sumstats,
|
|
266
272
|
chrom="CHR",
|
|
267
273
|
pos="POS",
|
|
268
274
|
p="P_2",
|
|
269
275
|
mlog10p="scaled_P_2",
|
|
270
276
|
scaled=scaled2,
|
|
271
|
-
snpid=
|
|
277
|
+
snpid=id2_2,
|
|
272
278
|
log=log,
|
|
273
279
|
mode=mode,
|
|
274
280
|
figax=(fig,ax5),
|
|
@@ -277,7 +283,8 @@ def plot_miami2(
|
|
|
277
283
|
_invert=True,
|
|
278
284
|
_if_quick_qc=False,
|
|
279
285
|
**mqq_args2)
|
|
280
|
-
|
|
286
|
+
log.write("Finished creating Manhattan plot for sumstats2".format(_get_version()), verbose=verbose)
|
|
287
|
+
|
|
281
288
|
if same_ylim==True:
|
|
282
289
|
ylim1_converted = ax1.get_ylim()
|
|
283
290
|
ylim2_converted = ax5.get_ylim()
|
|
@@ -285,8 +292,6 @@ def plot_miami2(
|
|
|
285
292
|
ax5.set_ylim(ylim1_converted)
|
|
286
293
|
else:
|
|
287
294
|
ax1.set_ylim(ylim2_converted)
|
|
288
|
-
|
|
289
|
-
|
|
290
295
|
#####################################################################################################################
|
|
291
296
|
|
|
292
297
|
ax5.set_xlabel("")
|
|
@@ -316,7 +321,7 @@ def plot_miami2(
|
|
|
316
321
|
|
|
317
322
|
garbage_collect.collect()
|
|
318
323
|
|
|
319
|
-
|
|
324
|
+
log.write("Finished creating miami plot successfully", verbose=verbose)
|
|
320
325
|
#Return matplotlib figure object #######################################################################################
|
|
321
326
|
return fig, log
|
|
322
327
|
|
|
@@ -337,24 +342,27 @@ def _sort_args_to_12(mqq_args):
|
|
|
337
342
|
return mqq_args1, mqq_args2
|
|
338
343
|
|
|
339
344
|
def _solve_id_contradictory(id0, id1, id2, mqq_args1, mqq_args2):
|
|
340
|
-
if id1 is not None and id2 is not None:
|
|
345
|
+
if (id1 is not None) and (id2 is not None):
|
|
341
346
|
if id1 == id2:
|
|
342
347
|
id1_1 = id1 + "_1"
|
|
343
348
|
id2_2 = id2 + "_2"
|
|
344
349
|
if "anno" in mqq_args1.keys():
|
|
345
350
|
if mqq_args1["anno"] == id1:
|
|
346
351
|
mqq_args1["anno"] = id1_1
|
|
347
|
-
if "anno" in
|
|
348
|
-
if
|
|
349
|
-
|
|
352
|
+
if "anno" in mqq_args2.keys():
|
|
353
|
+
if mqq_args2["anno"] == id2:
|
|
354
|
+
mqq_args2["anno"] = id2_2
|
|
350
355
|
else:
|
|
351
356
|
id1_1 = id1
|
|
352
357
|
id2_2 = id2
|
|
358
|
+
|
|
353
359
|
if id1 is None:
|
|
354
360
|
id1_1 = id0
|
|
361
|
+
|
|
355
362
|
if id2 is None:
|
|
356
363
|
id2_2 = id0
|
|
357
|
-
|
|
364
|
+
|
|
365
|
+
return (id1_1, id2_2, mqq_args1, mqq_args2)
|
|
358
366
|
|
|
359
367
|
def _figure_args_for_vector_plot(save, fig_args, scatter_kwargs ):
|
|
360
368
|
if save is not None:
|
|
@@ -379,19 +387,19 @@ def _set_spine_visibility(ax1,ax5):
|
|
|
379
387
|
|
|
380
388
|
def _figure_type_load_sumstats(name, path, sep, cols, readcsv_args, loadmode, log, verbose):
|
|
381
389
|
if type(path) is str:
|
|
382
|
-
|
|
383
|
-
|
|
390
|
+
log.write(" -Loading {} ({} mode): {}".format(name, loadmode, path), verbose=verbose)
|
|
391
|
+
log.write(" -Obtaining {} CHR, POS, P and annotation from: {}".format(name, cols), verbose=verbose)
|
|
384
392
|
|
|
385
393
|
if loadmode=="pickle":
|
|
386
394
|
sumstats = load_data_from_pickle(path,usecols=cols)
|
|
387
395
|
else:
|
|
388
396
|
if type(path) is Sumstats:
|
|
389
|
-
|
|
397
|
+
log.write(" -Loading {} from gwaslab.Sumstats Object".format(name), verbose=verbose)
|
|
390
398
|
sumstats = path.data[cols].copy()
|
|
391
399
|
elif type(path) is pd.DataFrame:
|
|
392
|
-
|
|
400
|
+
log.write(" -Loading {} from pandas.DataFrame Object".format(name), verbose=verbose)
|
|
393
401
|
sumstats = path[cols].copy()
|
|
394
402
|
else:
|
|
395
|
-
|
|
403
|
+
log.write(" -Loading {} from tabular files".format(name), verbose=verbose)
|
|
396
404
|
sumstats=pd.read_table(path,sep=sep,usecols=cols,dtype={cols[0]:"string",cols[1]:"Int64",cols[2]:"float64"},**readcsv_args)
|
|
397
405
|
return sumstats
|