gwaslab 3.4.38__py3-none-any.whl → 3.4.39__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gwaslab might be problematic. Click here for more details.
- gwaslab/bd_common_data.py +6 -3
- gwaslab/bd_download.py +9 -9
- gwaslab/bd_get_hapmap3.py +43 -9
- gwaslab/g_Log.py +14 -5
- gwaslab/g_Sumstats.py +86 -18
- gwaslab/g_SumstatsPair.py +70 -23
- gwaslab/g_SumstatsT.py +2 -2
- gwaslab/g_version.py +10 -10
- gwaslab/hm_casting.py +9 -4
- gwaslab/hm_harmonize_sumstats.py +88 -83
- gwaslab/io_preformat_input.py +14 -14
- gwaslab/io_read_ldsc.py +49 -1
- gwaslab/ldsc_irwls.py +198 -0
- gwaslab/ldsc_jackknife.py +514 -0
- gwaslab/ldsc_ldscore.py +417 -0
- gwaslab/ldsc_parse.py +294 -0
- gwaslab/ldsc_regressions.py +747 -0
- gwaslab/ldsc_sumstats.py +629 -0
- gwaslab/qc_check_datatype.py +1 -1
- gwaslab/qc_fix_sumstats.py +163 -161
- gwaslab/util_ex_calculate_ldmatrix.py +2 -2
- gwaslab/util_ex_gwascatalog.py +24 -24
- gwaslab/util_ex_ldproxyfinder.py +9 -9
- gwaslab/util_ex_ldsc.py +189 -0
- gwaslab/util_in_calculate_gc.py +6 -6
- gwaslab/util_in_calculate_power.py +42 -43
- gwaslab/util_in_convert_h2.py +8 -8
- gwaslab/util_in_fill_data.py +28 -28
- gwaslab/util_in_filter_value.py +91 -52
- gwaslab/util_in_get_density.py +8 -8
- gwaslab/util_in_get_sig.py +407 -65
- gwaslab/viz_aux_annotate_plot.py +12 -12
- gwaslab/viz_aux_quickfix.py +18 -18
- gwaslab/viz_aux_reposition_text.py +3 -3
- gwaslab/viz_aux_save_figure.py +14 -5
- gwaslab/viz_plot_compare_af.py +29 -30
- gwaslab/viz_plot_compare_effect.py +63 -71
- gwaslab/viz_plot_miamiplot2.py +6 -6
- gwaslab/viz_plot_mqqplot.py +17 -3
- gwaslab/viz_plot_qqplot.py +1 -1
- gwaslab/viz_plot_regionalplot.py +33 -32
- gwaslab/viz_plot_rg_heatmap.py +28 -26
- gwaslab/viz_plot_stackedregional.py +40 -21
- gwaslab/viz_plot_trumpetplot.py +50 -55
- gwaslab-3.4.39.dist-info/LICENSE +674 -0
- {gwaslab-3.4.38.dist-info → gwaslab-3.4.39.dist-info}/METADATA +4 -3
- gwaslab-3.4.39.dist-info/RECORD +80 -0
- gwaslab-3.4.38.dist-info/RECORD +0 -72
- /gwaslab-3.4.38.dist-info/LICENSE → /gwaslab-3.4.39.dist-info/LICENSE_before_v3.4.39 +0 -0
- {gwaslab-3.4.38.dist-info → gwaslab-3.4.39.dist-info}/WHEEL +0 -0
- {gwaslab-3.4.38.dist-info → gwaslab-3.4.39.dist-info}/top_level.txt +0 -0
|
@@ -99,14 +99,14 @@ def compare_effect(path1,
|
|
|
99
99
|
if anno_het ==True:
|
|
100
100
|
is_q=True
|
|
101
101
|
|
|
102
|
-
|
|
102
|
+
log.write("Start to process the raw sumstats for plotting...")
|
|
103
103
|
|
|
104
104
|
######### 1 check the value used to plot
|
|
105
105
|
if mode not in ["Beta","beta","BETA","OR","or"]:
|
|
106
106
|
raise ValueError("Please input Beta or OR")
|
|
107
107
|
|
|
108
108
|
if type(path1) is Sumstats:
|
|
109
|
-
|
|
109
|
+
log.write("Path1 is gwaslab Sumstats object...")
|
|
110
110
|
if cols_name_list_1 is None:
|
|
111
111
|
cols_name_list_1 = ["SNPID","P","EA","NEA","CHR","POS"]
|
|
112
112
|
if effect_cols_list_1 is None:
|
|
@@ -115,10 +115,10 @@ def compare_effect(path1,
|
|
|
115
115
|
else:
|
|
116
116
|
effect_cols_list_1 = ["OR","OR_95L","OR_95U"]
|
|
117
117
|
elif type(path1) is pd.DataFrame:
|
|
118
|
-
|
|
118
|
+
log.write("Path1 is pandas DataFrame object...")
|
|
119
119
|
|
|
120
120
|
if type(path2) is Sumstats:
|
|
121
|
-
|
|
121
|
+
log.write("Path2 is gwaslab Sumstats object...")
|
|
122
122
|
if cols_name_list_2 is None:
|
|
123
123
|
cols_name_list_2 = ["SNPID","P","EA","NEA","CHR","POS"]
|
|
124
124
|
if effect_cols_list_2 is None:
|
|
@@ -127,10 +127,10 @@ def compare_effect(path1,
|
|
|
127
127
|
else:
|
|
128
128
|
effect_cols_list_2 = ["OR","OR_95L","OR_95U"]
|
|
129
129
|
elif type(path2) is pd.DataFrame:
|
|
130
|
-
|
|
130
|
+
log.write("Path2 is pandas DataFrame object...")
|
|
131
131
|
|
|
132
132
|
######### 2 extract snplist2
|
|
133
|
-
|
|
133
|
+
log.write(" -Loading "+label[1]+" SNP list in memory...")
|
|
134
134
|
|
|
135
135
|
if type(path2) is Sumstats:
|
|
136
136
|
sumstats = path2.data[[cols_name_list_2[0]]].copy()
|
|
@@ -148,7 +148,7 @@ def compare_effect(path1,
|
|
|
148
148
|
cols_to_extract = [cols_name_list_1[0],cols_name_list_1[1],cols_name_list_1[4],cols_name_list_1[5]]
|
|
149
149
|
|
|
150
150
|
######### 4 load sumstats1
|
|
151
|
-
|
|
151
|
+
log.write(" -Loading sumstats for "+label[0]+":",",".join(cols_to_extract))
|
|
152
152
|
|
|
153
153
|
if type(path1) is Sumstats:
|
|
154
154
|
sumstats = path1.data[cols_to_extract].copy()
|
|
@@ -163,7 +163,7 @@ def compare_effect(path1,
|
|
|
163
163
|
sumstats[cols_name_list_1[1]] = np.power(10,-sumstats[cols_name_list_1[1]])
|
|
164
164
|
######### 5 extract the common set
|
|
165
165
|
common_snp_set = common_snp_set.intersection(sumstats[cols_name_list_1[0]].values)
|
|
166
|
-
|
|
166
|
+
log.write(" -Counting variants available for both datasets:",len(common_snp_set)," variants...")
|
|
167
167
|
|
|
168
168
|
######### 6 rename the sumstats
|
|
169
169
|
rename_dict = { cols_name_list_1[0]:"SNPID",
|
|
@@ -179,16 +179,16 @@ def compare_effect(path1,
|
|
|
179
179
|
######### 7 extract only available variants from sumstats1
|
|
180
180
|
sumstats = sumstats.loc[sumstats["SNPID"].isin(common_snp_set),:]
|
|
181
181
|
|
|
182
|
-
|
|
182
|
+
log.write(" -Using only variants available for both datasets...")
|
|
183
183
|
######### 8 extract SNPs for comparison
|
|
184
184
|
|
|
185
185
|
if snplist is not None:
|
|
186
186
|
######### 8.1 if a snplist is provided, use the snp list
|
|
187
|
-
|
|
187
|
+
log.write(" -Extract variants in the given list from "+label[0]+"...")
|
|
188
188
|
sig_list_1 = sumstats.loc[sumstats["SNPID"].isin(snplist),:].copy()
|
|
189
189
|
else:
|
|
190
190
|
######### 8.2 otherwise use the automatically detected lead SNPs
|
|
191
|
-
|
|
191
|
+
log.write(" -Extract lead variants from "+label[0]+"...")
|
|
192
192
|
sig_list_1 = getsig(sumstats,"SNPID","CHR","POS","P", verbose=verbose,sig_level=sig_level,**get_lead_args)
|
|
193
193
|
|
|
194
194
|
if drop==True:
|
|
@@ -200,7 +200,7 @@ def compare_effect(path1,
|
|
|
200
200
|
else:
|
|
201
201
|
cols_to_extract = [cols_name_list_2[0],cols_name_list_2[1],cols_name_list_2[4],cols_name_list_2[5]]
|
|
202
202
|
|
|
203
|
-
|
|
203
|
+
log.write(" -Loading sumstats for "+label[1]+":",",".join(cols_to_extract))
|
|
204
204
|
|
|
205
205
|
if type(path2) is Sumstats:
|
|
206
206
|
sumstats = path2.data[cols_to_extract].copy()
|
|
@@ -228,10 +228,10 @@ def compare_effect(path1,
|
|
|
228
228
|
######## 12 extract SNPs for comparison
|
|
229
229
|
if snplist is not None:
|
|
230
230
|
######### 12.1 if a snplist is provided, use the snp list
|
|
231
|
-
|
|
231
|
+
log.write(" -Extract snps in the given list from "+label[1]+"...")
|
|
232
232
|
sig_list_2 = sumstats.loc[sumstats["SNPID"].isin(snplist),:].copy()
|
|
233
233
|
else:
|
|
234
|
-
|
|
234
|
+
log.write(" -Extract lead snps from "+label[1]+"...")
|
|
235
235
|
######### 12.2 otherwise use the automatically detected lead SNPs
|
|
236
236
|
sig_list_2 = getsig(sumstats,"SNPID","CHR","POS","P",
|
|
237
237
|
verbose=verbose,sig_level=sig_level,**get_lead_args)
|
|
@@ -240,7 +240,7 @@ def compare_effect(path1,
|
|
|
240
240
|
|
|
241
241
|
######### 13 Merge two list using SNPID
|
|
242
242
|
##############################################################################
|
|
243
|
-
|
|
243
|
+
log.write("Merging snps from "+label[0]+" and "+label[1]+"...")
|
|
244
244
|
|
|
245
245
|
sig_list_merged = pd.merge(sig_list_1,sig_list_2,left_on="SNPID",right_on="SNPID",how="outer",suffixes=('_1', '_2'))
|
|
246
246
|
# SNPID P_1 P_2
|
|
@@ -260,7 +260,7 @@ def compare_effect(path1,
|
|
|
260
260
|
cols_to_extract = [cols_name_list_1[0],cols_name_list_1[1], cols_name_list_1[2],cols_name_list_1[3], effect_cols_list_1[0], effect_cols_list_1[1], effect_cols_list_1[2]]
|
|
261
261
|
|
|
262
262
|
if len(eaf)>0: cols_to_extract.append(eaf[0])
|
|
263
|
-
|
|
263
|
+
log.write(" -Extract statistics of selected variants from "+label[0]+" : ",",".join(cols_to_extract) )
|
|
264
264
|
|
|
265
265
|
if type(path1) is Sumstats:
|
|
266
266
|
sumstats = path1.data[cols_to_extract].copy()
|
|
@@ -300,7 +300,7 @@ def compare_effect(path1,
|
|
|
300
300
|
sumstats = drop_duplicate_and_na(sumstats, sort_by="P_1", log=log , verbose=verbose)
|
|
301
301
|
sumstats.drop("P_1",axis=1,inplace=True)
|
|
302
302
|
|
|
303
|
-
|
|
303
|
+
log.write(" -Merging "+label[0]+" effect information...", verbose=verbose)
|
|
304
304
|
|
|
305
305
|
sig_list_merged = pd.merge(sig_list_merged,sumstats,
|
|
306
306
|
left_on="SNPID",right_on="SNPID",
|
|
@@ -316,7 +316,7 @@ def compare_effect(path1,
|
|
|
316
316
|
## check if eaf column is provided.
|
|
317
317
|
if len(eaf)>0: cols_to_extract.append(eaf[1])
|
|
318
318
|
|
|
319
|
-
|
|
319
|
+
log.write(" -Extract statistics of selected variants from "+label[1]+" : ",",".join(cols_to_extract), verbose=verbose )
|
|
320
320
|
if type(path2) is Sumstats:
|
|
321
321
|
sumstats = path2.data[cols_to_extract].copy()
|
|
322
322
|
elif type(path2) is pd.DataFrame:
|
|
@@ -353,7 +353,7 @@ def compare_effect(path1,
|
|
|
353
353
|
sumstats = drop_duplicate_and_na(sumstats, sort_by="P_2", log=log, verbose=verbose)
|
|
354
354
|
sumstats.drop("P_2",axis=1,inplace=True)
|
|
355
355
|
|
|
356
|
-
|
|
356
|
+
log.write(" -Merging "+label[1]+" effect information...", verbose=verbose)
|
|
357
357
|
sig_list_merged = pd.merge(sig_list_merged,sumstats,
|
|
358
358
|
left_on="SNPID",right_on="SNPID",
|
|
359
359
|
how="left")
|
|
@@ -361,7 +361,7 @@ def compare_effect(path1,
|
|
|
361
361
|
sig_list_merged.set_index("SNPID",inplace=True)
|
|
362
362
|
|
|
363
363
|
################ 16 update sumstats1
|
|
364
|
-
|
|
364
|
+
log.write(" -Updating missing information for "+label[0]+" ...", verbose=verbose)
|
|
365
365
|
if type(path1) is Sumstats:
|
|
366
366
|
sumstats = path1.data[[cols_name_list_1[0],cols_name_list_1[1]]].copy()
|
|
367
367
|
elif type(path1) is pd.DataFrame:
|
|
@@ -383,7 +383,7 @@ def compare_effect(path1,
|
|
|
383
383
|
sig_list_merged.update(sumstats)
|
|
384
384
|
|
|
385
385
|
################# 17 update sumstats2
|
|
386
|
-
|
|
386
|
+
log.write(" -Updating missing information for "+label[1]+" ...", verbose=verbose)
|
|
387
387
|
if type(path2) is Sumstats:
|
|
388
388
|
sumstats = path2.data[[cols_name_list_2[0],cols_name_list_2[1]]].copy()
|
|
389
389
|
elif type(path2) is pd.DataFrame:
|
|
@@ -406,15 +406,15 @@ def compare_effect(path1,
|
|
|
406
406
|
sig_list_merged.update(sumstats)
|
|
407
407
|
|
|
408
408
|
if scaled1 ==True :
|
|
409
|
-
|
|
409
|
+
log.write(" -Sumstats -log10(P) values are being converted to P...", verbose=verbose)
|
|
410
410
|
sig_list_merged["P_1"] = np.power(10,-sig_list_merged["P_1"])
|
|
411
411
|
if scaled2 ==True :
|
|
412
|
-
|
|
412
|
+
log.write(" -Sumstats -log10(P) values are being converted to P...", verbose=verbose)
|
|
413
413
|
sig_list_merged["P_2"] = np.power(10,-sig_list_merged["P_2"])
|
|
414
414
|
####
|
|
415
415
|
#################################################################################
|
|
416
416
|
############## 18 init indicator
|
|
417
|
-
|
|
417
|
+
log.write(" -Assigning indicator ...", verbose=verbose)
|
|
418
418
|
# 0-> 0
|
|
419
419
|
# 1 -> sig in sumstats1
|
|
420
420
|
# 2 -> sig in sumstats2
|
|
@@ -428,7 +428,7 @@ def compare_effect(path1,
|
|
|
428
428
|
sig_list_merged["POS"]=np.max(sig_list_merged[["POS_1","POS_2"]], axis=1).astype(int)
|
|
429
429
|
sig_list_merged.drop(labels=['CHR_1', 'CHR_2','POS_1', 'POS_2'], axis=1,inplace=True)
|
|
430
430
|
|
|
431
|
-
|
|
431
|
+
log.write(" -Aligning "+label[1]+" EA with "+label[0]+" EA ...", verbose=verbose)
|
|
432
432
|
############### 19 align allele effect with sumstats 1
|
|
433
433
|
sig_list_merged["EA_1"]=sig_list_merged["EA_1"].astype("string")
|
|
434
434
|
sig_list_merged["EA_2"]=sig_list_merged["EA_2"].astype("string")
|
|
@@ -476,16 +476,16 @@ def compare_effect(path1,
|
|
|
476
476
|
|
|
477
477
|
# checking effect allele matching
|
|
478
478
|
nonmatch = np.nansum(sig_list_merged["EA_1"] != sig_list_merged["EA_2_aligned"])
|
|
479
|
-
|
|
479
|
+
log.write(" -Aligned all EAs in {} with EAs in {} ...".format(label[1],label[0]), verbose=verbose)
|
|
480
480
|
if nonmatch>0:
|
|
481
|
-
|
|
481
|
+
log.warning("Alleles for {} variants do not match...".format(nonmatch))
|
|
482
482
|
if allele_match==True:
|
|
483
483
|
if nonmatch>0:
|
|
484
484
|
sig_list_merged = sig_list_merged.loc[sig_list_merged["EA_1"] == sig_list_merged["EA_2_aligned"]]
|
|
485
485
|
else:
|
|
486
|
-
|
|
486
|
+
log.write(" -No variants with EA not matching...", verbose=verbose)
|
|
487
487
|
if fdr==True:
|
|
488
|
-
|
|
488
|
+
log.write(" -Using FDR...", verbose=verbose)
|
|
489
489
|
#sig_list_merged["P_1"] = fdrcorrection(sig_list_merged["P_1"])[1]
|
|
490
490
|
#sig_list_merged["P_2"] = fdrcorrection(sig_list_merged["P_2"])[1]
|
|
491
491
|
sig_list_merged["P_1"] =ss.false_discovery_control(sig_list_merged["P_1"])
|
|
@@ -495,41 +495,41 @@ def compare_effect(path1,
|
|
|
495
495
|
## winner's curse correction using aligned beta
|
|
496
496
|
if mode=="beta":
|
|
497
497
|
if wc_correction == "all":
|
|
498
|
-
|
|
498
|
+
log.write(" -Correcting BETA for winner's curse with threshold at {} for all variants...".format(sig_level), verbose=verbose)
|
|
499
499
|
sig_list_merged["EFFECT_1_RAW"] = sig_list_merged["EFFECT_1"].copy()
|
|
500
500
|
sig_list_merged["EFFECT_2_aligned_RAW"] = sig_list_merged["EFFECT_2_aligned"].copy()
|
|
501
501
|
|
|
502
|
-
|
|
502
|
+
log.write(" -Correcting BETA for {} variants in sumstats1...".format(sum(~sig_list_merged["EFFECT_1"].isna())), verbose=verbose)
|
|
503
503
|
sig_list_merged["EFFECT_1"] = sig_list_merged[["EFFECT_1_RAW","SE_1"]].apply(lambda x: wc_correct(x[0],x[1],sig_level),axis=1)
|
|
504
504
|
|
|
505
|
-
|
|
505
|
+
log.write(" -Correcting BETA for {} variants in sumstats2...".format(sum(~sig_list_merged["EFFECT_2_aligned"].isna())), verbose=verbose)
|
|
506
506
|
sig_list_merged["EFFECT_2_aligned"] = sig_list_merged[["EFFECT_2_aligned_RAW","SE_2"]].apply(lambda x: wc_correct(x[0],x[1],sig_level),axis=1)
|
|
507
507
|
|
|
508
508
|
elif wc_correction == "sig" :
|
|
509
|
-
|
|
509
|
+
log.write(" - Correcting BETA for winner's curse with threshold at {} for significant variants...".format(sig_level), verbose=verbose)
|
|
510
510
|
sig_list_merged["EFFECT_1_RAW"] = sig_list_merged["EFFECT_1"].copy()
|
|
511
511
|
sig_list_merged["EFFECT_2_aligned_RAW"] = sig_list_merged["EFFECT_2_aligned"].copy()
|
|
512
|
-
|
|
512
|
+
log.write(" -Correcting BETA for {} variants in sumstats1...".format(sum(sig_list_merged["P_1"]<sig_level)), verbose=verbose)
|
|
513
513
|
sig_list_merged.loc[sig_list_merged["P_1"]<sig_level, "EFFECT_1"] = sig_list_merged.loc[sig_list_merged["P_1"]<sig_level, ["EFFECT_1_RAW","SE_1"]].apply(lambda x: wc_correct_test(x[0],x[1],sig_level),axis=1)
|
|
514
|
-
|
|
514
|
+
log.write(" -Correcting BETA for {} variants in sumstats2...".format(sum(sig_list_merged["P_2"]<sig_level)), verbose=verbose)
|
|
515
515
|
sig_list_merged.loc[sig_list_merged["P_2"]<sig_level, "EFFECT_2_aligned"] = sig_list_merged.loc[sig_list_merged["P_2"]<sig_level, ["EFFECT_2_aligned_RAW","SE_2"]].apply(lambda x: wc_correct_test(x[0],x[1],sig_level),axis=1)
|
|
516
516
|
|
|
517
517
|
elif wc_correction == "sumstats1" :
|
|
518
|
-
|
|
518
|
+
log.write(" - Correcting BETA for winner's curse with threshold at {} for significant variants in sumstats1...".format(sig_level), verbose=verbose)
|
|
519
519
|
sig_list_merged["EFFECT_1_RAW"] = sig_list_merged["EFFECT_1"].copy()
|
|
520
|
-
|
|
520
|
+
log.write(" -Correcting BETA for {} variants in sumstats1...".format(sum(sig_list_merged["P_1"]<sig_level)), verbose=verbose)
|
|
521
521
|
sig_list_merged.loc[sig_list_merged["P_1"]<sig_level, "EFFECT_1"] = sig_list_merged.loc[sig_list_merged["P_1"]<sig_level, ["EFFECT_1_RAW","SE_1"]].apply(lambda x: wc_correct_test(x[0],x[1],sig_level),axis=1)
|
|
522
522
|
|
|
523
523
|
elif wc_correction == "sumstats2" :
|
|
524
|
-
|
|
524
|
+
log.write(" - Correcting BETA for winner's curse with threshold at {} for significant variants in sumstats2...".format(sig_level), verbose=verbose)
|
|
525
525
|
sig_list_merged["EFFECT_2_aligned_RAW"] = sig_list_merged["EFFECT_2_aligned"].copy()
|
|
526
|
-
|
|
526
|
+
log.write(" -Correcting BETA for {} variants in sumstats2...".format(sum(sig_list_merged["P_2"]<sig_level)), verbose=verbose)
|
|
527
527
|
sig_list_merged.loc[sig_list_merged["P_2"]<sig_level, "EFFECT_2_aligned"] = sig_list_merged.loc[sig_list_merged["P_2"]<sig_level, ["EFFECT_2_aligned_RAW","SE_2"]].apply(lambda x: wc_correct_test(x[0],x[1],sig_level),axis=1)
|
|
528
528
|
|
|
529
529
|
########################## Het test############################################################
|
|
530
530
|
## heterogeneity test
|
|
531
531
|
if (is_q is True):
|
|
532
|
-
|
|
532
|
+
log.write(" -Calculating Cochran's Q statistics and peform chisq test...", verbose=verbose)
|
|
533
533
|
if mode=="beta" or mode=="BETA" or mode=="Beta":
|
|
534
534
|
sig_list_merged = test_q(sig_list_merged,"EFFECT_1","SE_1","EFFECT_2_aligned","SE_2",q_level=q_level,is_q_mc=is_q_mc, log=log, verbose=verbose)
|
|
535
535
|
else:
|
|
@@ -538,19 +538,19 @@ def compare_effect(path1,
|
|
|
538
538
|
######################### save ###############################################################
|
|
539
539
|
## save the merged data
|
|
540
540
|
save_path = label[0]+"_"+label[1]+"_beta_sig_list_merged.tsv"
|
|
541
|
-
|
|
541
|
+
log.write(" -Saving the merged data to:",save_path, verbose=verbose)
|
|
542
542
|
sig_list_merged.to_csv(save_path,"\t")
|
|
543
543
|
|
|
544
544
|
########################## maf_threshold#############################################################
|
|
545
545
|
if (len(eaf)>0) and (maf_level is not None):
|
|
546
546
|
both_eaf_clear = (sig_list_merged["EAF_1"]>maf_level)&(sig_list_merged["EAF_1"]<1-maf_level)&(sig_list_merged["EAF_2"]>maf_level)&(sig_list_merged["EAF_2"]<1-maf_level)
|
|
547
|
-
|
|
547
|
+
log.write(" -Exclude "+str(len(sig_list_merged) -sum(both_eaf_clear))+ " variants with maf <",maf_level, verbose=verbose)
|
|
548
548
|
sig_list_merged = sig_list_merged.loc[both_eaf_clear,:]
|
|
549
549
|
# heterogeneity summary
|
|
550
550
|
if (is_q is True):
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
551
|
+
log.write(" -Significant het:" ,len(sig_list_merged.loc[sig_list_merged["HetP"]<0.05,:]), verbose=verbose)
|
|
552
|
+
log.write(" -All sig:" ,len(sig_list_merged), verbose=verbose)
|
|
553
|
+
log.write(" -Het rate:" ,len(sig_list_merged.loc[sig_list_merged["HetP"]<0.05,:])/len(sig_list_merged), verbose=verbose)
|
|
554
554
|
|
|
555
555
|
# extract group
|
|
556
556
|
if include_all==True:
|
|
@@ -568,13 +568,13 @@ def compare_effect(path1,
|
|
|
568
568
|
sum2only["Edge_color"]="none"
|
|
569
569
|
both["Edge_color"]="none"
|
|
570
570
|
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
571
|
+
log.write(" -Identified "+str(len(sum0)) + " variants which are not significant in " + label[3]+".", verbose=verbose)
|
|
572
|
+
log.write(" -Identified "+str(len(sum1only)) + " variants which are only significant in " + label[0]+".", verbose=verbose)
|
|
573
|
+
log.write(" -Identified "+str(len(sum2only)) + " variants which are only significant in " + label[1]+".", verbose=verbose)
|
|
574
|
+
log.write(" -Identified "+str(len(both)) + " variants which are significant in " + label[2] + ".", verbose=verbose)
|
|
575
575
|
|
|
576
576
|
##plot########################################################################################
|
|
577
|
-
|
|
577
|
+
log.write("Creating the scatter plot for effect sizes comparison...", verbose=verbose)
|
|
578
578
|
#plt.style.use("ggplot")
|
|
579
579
|
sns.set_style("ticks")
|
|
580
580
|
fig,ax = plt.subplots(**plt_args)
|
|
@@ -721,7 +721,7 @@ def compare_effect(path1,
|
|
|
721
721
|
|
|
722
722
|
# estimate se for r
|
|
723
723
|
if r_se==True:
|
|
724
|
-
|
|
724
|
+
log.write(" -Estimating SE for rsq using Jackknife method.", verbose=verbose)
|
|
725
725
|
r_se_jackknife = jackknife_r(sig_list_merged)
|
|
726
726
|
r_se_jackknife_string = " ({:.2f})".format(r_se_jackknife)
|
|
727
727
|
else:
|
|
@@ -731,19 +731,19 @@ def compare_effect(path1,
|
|
|
731
731
|
r_se_jackknife_string= ""
|
|
732
732
|
|
|
733
733
|
#### calculate p values based on selected value , default = 0
|
|
734
|
-
|
|
734
|
+
log.write(" -Calculating p values based on given null slope :",null_beta, verbose=verbose)
|
|
735
735
|
t_score = (reg[0]-null_beta) / reg[4]
|
|
736
736
|
degree = len(sig_list_merged.dropna())-2
|
|
737
737
|
p = reg[3]
|
|
738
738
|
#ss.t.sf(abs(t_score), df=degree)*2
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
#
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
739
|
+
log.write(" -Beta = ", reg[0], verbose=verbose)
|
|
740
|
+
log.write(" -Beta_se = ", reg[4], verbose=verbose)
|
|
741
|
+
#log.write(" -H0 beta = ", null_beta, ", recalculated p = ", "{:.2e}".format(p), verbose=verbose)
|
|
742
|
+
log.write(" -H0 beta = 0",", default p = ", "{:.2e}".format(reg[3]), verbose=verbose)
|
|
743
|
+
log.write(" -Peason correlation coefficient = ", "{:.2f}".format(reg[2]), verbose=verbose)
|
|
744
|
+
log.write(" -r2 = ", "{:.2f}".format(reg[2]**2), verbose=verbose)
|
|
745
745
|
if r_se==True:
|
|
746
|
-
|
|
746
|
+
log.write(" -R se (jackknife) = {:.2e}".format(r_se_jackknife), verbose=verbose)
|
|
747
747
|
|
|
748
748
|
if reg[0] > 0:
|
|
749
749
|
#if regression coefficient >0 : auxiliary line slope = 1
|
|
@@ -866,15 +866,7 @@ def compare_effect(path1,
|
|
|
866
866
|
gc.collect()
|
|
867
867
|
|
|
868
868
|
save_figure(fig, save, keyword="esc",save_args=save_args, log=log, verbose=verbose)
|
|
869
|
-
|
|
870
|
-
#if save:
|
|
871
|
-
# if verbose: log.write("Saving plot:")
|
|
872
|
-
# if save==True:
|
|
873
|
-
# fig.savefig("./{}_{}_effect_comparison_plot.png".format(label[0],label[1]),bbox_inches="tight",**save_args)
|
|
874
|
-
# log.write(" -Saved to "+ "./{}_{}_effect_comparison_plot.png".format(label[0],label[1]) + " successfully!" )
|
|
875
|
-
# else:
|
|
876
|
-
# fig.savefig(save,bbox_inches="tight",**save_args)
|
|
877
|
-
# log.write(" -Saved to "+ save + " successfully!" )
|
|
869
|
+
|
|
878
870
|
|
|
879
871
|
return [sig_list_merged, fig,log]
|
|
880
872
|
|
|
@@ -902,10 +894,10 @@ def test_q(df,beta1,se1,beta2,se2,q_level=0.05,is_q_mc=False, log=Log(), verbose
|
|
|
902
894
|
df["Edge_color"]="white"
|
|
903
895
|
|
|
904
896
|
if is_q_mc=="fdr":
|
|
905
|
-
|
|
897
|
+
log.write(" -FDR correction applied...", verbose=verbose)
|
|
906
898
|
df[pq] = ss.false_discovery_control(df[pq])
|
|
907
899
|
elif is_q_mc=="bon":
|
|
908
|
-
|
|
900
|
+
log.write(" -Bonferroni correction applied...", verbose=verbose)
|
|
909
901
|
df[pq] = df[pq] * len(df[pq])
|
|
910
902
|
|
|
911
903
|
df.loc[df[pq]<q_level,"Edge_color"]="black"
|
|
@@ -958,5 +950,5 @@ def drop_duplicate_and_na(df,snpid="SNPID",sort_by=False,log=Log(),verbose=True)
|
|
|
958
950
|
df.drop_duplicates(subset=[snpid], keep='first', inplace=True)
|
|
959
951
|
length_after= len(df)
|
|
960
952
|
if length_before != length_after:
|
|
961
|
-
|
|
953
|
+
log.write(" -Dropped {} duplicates or NAs...".format(length_before - length_after), verbose=verbose)
|
|
962
954
|
return df
|
gwaslab/viz_plot_miamiplot2.py
CHANGED
|
@@ -321,7 +321,7 @@ def plot_miami2(
|
|
|
321
321
|
|
|
322
322
|
garbage_collect.collect()
|
|
323
323
|
|
|
324
|
-
|
|
324
|
+
log.write("Finished creating miami plot successfully", verbose=verbose)
|
|
325
325
|
#Return matplotlib figure object #######################################################################################
|
|
326
326
|
return fig, log
|
|
327
327
|
|
|
@@ -387,19 +387,19 @@ def _set_spine_visibility(ax1,ax5):
|
|
|
387
387
|
|
|
388
388
|
def _figure_type_load_sumstats(name, path, sep, cols, readcsv_args, loadmode, log, verbose):
    """Return the ``cols`` columns of one dataset, whatever form ``path`` takes.

    ``path`` may be a string, a ``Sumstats`` object, a ``pandas.DataFrame`` or
    anything else ``pd.read_table`` accepts. ``name`` is only used in the log
    messages.

    NOTE(review): the nesting below was reconstructed from a flattened diff
    view (indentation was lost) -- confirm against the real file, in
    particular that the "Obtaining ..." message is logged unconditionally.
    """
    source_type = type(path)

    if source_type is str:
        log.write(" -Loading {} ({} mode): {}".format(name, loadmode, path), verbose=verbose)
    log.write(" -Obtaining {} CHR, POS, P and annotation from: {}".format(name, cols), verbose=verbose)

    # Pickle mode short-circuits every other source type.
    if loadmode == "pickle":
        return load_data_from_pickle(path, usecols=cols)

    # Exact type checks (not isinstance) are kept on purpose: any other type,
    # subclasses included, falls through to the tabular-file reader.
    if source_type is Sumstats:
        log.write(" -Loading {} from gwaslab.Sumstats Object".format(name), verbose=verbose)
        return path.data[cols].copy()

    if source_type is pd.DataFrame:
        log.write(" -Loading {} from pandas.DataFrame Object".format(name), verbose=verbose)
        return path[cols].copy()

    log.write(" -Loading {} from tabular files".format(name), verbose=verbose)
    # The first three requested columns are read as string / Int64 / float64.
    column_dtypes = {cols[0]: "string", cols[1]: "Int64", cols[2]: "float64"}
    return pd.read_table(path, sep=sep, usecols=cols, dtype=column_dtypes, **readcsv_args)
|
gwaslab/viz_plot_mqqplot.py
CHANGED
|
@@ -174,6 +174,8 @@ def mqqplot(insumstats,
|
|
|
174
174
|
include_chrXYMT = True,
|
|
175
175
|
ylim=None,
|
|
176
176
|
xpad=None,
|
|
177
|
+
xpadl=None,
|
|
178
|
+
xpadr=None,
|
|
177
179
|
chrpad=0.03,
|
|
178
180
|
drop_chr_start=False,
|
|
179
181
|
title =None,
|
|
@@ -1002,8 +1004,7 @@ def mqqplot(insumstats,
|
|
|
1002
1004
|
if "qq" in mode:
|
|
1003
1005
|
ax2.set_ylim(ylim)
|
|
1004
1006
|
|
|
1005
|
-
|
|
1006
|
-
ax1.set_xlim([0 - xpad* sumstats["i"].max(),(1+xpad)*sumstats["i"].max()])
|
|
1007
|
+
ax1 = _add_pad_to_x_axis(ax1, xpad, xpadl, xpadr, sumstats)
|
|
1007
1008
|
|
|
1008
1009
|
# Titles
|
|
1009
1010
|
if title and anno and len(to_annotate)>0:
|
|
@@ -1028,8 +1029,21 @@ def mqqplot(insumstats,
|
|
|
1028
1029
|
|
|
1029
1030
|
|
|
1030
1031
|
|
|
1032
|
+
def _add_pad_to_x_axis(ax1, xpad, xpadl, xpadr, sumstats):
    """Widen the x-axis limits of ``ax1`` by padding relative to the data range.

    Every pad argument is a fraction of ``sumstats["i"].max()``; the resulting
    absolute pad is subtracted from the current left limit and/or added to the
    current right limit.

    Parameters
    ----------
    ax1 : object exposing ``get_xlim()`` and ``set_xlim()``
        Presumably a matplotlib Axes; only these two methods are used.
    xpad : float or None
        Symmetric padding applied to both sides.
    xpadl : float or None
        Padding for the left side only; takes precedence over ``xpad`` there.
    xpadr : float or None
        Padding for the right side only; takes precedence over ``xpad`` there.
    sumstats : mapping or DataFrame
        Must provide ``sumstats["i"].max()``.

    Returns
    -------
    The same ``ax1`` object that was passed in.
    """
    xmin, xmax = ax1.get_xlim()

    # Nothing requested: leave the current limits untouched.
    if xpad is None and xpadl is None and xpadr is None:
        return ax1

    span = sumstats["i"].max()
    left, right = xmin, xmax

    if xpad is not None:
        # The right limit must grow from xmax; the earlier code used
        # xmin + pad, which collapsed the axis around its left edge.
        left = xmin - xpad * span
        right = xmax + xpad * span
    if xpadl is not None:
        left = xmin - xpadl * span
    if xpadr is not None:
        right = xmax + xpadr * span

    # Apply both sides in a single call so that a one-sided pad never
    # discards the padding already computed for the other side.
    ax1.set_xlim([left, right])
    return ax1
|
|
1033
1047
|
|
|
1034
1048
|
|
|
1035
1049
|
|
gwaslab/viz_plot_qqplot.py
CHANGED
|
@@ -118,7 +118,7 @@ def _plot_qq(
|
|
|
118
118
|
level = 1 - np.power(10.0,-np.nanmedian(expected_all))
|
|
119
119
|
log.write(" -Level for calculating lambda GC : {}".format(1 - level),verbose=verbose)
|
|
120
120
|
|
|
121
|
-
if
|
|
121
|
+
if not include_chrXYMT : log.write(" -Excluding chrX,Y, MT from calculation of lambda GC.",verbose=verbose)
|
|
122
122
|
lambdagc = lambdaGC(p_toplot_raw,
|
|
123
123
|
mode="MLOG10P",
|
|
124
124
|
level=level,
|