gwaslab 3.4.37__py3-none-any.whl → 3.4.39__py3-none-any.whl

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Potentially problematic release.


This version of gwaslab might be problematic. Click here for more details.

Files changed (57)
  1. gwaslab/bd_common_data.py +6 -3
  2. gwaslab/bd_download.py +9 -9
  3. gwaslab/bd_get_hapmap3.py +43 -9
  4. gwaslab/data/formatbook.json +722 -721
  5. gwaslab/g_Log.py +22 -5
  6. gwaslab/g_Sumstats.py +110 -163
  7. gwaslab/g_SumstatsPair.py +76 -25
  8. gwaslab/g_SumstatsT.py +2 -2
  9. gwaslab/g_Sumstats_summary.py +3 -3
  10. gwaslab/g_version.py +10 -10
  11. gwaslab/hm_casting.py +36 -17
  12. gwaslab/hm_harmonize_sumstats.py +354 -221
  13. gwaslab/hm_rsid_to_chrpos.py +1 -1
  14. gwaslab/io_preformat_input.py +49 -43
  15. gwaslab/io_read_ldsc.py +49 -1
  16. gwaslab/io_to_formats.py +428 -295
  17. gwaslab/ldsc_irwls.py +198 -0
  18. gwaslab/ldsc_jackknife.py +514 -0
  19. gwaslab/ldsc_ldscore.py +417 -0
  20. gwaslab/ldsc_parse.py +294 -0
  21. gwaslab/ldsc_regressions.py +747 -0
  22. gwaslab/ldsc_sumstats.py +629 -0
  23. gwaslab/qc_check_datatype.py +3 -3
  24. gwaslab/qc_fix_sumstats.py +891 -778
  25. gwaslab/util_ex_calculate_ldmatrix.py +31 -13
  26. gwaslab/util_ex_gwascatalog.py +25 -25
  27. gwaslab/util_ex_ldproxyfinder.py +10 -10
  28. gwaslab/util_ex_ldsc.py +189 -0
  29. gwaslab/util_ex_process_ref.py +3 -3
  30. gwaslab/util_ex_run_coloc.py +26 -4
  31. gwaslab/util_in_calculate_gc.py +6 -6
  32. gwaslab/util_in_calculate_power.py +42 -43
  33. gwaslab/util_in_convert_h2.py +8 -8
  34. gwaslab/util_in_fill_data.py +30 -30
  35. gwaslab/util_in_filter_value.py +201 -74
  36. gwaslab/util_in_get_density.py +10 -10
  37. gwaslab/util_in_get_sig.py +445 -71
  38. gwaslab/viz_aux_annotate_plot.py +12 -12
  39. gwaslab/viz_aux_quickfix.py +42 -37
  40. gwaslab/viz_aux_reposition_text.py +10 -7
  41. gwaslab/viz_aux_save_figure.py +18 -8
  42. gwaslab/viz_plot_compare_af.py +32 -33
  43. gwaslab/viz_plot_compare_effect.py +63 -71
  44. gwaslab/viz_plot_miamiplot2.py +34 -26
  45. gwaslab/viz_plot_mqqplot.py +126 -75
  46. gwaslab/viz_plot_qqplot.py +11 -8
  47. gwaslab/viz_plot_regionalplot.py +36 -33
  48. gwaslab/viz_plot_rg_heatmap.py +28 -26
  49. gwaslab/viz_plot_stackedregional.py +40 -21
  50. gwaslab/viz_plot_trumpetplot.py +65 -61
  51. gwaslab-3.4.39.dist-info/LICENSE +674 -0
  52. {gwaslab-3.4.37.dist-info → gwaslab-3.4.39.dist-info}/METADATA +5 -4
  53. gwaslab-3.4.39.dist-info/RECORD +80 -0
  54. gwaslab-3.4.37.dist-info/RECORD +0 -72
  55. /gwaslab-3.4.37.dist-info/LICENSE → /gwaslab-3.4.39.dist-info/LICENSE_before_v3.4.39 +0 -0
  56. {gwaslab-3.4.37.dist-info → gwaslab-3.4.39.dist-info}/WHEEL +0 -0
  57. {gwaslab-3.4.37.dist-info → gwaslab-3.4.39.dist-info}/top_level.txt +0 -0
@@ -99,14 +99,14 @@ def compare_effect(path1,
99
99
  if anno_het ==True:
100
100
  is_q=True
101
101
 
102
- if verbose: log.write("Start to process the raw sumstats for plotting...")
102
+ log.write("Start to process the raw sumstats for plotting...")
103
103
 
104
104
  ######### 1 check the value used to plot
105
105
  if mode not in ["Beta","beta","BETA","OR","or"]:
106
106
  raise ValueError("Please input Beta or OR")
107
107
 
108
108
  if type(path1) is Sumstats:
109
- if verbose: log.write("Path1 is gwaslab Sumstats object...")
109
+ log.write("Path1 is gwaslab Sumstats object...")
110
110
  if cols_name_list_1 is None:
111
111
  cols_name_list_1 = ["SNPID","P","EA","NEA","CHR","POS"]
112
112
  if effect_cols_list_1 is None:
@@ -115,10 +115,10 @@ def compare_effect(path1,
115
115
  else:
116
116
  effect_cols_list_1 = ["OR","OR_95L","OR_95U"]
117
117
  elif type(path1) is pd.DataFrame:
118
- if verbose: log.write("Path1 is pandas DataFrame object...")
118
+ log.write("Path1 is pandas DataFrame object...")
119
119
 
120
120
  if type(path2) is Sumstats:
121
- if verbose: log.write("Path2 is gwaslab Sumstats object...")
121
+ log.write("Path2 is gwaslab Sumstats object...")
122
122
  if cols_name_list_2 is None:
123
123
  cols_name_list_2 = ["SNPID","P","EA","NEA","CHR","POS"]
124
124
  if effect_cols_list_2 is None:
@@ -127,10 +127,10 @@ def compare_effect(path1,
127
127
  else:
128
128
  effect_cols_list_2 = ["OR","OR_95L","OR_95U"]
129
129
  elif type(path2) is pd.DataFrame:
130
- if verbose: log.write("Path2 is pandas DataFrame object...")
130
+ log.write("Path2 is pandas DataFrame object...")
131
131
 
132
132
  ######### 2 extract snplist2
133
- if verbose: log.write(" -Loading "+label[1]+" SNP list in memory...")
133
+ log.write(" -Loading "+label[1]+" SNP list in memory...")
134
134
 
135
135
  if type(path2) is Sumstats:
136
136
  sumstats = path2.data[[cols_name_list_2[0]]].copy()
@@ -148,7 +148,7 @@ def compare_effect(path1,
148
148
  cols_to_extract = [cols_name_list_1[0],cols_name_list_1[1],cols_name_list_1[4],cols_name_list_1[5]]
149
149
 
150
150
  ######### 4 load sumstats1
151
- if verbose: log.write(" -Loading sumstats for "+label[0]+":",",".join(cols_to_extract))
151
+ log.write(" -Loading sumstats for "+label[0]+":",",".join(cols_to_extract))
152
152
 
153
153
  if type(path1) is Sumstats:
154
154
  sumstats = path1.data[cols_to_extract].copy()
@@ -163,7 +163,7 @@ def compare_effect(path1,
163
163
  sumstats[cols_name_list_1[1]] = np.power(10,-sumstats[cols_name_list_1[1]])
164
164
  ######### 5 extract the common set
165
165
  common_snp_set = common_snp_set.intersection(sumstats[cols_name_list_1[0]].values)
166
- if verbose: log.write(" -Counting variants available for both datasets:",len(common_snp_set)," variants...")
166
+ log.write(" -Counting variants available for both datasets:",len(common_snp_set)," variants...")
167
167
 
168
168
  ######### 6 rename the sumstats
169
169
  rename_dict = { cols_name_list_1[0]:"SNPID",
@@ -179,16 +179,16 @@ def compare_effect(path1,
179
179
  ######### 7 exctract only available variants from sumstats1
180
180
  sumstats = sumstats.loc[sumstats["SNPID"].isin(common_snp_set),:]
181
181
 
182
- if verbose: log.write(" -Using only variants available for both datasets...")
182
+ log.write(" -Using only variants available for both datasets...")
183
183
  ######### 8 extact SNPs for comparison
184
184
 
185
185
  if snplist is not None:
186
186
  ######### 8.1 if a snplist is provided, use the snp list
187
- if verbose: log.write(" -Extract variants in the given list from "+label[0]+"...")
187
+ log.write(" -Extract variants in the given list from "+label[0]+"...")
188
188
  sig_list_1 = sumstats.loc[sumstats["SNPID"].isin(snplist),:].copy()
189
189
  else:
190
190
  ######### 8,2 otherwise use the sutomatically detected lead SNPs
191
- if verbose: log.write(" -Extract lead variants from "+label[0]+"...")
191
+ log.write(" -Extract lead variants from "+label[0]+"...")
192
192
  sig_list_1 = getsig(sumstats,"SNPID","CHR","POS","P", verbose=verbose,sig_level=sig_level,**get_lead_args)
193
193
 
194
194
  if drop==True:
@@ -200,7 +200,7 @@ def compare_effect(path1,
200
200
  else:
201
201
  cols_to_extract = [cols_name_list_2[0],cols_name_list_2[1],cols_name_list_2[4],cols_name_list_2[5]]
202
202
 
203
- if verbose: log.write(" -Loading sumstats for "+label[1]+":",",".join(cols_to_extract))
203
+ log.write(" -Loading sumstats for "+label[1]+":",",".join(cols_to_extract))
204
204
 
205
205
  if type(path2) is Sumstats:
206
206
  sumstats = path2.data[cols_to_extract].copy()
@@ -228,10 +228,10 @@ def compare_effect(path1,
228
228
  ######## 12 extact SNPs for comparison
229
229
  if snplist is not None:
230
230
  ######### 12.1 if a snplist is provided, use the snp list
231
- if verbose: log.write(" -Extract snps in the given list from "+label[1]+"...")
231
+ log.write(" -Extract snps in the given list from "+label[1]+"...")
232
232
  sig_list_2 = sumstats.loc[sumstats["SNPID"].isin(snplist),:].copy()
233
233
  else:
234
- if verbose: log.write(" -Extract lead snps from "+label[1]+"...")
234
+ log.write(" -Extract lead snps from "+label[1]+"...")
235
235
  ######### 12.2 otherwise use the sutomatically detected lead SNPs
236
236
  sig_list_2 = getsig(sumstats,"SNPID","CHR","POS","P",
237
237
  verbose=verbose,sig_level=sig_level,**get_lead_args)
@@ -240,7 +240,7 @@ def compare_effect(path1,
240
240
 
241
241
  ######### 13 Merge two list using SNPID
242
242
  ##############################################################################
243
- if verbose: log.write("Merging snps from "+label[0]+" and "+label[1]+"...")
243
+ log.write("Merging snps from "+label[0]+" and "+label[1]+"...")
244
244
 
245
245
  sig_list_merged = pd.merge(sig_list_1,sig_list_2,left_on="SNPID",right_on="SNPID",how="outer",suffixes=('_1', '_2'))
246
246
  # SNPID P_1 P_2
@@ -260,7 +260,7 @@ def compare_effect(path1,
260
260
  cols_to_extract = [cols_name_list_1[0],cols_name_list_1[1], cols_name_list_1[2],cols_name_list_1[3], effect_cols_list_1[0], effect_cols_list_1[1], effect_cols_list_1[2]]
261
261
 
262
262
  if len(eaf)>0: cols_to_extract.append(eaf[0])
263
- if verbose: log.write(" -Extract statistics of selected variants from "+label[0]+" : ",",".join(cols_to_extract) )
263
+ log.write(" -Extract statistics of selected variants from "+label[0]+" : ",",".join(cols_to_extract) )
264
264
 
265
265
  if type(path1) is Sumstats:
266
266
  sumstats = path1.data[cols_to_extract].copy()
@@ -300,7 +300,7 @@ def compare_effect(path1,
300
300
  sumstats = drop_duplicate_and_na(sumstats, sort_by="P_1", log=log , verbose=verbose)
301
301
  sumstats.drop("P_1",axis=1,inplace=True)
302
302
 
303
- if verbose: log.write(" -Merging "+label[0]+" effect information...")
303
+ log.write(" -Merging "+label[0]+" effect information...", verbose=verbose)
304
304
 
305
305
  sig_list_merged = pd.merge(sig_list_merged,sumstats,
306
306
  left_on="SNPID",right_on="SNPID",
@@ -316,7 +316,7 @@ def compare_effect(path1,
316
316
  ## check if eaf column is provided.
317
317
  if len(eaf)>0: cols_to_extract.append(eaf[1])
318
318
 
319
- if verbose: log.write(" -Extract statistics of selected variants from "+label[1]+" : ",",".join(cols_to_extract) )
319
+ log.write(" -Extract statistics of selected variants from "+label[1]+" : ",",".join(cols_to_extract), verbose=verbose )
320
320
  if type(path2) is Sumstats:
321
321
  sumstats = path2.data[cols_to_extract].copy()
322
322
  elif type(path2) is pd.DataFrame:
@@ -353,7 +353,7 @@ def compare_effect(path1,
353
353
  sumstats = drop_duplicate_and_na(sumstats, sort_by="P_2", log=log, verbose=verbose)
354
354
  sumstats.drop("P_2",axis=1,inplace=True)
355
355
 
356
- if verbose: log.write(" -Merging "+label[1]+" effect information...")
356
+ log.write(" -Merging "+label[1]+" effect information...", verbose=verbose)
357
357
  sig_list_merged = pd.merge(sig_list_merged,sumstats,
358
358
  left_on="SNPID",right_on="SNPID",
359
359
  how="left")
@@ -361,7 +361,7 @@ def compare_effect(path1,
361
361
  sig_list_merged.set_index("SNPID",inplace=True)
362
362
 
363
363
  ################ 16 update sumstats1
364
- if verbose: log.write(" -Updating missing information for "+label[0]+" ...")
364
+ log.write(" -Updating missing information for "+label[0]+" ...", verbose=verbose)
365
365
  if type(path1) is Sumstats:
366
366
  sumstats = path1.data[[cols_name_list_1[0],cols_name_list_1[1]]].copy()
367
367
  elif type(path1) is pd.DataFrame:
@@ -383,7 +383,7 @@ def compare_effect(path1,
383
383
  sig_list_merged.update(sumstats)
384
384
 
385
385
  ################# 17 update sumstats2
386
- if verbose: log.write(" -Updating missing information for "+label[1]+" ...")
386
+ log.write(" -Updating missing information for "+label[1]+" ...", verbose=verbose)
387
387
  if type(path2) is Sumstats:
388
388
  sumstats = path2.data[[cols_name_list_2[0],cols_name_list_2[1]]].copy()
389
389
  elif type(path2) is pd.DataFrame:
@@ -406,15 +406,15 @@ def compare_effect(path1,
406
406
  sig_list_merged.update(sumstats)
407
407
 
408
408
  if scaled1 ==True :
409
- if verbose:log.write(" -Sumstats -log10(P) values are being converted to P...")
409
+ log.write(" -Sumstats -log10(P) values are being converted to P...", verbose=verbose)
410
410
  sig_list_merged["P_1"] = np.power(10,-sig_list_merged["P_1"])
411
411
  if scaled2 ==True :
412
- if verbose:log.write(" -Sumstats -log10(P) values are being converted to P...")
412
+ log.write(" -Sumstats -log10(P) values are being converted to P...", verbose=verbose)
413
413
  sig_list_merged["P_2"] = np.power(10,-sig_list_merged["P_2"])
414
414
  ####
415
415
  #################################################################################
416
416
  ############## 18 init indicator
417
- if verbose: log.write(" -Assigning indicator ...")
417
+ log.write(" -Assigning indicator ...", verbose=verbose)
418
418
  # 0-> 0
419
419
  # 1 -> sig in sumstats1
420
420
  # 2 -> sig in sumsatts2
@@ -428,7 +428,7 @@ def compare_effect(path1,
428
428
  sig_list_merged["POS"]=np.max(sig_list_merged[["POS_1","POS_2"]], axis=1).astype(int)
429
429
  sig_list_merged.drop(labels=['CHR_1', 'CHR_2','POS_1', 'POS_2'], axis=1,inplace=True)
430
430
 
431
- if verbose: log.write(" -Aligning "+label[1]+" EA with "+label[0]+" EA ...")
431
+ log.write(" -Aligning "+label[1]+" EA with "+label[0]+" EA ...", verbose=verbose)
432
432
  ############### 19 align allele effect with sumstats 1
433
433
  sig_list_merged["EA_1"]=sig_list_merged["EA_1"].astype("string")
434
434
  sig_list_merged["EA_2"]=sig_list_merged["EA_2"].astype("string")
@@ -476,16 +476,16 @@ def compare_effect(path1,
476
476
 
477
477
  # checking effect allele matching
478
478
  nonmatch = np.nansum(sig_list_merged["EA_1"] != sig_list_merged["EA_2_aligned"])
479
- if verbose: log.write(" -Aligned all EAs in {} with EAs in {} ...".format(label[1],label[0]))
479
+ log.write(" -Aligned all EAs in {} with EAs in {} ...".format(label[1],label[0]), verbose=verbose)
480
480
  if nonmatch>0:
481
- if verbose: log.write(" -Warning: Alleles for {} variants do not match...".format(nonmatch))
481
+ log.warning("Alleles for {} variants do not match...".format(nonmatch))
482
482
  if allele_match==True:
483
483
  if nonmatch>0:
484
484
  sig_list_merged = sig_list_merged.loc[sig_list_merged["EA_1"] == sig_list_merged["EA_2_aligned"]]
485
485
  else:
486
- if verbose: log.write(" -No variants with EA not matching...")
486
+ log.write(" -No variants with EA not matching...", verbose=verbose)
487
487
  if fdr==True:
488
- if verbose: log.write(" -Using FDR...")
488
+ log.write(" -Using FDR...", verbose=verbose)
489
489
  #sig_list_merged["P_1"] = fdrcorrection(sig_list_merged["P_1"])[1]
490
490
  #sig_list_merged["P_2"] = fdrcorrection(sig_list_merged["P_2"])[1]
491
491
  sig_list_merged["P_1"] =ss.false_discovery_control(sig_list_merged["P_1"])
@@ -495,41 +495,41 @@ def compare_effect(path1,
495
495
  ## winner's curse correction using aligned beta
496
496
  if mode=="beta":
497
497
  if wc_correction == "all":
498
- if verbose: log.write(" -Correcting BETA for winner's curse with threshold at {} for all variants...".format(sig_level))
498
+ log.write(" -Correcting BETA for winner's curse with threshold at {} for all variants...".format(sig_level), verbose=verbose)
499
499
  sig_list_merged["EFFECT_1_RAW"] = sig_list_merged["EFFECT_1"].copy()
500
500
  sig_list_merged["EFFECT_2_aligned_RAW"] = sig_list_merged["EFFECT_2_aligned"].copy()
501
501
 
502
- if verbose: log.write(" -Correcting BETA for {} variants in sumstats1...".format(sum(~sig_list_merged["EFFECT_1"].isna())))
502
+ log.write(" -Correcting BETA for {} variants in sumstats1...".format(sum(~sig_list_merged["EFFECT_1"].isna())), verbose=verbose)
503
503
  sig_list_merged["EFFECT_1"] = sig_list_merged[["EFFECT_1_RAW","SE_1"]].apply(lambda x: wc_correct(x[0],x[1],sig_level),axis=1)
504
504
 
505
- if verbose: log.write(" -Correcting BETA for {} variants in sumstats2...".format(sum(~sig_list_merged["EFFECT_2_aligned"].isna())))
505
+ log.write(" -Correcting BETA for {} variants in sumstats2...".format(sum(~sig_list_merged["EFFECT_2_aligned"].isna())), verbose=verbose)
506
506
  sig_list_merged["EFFECT_2_aligned"] = sig_list_merged[["EFFECT_2_aligned_RAW","SE_2"]].apply(lambda x: wc_correct(x[0],x[1],sig_level),axis=1)
507
507
 
508
508
  elif wc_correction == "sig" :
509
- if verbose: log.write(" - Correcting BETA for winner's curse with threshold at {} for significant variants...".format(sig_level))
509
+ log.write(" - Correcting BETA for winner's curse with threshold at {} for significant variants...".format(sig_level), verbose=verbose)
510
510
  sig_list_merged["EFFECT_1_RAW"] = sig_list_merged["EFFECT_1"].copy()
511
511
  sig_list_merged["EFFECT_2_aligned_RAW"] = sig_list_merged["EFFECT_2_aligned"].copy()
512
- if verbose: log.write(" -Correcting BETA for {} variants in sumstats1...".format(sum(sig_list_merged["P_1"]<sig_level)))
512
+ log.write(" -Correcting BETA for {} variants in sumstats1...".format(sum(sig_list_merged["P_1"]<sig_level)), verbose=verbose)
513
513
  sig_list_merged.loc[sig_list_merged["P_1"]<sig_level, "EFFECT_1"] = sig_list_merged.loc[sig_list_merged["P_1"]<sig_level, ["EFFECT_1_RAW","SE_1"]].apply(lambda x: wc_correct_test(x[0],x[1],sig_level),axis=1)
514
- if verbose: log.write(" -Correcting BETA for {} variants in sumstats2...".format(sum(sig_list_merged["P_2"]<sig_level)))
514
+ log.write(" -Correcting BETA for {} variants in sumstats2...".format(sum(sig_list_merged["P_2"]<sig_level)), verbose=verbose)
515
515
  sig_list_merged.loc[sig_list_merged["P_2"]<sig_level, "EFFECT_2_aligned"] = sig_list_merged.loc[sig_list_merged["P_2"]<sig_level, ["EFFECT_2_aligned_RAW","SE_2"]].apply(lambda x: wc_correct_test(x[0],x[1],sig_level),axis=1)
516
516
 
517
517
  elif wc_correction == "sumstats1" :
518
- if verbose: log.write(" - Correcting BETA for winner's curse with threshold at {} for significant variants in sumstats1...".format(sig_level))
518
+ log.write(" - Correcting BETA for winner's curse with threshold at {} for significant variants in sumstats1...".format(sig_level), verbose=verbose)
519
519
  sig_list_merged["EFFECT_1_RAW"] = sig_list_merged["EFFECT_1"].copy()
520
- if verbose: log.write(" -Correcting BETA for {} variants in sumstats1...".format(sum(sig_list_merged["P_1"]<sig_level)))
520
+ log.write(" -Correcting BETA for {} variants in sumstats1...".format(sum(sig_list_merged["P_1"]<sig_level)), verbose=verbose)
521
521
  sig_list_merged.loc[sig_list_merged["P_1"]<sig_level, "EFFECT_1"] = sig_list_merged.loc[sig_list_merged["P_1"]<sig_level, ["EFFECT_1_RAW","SE_1"]].apply(lambda x: wc_correct_test(x[0],x[1],sig_level),axis=1)
522
522
 
523
523
  elif wc_correction == "sumstats2" :
524
- if verbose: log.write(" - Correcting BETA for winner's curse with threshold at {} for significant variants in sumstats2...".format(sig_level))
524
+ log.write(" - Correcting BETA for winner's curse with threshold at {} for significant variants in sumstats2...".format(sig_level), verbose=verbose)
525
525
  sig_list_merged["EFFECT_2_aligned_RAW"] = sig_list_merged["EFFECT_2_aligned"].copy()
526
- if verbose: log.write(" -Correcting BETA for {} variants in sumstats2...".format(sum(sig_list_merged["P_2"]<sig_level)))
526
+ log.write(" -Correcting BETA for {} variants in sumstats2...".format(sum(sig_list_merged["P_2"]<sig_level)), verbose=verbose)
527
527
  sig_list_merged.loc[sig_list_merged["P_2"]<sig_level, "EFFECT_2_aligned"] = sig_list_merged.loc[sig_list_merged["P_2"]<sig_level, ["EFFECT_2_aligned_RAW","SE_2"]].apply(lambda x: wc_correct_test(x[0],x[1],sig_level),axis=1)
528
528
 
529
529
  ########################## Het test############################################################
530
530
  ## heterogeneity test
531
531
  if (is_q is True):
532
- if verbose: log.write(" -Calculating Cochran's Q statistics and peform chisq test...")
532
+ log.write(" -Calculating Cochran's Q statistics and peform chisq test...", verbose=verbose)
533
533
  if mode=="beta" or mode=="BETA" or mode=="Beta":
534
534
  sig_list_merged = test_q(sig_list_merged,"EFFECT_1","SE_1","EFFECT_2_aligned","SE_2",q_level=q_level,is_q_mc=is_q_mc, log=log, verbose=verbose)
535
535
  else:
@@ -538,19 +538,19 @@ def compare_effect(path1,
538
538
  ######################### save ###############################################################
539
539
  ## save the merged data
540
540
  save_path = label[0]+"_"+label[1]+"_beta_sig_list_merged.tsv"
541
- if verbose: log.write(" -Saving the merged data to:",save_path)
541
+ log.write(" -Saving the merged data to:",save_path, verbose=verbose)
542
542
  sig_list_merged.to_csv(save_path,"\t")
543
543
 
544
544
  ########################## maf_threshold#############################################################
545
545
  if (len(eaf)>0) and (maf_level is not None):
546
546
  both_eaf_clear = (sig_list_merged["EAF_1"]>maf_level)&(sig_list_merged["EAF_1"]<1-maf_level)&(sig_list_merged["EAF_2"]>maf_level)&(sig_list_merged["EAF_2"]<1-maf_level)
547
- if verbose: log.write(" -Exclude "+str(len(sig_list_merged) -sum(both_eaf_clear))+ " variants with maf <",maf_level)
547
+ log.write(" -Exclude "+str(len(sig_list_merged) -sum(both_eaf_clear))+ " variants with maf <",maf_level, verbose=verbose)
548
548
  sig_list_merged = sig_list_merged.loc[both_eaf_clear,:]
549
549
  # heterogeneity summary
550
550
  if (is_q is True):
551
- if verbose: log.write(" -Significant het:" ,len(sig_list_merged.loc[sig_list_merged["HetP"]<0.05,:]))
552
- if verbose: log.write(" -All sig:" ,len(sig_list_merged))
553
- if verbose: log.write(" -Het rate:" ,len(sig_list_merged.loc[sig_list_merged["HetP"]<0.05,:])/len(sig_list_merged))
551
+ log.write(" -Significant het:" ,len(sig_list_merged.loc[sig_list_merged["HetP"]<0.05,:]), verbose=verbose)
552
+ log.write(" -All sig:" ,len(sig_list_merged), verbose=verbose)
553
+ log.write(" -Het rate:" ,len(sig_list_merged.loc[sig_list_merged["HetP"]<0.05,:])/len(sig_list_merged), verbose=verbose)
554
554
 
555
555
  # extract group
556
556
  if include_all==True:
@@ -568,13 +568,13 @@ def compare_effect(path1,
568
568
  sum2only["Edge_color"]="none"
569
569
  both["Edge_color"]="none"
570
570
 
571
- if verbose: log.write(" -Identified "+str(len(sum0)) + " variants which are not significant in " + label[3]+".")
572
- if verbose: log.write(" -Identified "+str(len(sum1only)) + " variants which are only significant in " + label[0]+".")
573
- if verbose: log.write(" -Identified "+str(len(sum2only)) + " variants which are only significant in " + label[1]+".")
574
- if verbose: log.write(" -Identified "+str(len(both)) + " variants which are significant in " + label[2] + ".")
571
+ log.write(" -Identified "+str(len(sum0)) + " variants which are not significant in " + label[3]+".", verbose=verbose)
572
+ log.write(" -Identified "+str(len(sum1only)) + " variants which are only significant in " + label[0]+".", verbose=verbose)
573
+ log.write(" -Identified "+str(len(sum2only)) + " variants which are only significant in " + label[1]+".", verbose=verbose)
574
+ log.write(" -Identified "+str(len(both)) + " variants which are significant in " + label[2] + ".", verbose=verbose)
575
575
 
576
576
  ##plot########################################################################################
577
- if verbose: log.write("Creating the scatter plot for effect sizes comparison...")
577
+ log.write("Creating the scatter plot for effect sizes comparison...", verbose=verbose)
578
578
  #plt.style.use("ggplot")
579
579
  sns.set_style("ticks")
580
580
  fig,ax = plt.subplots(**plt_args)
@@ -721,7 +721,7 @@ def compare_effect(path1,
721
721
 
722
722
  # estimate se for r
723
723
  if r_se==True:
724
- if verbose:log.write(" -Estimating SE for rsq using Jackknife method.")
724
+ log.write(" -Estimating SE for rsq using Jackknife method.", verbose=verbose)
725
725
  r_se_jackknife = jackknife_r(sig_list_merged)
726
726
  r_se_jackknife_string = " ({:.2f})".format(r_se_jackknife)
727
727
  else:
@@ -731,19 +731,19 @@ def compare_effect(path1,
731
731
  r_se_jackknife_string= ""
732
732
 
733
733
  #### calculate p values based on selected value , default = 0
734
- if verbose:log.write(" -Calculating p values based on given null slope :",null_beta)
734
+ log.write(" -Calculating p values based on given null slope :",null_beta, verbose=verbose)
735
735
  t_score = (reg[0]-null_beta) / reg[4]
736
736
  degree = len(sig_list_merged.dropna())-2
737
737
  p = reg[3]
738
738
  #ss.t.sf(abs(t_score), df=degree)*2
739
- if verbose:log.write(" -Beta = ", reg[0])
740
- if verbose:log.write(" -Beta_se = ", reg[4])
741
- #if verbose:log.write(" -H0 beta = ", null_beta, ", recalculated p = ", "{:.2e}".format(p))
742
- if verbose:log.write(" -H0 beta = 0",", default p = ", "{:.2e}".format(reg[3]))
743
- if verbose:log.write(" -Peason correlation coefficient = ", "{:.2f}".format(reg[2]))
744
- if verbose:log.write(" -r2 = ", "{:.2f}".format(reg[2]**2))
739
+ log.write(" -Beta = ", reg[0], verbose=verbose)
740
+ log.write(" -Beta_se = ", reg[4], verbose=verbose)
741
+ #log.write(" -H0 beta = ", null_beta, ", recalculated p = ", "{:.2e}".format(p), verbose=verbose)
742
+ log.write(" -H0 beta = 0",", default p = ", "{:.2e}".format(reg[3]), verbose=verbose)
743
+ log.write(" -Peason correlation coefficient = ", "{:.2f}".format(reg[2]), verbose=verbose)
744
+ log.write(" -r2 = ", "{:.2f}".format(reg[2]**2), verbose=verbose)
745
745
  if r_se==True:
746
- if verbose:log.write(" -R se (jackknife) = {:.2e}".format(r_se_jackknife))
746
+ log.write(" -R se (jackknife) = {:.2e}".format(r_se_jackknife), verbose=verbose)
747
747
 
748
748
  if reg[0] > 0:
749
749
  #if regression coeeficient >0 : auxiliary line slope = 1
@@ -866,15 +866,7 @@ def compare_effect(path1,
866
866
  gc.collect()
867
867
 
868
868
  save_figure(fig, save, keyword="esc",save_args=save_args, log=log, verbose=verbose)
869
-
870
- #if save:
871
- # if verbose: log.write("Saving plot:")
872
- # if save==True:
873
- # fig.savefig("./{}_{}_effect_comparison_plot.png".format(label[0],label[1]),bbox_inches="tight",**save_args)
874
- # log.write(" -Saved to "+ "./{}_{}_effect_comparison_plot.png".format(label[0],label[1]) + " successfully!" )
875
- # else:
876
- # fig.savefig(save,bbox_inches="tight",**save_args)
877
- # log.write(" -Saved to "+ save + " successfully!" )
869
+
878
870
 
879
871
  return [sig_list_merged, fig,log]
880
872
 
@@ -902,10 +894,10 @@ def test_q(df,beta1,se1,beta2,se2,q_level=0.05,is_q_mc=False, log=Log(), verbose
902
894
  df["Edge_color"]="white"
903
895
 
904
896
  if is_q_mc=="fdr":
905
- if verbose: log.write(" -FDR correction applied...")
897
+ log.write(" -FDR correction applied...", verbose=verbose)
906
898
  df[pq] = ss.false_discovery_control(df[pq])
907
899
  elif is_q_mc=="bon":
908
- if verbose: log.write(" -Bonferroni correction applied...")
900
+ log.write(" -Bonferroni correction applied...", verbose=verbose)
909
901
  df[pq] = df[pq] * len(df[pq])
910
902
 
911
903
  df.loc[df[pq]<q_level,"Edge_color"]="black"
@@ -958,5 +950,5 @@ def drop_duplicate_and_na(df,snpid="SNPID",sort_by=False,log=Log(),verbose=True)
958
950
  df.drop_duplicates(subset=[snpid], keep='first', inplace=True)
959
951
  length_after= len(df)
960
952
  if length_before != length_after:
961
- if verbose: log.write(" -Dropped {} duplicates or NAs...".format(length_before - length_after))
953
+ log.write(" -Dropped {} duplicates or NAs...".format(length_before - length_after), verbose=verbose)
962
954
  return df
@@ -38,6 +38,7 @@ from gwaslab.g_Sumstats import Sumstats
38
38
  from gwaslab.viz_aux_save_figure import save_figure
39
39
  from gwaslab.viz_plot_mqqplot import mqqplot
40
40
  from gwaslab.g_version import _get_version
41
+
41
42
  def plot_miami2(
42
43
  path1=None,
43
44
  path2=None,
@@ -86,7 +87,7 @@ def plot_miami2(
86
87
  log=Log(),
87
88
  **mqq_args
88
89
  ):
89
-
90
+ log.write("Start to create miami plot {}:".format(_get_version()), verbose=verbose)
90
91
  ## figuring arguments ###########################################################################################################
91
92
  # figure columns to use
92
93
  if scaled == True:
@@ -98,6 +99,7 @@ def plot_miami2(
98
99
  cols = ["CHR","POS","MLOG10P"]
99
100
  else:
100
101
  cols = ["CHR","POS","P"]
102
+
101
103
  if cols1 is None:
102
104
  cols1 = cols.copy()
103
105
  if cols2 is None:
@@ -151,9 +153,8 @@ def plot_miami2(
151
153
  fig_args, scatter_args = _figure_args_for_vector_plot(save, fig_args, scatter_args)
152
154
 
153
155
  # add suffix if ids are the same
154
- id1, id2, mqq_args1, mqq_args2 = _solve_id_contradictory(id0, id1, id2, mqq_args1, mqq_args2)
155
-
156
- if verbose: log.write("Start to plot miami plot {}:".format(_get_version()))
156
+ id1_1, id2_2, mqq_args1, mqq_args2 = _solve_id_contradictory(id0, id1, id2, mqq_args1, mqq_args2)
157
+
157
158
  if dpi!=100:
158
159
  fig_args["dpi"] = dpi
159
160
  if xtickpad is None:
@@ -176,6 +177,7 @@ def plot_miami2(
176
177
  titles_pad_adjusted[0]= 1 + titles_pad[0]
177
178
  if "anno2" in mqq_args.keys():
178
179
  titles_pad_adjusted[1]= - titles_pad[1]
180
+
179
181
  if merged_sumstats is None:
180
182
  ## load sumstats1 ###########################################################################################################
181
183
  sumstats1 = _figure_type_load_sumstats(name="Sumstats1",
@@ -198,8 +200,8 @@ def plot_miami2(
198
200
  else:
199
201
  cols1[2] += suffixes[0]
200
202
  cols2[2] += suffixes[1]
201
- sumstats1 = merged_sumstats.loc[:,cols1].copy()
202
- sumstats2 = merged_sumstats.loc[:,cols2].copy()
203
+ sumstats1 = merged_sumstats[cols1].copy()
204
+ sumstats2 = merged_sumstats[cols2].copy()
203
205
 
204
206
  ## rename and quick fix ###########################################################################################################
205
207
  renaming_dict1 = {cols1[0]:"CHR",cols1[1]:"POS",cols1[2]:"P"}
@@ -217,7 +219,7 @@ def plot_miami2(
217
219
  ## create merge index ###########################################################################################################
218
220
  sumstats1 = _quick_add_tchrpos(sumstats1,large_number=large_number, dropchrpos=False, verbose=verbose, log=log)
219
221
  sumstats2 = _quick_add_tchrpos(sumstats2,large_number=large_number, dropchrpos=False, verbose=verbose, log=log)
220
- if verbose: log.write(" -Merging sumstats using chr and pos...")
222
+ log.write(" -Merging sumstats using chr and pos...",verbose=verbose)
221
223
 
222
224
  ###### merge #####################################################################################################
223
225
  merged_sumstats = _quick_merge_sumstats(sumstats1=sumstats1,sumstats2=sumstats2)
@@ -231,7 +233,9 @@ def plot_miami2(
231
233
  drop_chr_start=False)
232
234
 
233
235
  # P_1 scaled_P_1 P_2 scaled_P_2 TCHR+POS CHR POS
234
-
236
+ log.write(" -Columns in merged sumstats: {}".format(",".join(merged_sumstats.columns)), verbose=verbose)
237
+
238
+
235
239
  del(sumstats1)
236
240
  del(sumstats2)
237
241
  garbage_collect.collect()
@@ -243,13 +247,14 @@ def plot_miami2(
243
247
  plt.subplots_adjust(hspace=region_hspace)
244
248
  else:
245
249
  fig, ax1, ax5 = figax
246
-
250
+
251
+ log.write("Start to create Manhattan plot for sumstats1...", verbose=verbose)
247
252
  fig,log = mqqplot(merged_sumstats,
248
253
  chrom="CHR",
249
254
  pos="POS",
250
255
  p="P_1",
251
256
  mlog10p="scaled_P_1",
252
- snpid=id1,
257
+ snpid=id1_1,
253
258
  scaled=scaled1,
254
259
  log=log,
255
260
  mode=mode,
@@ -260,15 +265,16 @@ def plot_miami2(
260
265
  _if_quick_qc=False,
261
266
  **mqq_args1
262
267
  )
268
+ log.write("Finished creating Manhattan plot for sumstats1".format(_get_version()), verbose=verbose)
263
269
 
264
-
270
+ log.write("Start to create Manhattan plot for sumstats2...", verbose=verbose)
265
271
  fig,log = mqqplot(merged_sumstats,
266
272
  chrom="CHR",
267
273
  pos="POS",
268
274
  p="P_2",
269
275
  mlog10p="scaled_P_2",
270
276
  scaled=scaled2,
271
- snpid=id2,
277
+ snpid=id2_2,
272
278
  log=log,
273
279
  mode=mode,
274
280
  figax=(fig,ax5),
@@ -277,7 +283,8 @@ def plot_miami2(
277
283
  _invert=True,
278
284
  _if_quick_qc=False,
279
285
  **mqq_args2)
280
-
286
+ log.write("Finished creating Manhattan plot for sumstats2".format(_get_version()), verbose=verbose)
287
+
281
288
  if same_ylim==True:
282
289
  ylim1_converted = ax1.get_ylim()
283
290
  ylim2_converted = ax5.get_ylim()
@@ -285,8 +292,6 @@ def plot_miami2(
285
292
  ax5.set_ylim(ylim1_converted)
286
293
  else:
287
294
  ax1.set_ylim(ylim2_converted)
288
-
289
-
290
295
  #####################################################################################################################
291
296
 
292
297
  ax5.set_xlabel("")
@@ -316,7 +321,7 @@ def plot_miami2(
316
321
 
317
322
  garbage_collect.collect()
318
323
 
319
- if verbose: log.write("Finished creating miami plot successfully")
324
+ log.write("Finished creating miami plot successfully", verbose=verbose)
320
325
  #Return matplotlib figure object #######################################################################################
321
326
  return fig, log
322
327
 
@@ -337,24 +342,27 @@ def _sort_args_to_12(mqq_args):
337
342
  return mqq_args1, mqq_args2
338
343
 
339
344
  def _solve_id_contradictory(id0, id1, id2, mqq_args1, mqq_args2):
340
- if id1 is not None and id2 is not None:
345
+ if (id1 is not None) and (id2 is not None):
341
346
  if id1 == id2:
342
347
  id1_1 = id1 + "_1"
343
348
  id2_2 = id2 + "_2"
344
349
  if "anno" in mqq_args1.keys():
345
350
  if mqq_args1["anno"] == id1:
346
351
  mqq_args1["anno"] = id1_1
347
- if "anno" in mqq_args1.keys():
348
- if mqq_args1["anno"] == id2:
349
- mqq_args1["anno"] = id2_2
352
+ if "anno" in mqq_args2.keys():
353
+ if mqq_args2["anno"] == id2:
354
+ mqq_args2["anno"] = id2_2
350
355
  else:
351
356
  id1_1 = id1
352
357
  id2_2 = id2
358
+
353
359
  if id1 is None:
354
360
  id1_1 = id0
361
+
355
362
  if id2 is None:
356
363
  id2_2 = id0
357
- return id1_1, id2_2, mqq_args1, mqq_args2
364
+
365
+ return (id1_1, id2_2, mqq_args1, mqq_args2)
358
366
 
359
367
  def _figure_args_for_vector_plot(save, fig_args, scatter_kwargs ):
360
368
  if save is not None:
@@ -379,19 +387,19 @@ def _set_spine_visibility(ax1,ax5):
379
387
 
380
388
  def _figure_type_load_sumstats(name, path, sep, cols, readcsv_args, loadmode, log, verbose):
381
389
  if type(path) is str:
382
- if verbose: log.write(" -Loading {} ({} mode): {}".format(name, loadmode, path))
383
- if verbose: log.write(" -Obtaining {} CHR, POS, P and annotation from: {}".format(name, cols))
390
+ log.write(" -Loading {} ({} mode): {}".format(name, loadmode, path), verbose=verbose)
391
+ log.write(" -Obtaining {} CHR, POS, P and annotation from: {}".format(name, cols), verbose=verbose)
384
392
 
385
393
  if loadmode=="pickle":
386
394
  sumstats = load_data_from_pickle(path,usecols=cols)
387
395
  else:
388
396
  if type(path) is Sumstats:
389
- if verbose: log.write(" -Loading {} from gwaslab.Sumstats Object".format(name))
397
+ log.write(" -Loading {} from gwaslab.Sumstats Object".format(name), verbose=verbose)
390
398
  sumstats = path.data[cols].copy()
391
399
  elif type(path) is pd.DataFrame:
392
- if verbose: log.write(" -Loading {} from pandas.DataFrame Object".format(name))
400
+ log.write(" -Loading {} from pandas.DataFrame Object".format(name), verbose=verbose)
393
401
  sumstats = path[cols].copy()
394
402
  else:
395
- if verbose: log.write(" -Loading {} from tabular files".format(name))
403
+ log.write(" -Loading {} from tabular files".format(name), verbose=verbose)
396
404
  sumstats=pd.read_table(path,sep=sep,usecols=cols,dtype={cols[0]:"string",cols[1]:"Int64",cols[2]:"float64"},**readcsv_args)
397
405
  return sumstats