gwaslab 3.4.36__py3-none-any.whl → 3.4.38__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gwaslab might be problematic. Click here for more details.

Files changed (42)
  1. gwaslab/__init__.py +1 -1
  2. gwaslab/data/formatbook.json +722 -721
  3. gwaslab/g_Log.py +8 -0
  4. gwaslab/g_Sumstats.py +80 -178
  5. gwaslab/g_SumstatsPair.py +6 -2
  6. gwaslab/g_Sumstats_summary.py +3 -3
  7. gwaslab/g_meta.py +13 -3
  8. gwaslab/g_version.py +2 -2
  9. gwaslab/hm_casting.py +29 -15
  10. gwaslab/hm_harmonize_sumstats.py +312 -159
  11. gwaslab/hm_rsid_to_chrpos.py +1 -1
  12. gwaslab/io_preformat_input.py +46 -37
  13. gwaslab/io_to_formats.py +428 -295
  14. gwaslab/qc_check_datatype.py +15 -1
  15. gwaslab/qc_fix_sumstats.py +956 -719
  16. gwaslab/util_ex_calculate_ldmatrix.py +29 -11
  17. gwaslab/util_ex_gwascatalog.py +1 -1
  18. gwaslab/util_ex_ldproxyfinder.py +1 -1
  19. gwaslab/util_ex_process_h5.py +26 -17
  20. gwaslab/util_ex_process_ref.py +3 -3
  21. gwaslab/util_ex_run_coloc.py +26 -4
  22. gwaslab/util_in_convert_h2.py +1 -1
  23. gwaslab/util_in_fill_data.py +44 -5
  24. gwaslab/util_in_filter_value.py +122 -34
  25. gwaslab/util_in_get_density.py +2 -2
  26. gwaslab/util_in_get_sig.py +41 -9
  27. gwaslab/viz_aux_quickfix.py +26 -21
  28. gwaslab/viz_aux_reposition_text.py +7 -4
  29. gwaslab/viz_aux_save_figure.py +6 -5
  30. gwaslab/viz_plot_compare_af.py +5 -5
  31. gwaslab/viz_plot_compare_effect.py +22 -5
  32. gwaslab/viz_plot_miamiplot2.py +28 -20
  33. gwaslab/viz_plot_mqqplot.py +214 -98
  34. gwaslab/viz_plot_qqplot.py +11 -8
  35. gwaslab/viz_plot_regionalplot.py +16 -9
  36. gwaslab/viz_plot_trumpetplot.py +15 -6
  37. {gwaslab-3.4.36.dist-info → gwaslab-3.4.38.dist-info}/METADATA +3 -3
  38. gwaslab-3.4.38.dist-info/RECORD +72 -0
  39. gwaslab-3.4.36.dist-info/RECORD +0 -72
  40. {gwaslab-3.4.36.dist-info → gwaslab-3.4.38.dist-info}/LICENSE +0 -0
  41. {gwaslab-3.4.36.dist-info → gwaslab-3.4.38.dist-info}/WHEEL +0 -0
  42. {gwaslab-3.4.36.dist-info → gwaslab-3.4.38.dist-info}/top_level.txt +0 -0
@@ -13,8 +13,9 @@ from gwaslab.bd_common_data import get_chr_to_NC
13
13
  from gwaslab.bd_common_data import gtf_to_protein_coding
14
14
  from gwaslab.bd_download import check_and_download
15
15
  from gwaslab.util_ex_gwascatalog import gwascatalog_trait
16
-
17
-
16
+ from gwaslab.qc_fix_sumstats import check_dataframe_shape
17
+ from gwaslab.qc_fix_sumstats import start_to
18
+ from gwaslab.qc_fix_sumstats import finished
18
19
  # getsig
19
20
  # closest_gene
20
21
  # annogene
@@ -39,8 +40,24 @@ def getsig(insumstats,
39
40
  """
40
41
  Extract the lead variants using a sliding window. P or MLOG10P will be used and converted to SCALEDP for sorting.
41
42
  """
43
+ ##start function with col checking##########################################################
44
+ _start_line = "extract lead variants"
45
+ _end_line = "extracting lead variants"
46
+ _start_cols = [chrom,pos]
47
+ _start_function = ".get_lead()"
48
+ _must_args ={}
49
+
50
+ is_enough_info = start_to(sumstats=insumstats,
51
+ log=log,
52
+ verbose=verbose,
53
+ start_line=_start_line,
54
+ end_line=_end_line,
55
+ start_cols=_start_cols,
56
+ start_function=_start_function,
57
+ **_must_args)
58
+ if is_enough_info == False: return None
59
+ ############################################################################################
42
60
 
43
- if verbose: log.write("Start to extract lead variants...")
44
61
  if verbose: log.write(" -Processing "+str(len(insumstats))+" variants...")
45
62
  if verbose: log.write(" -Significance threshold :", sig_level)
46
63
  if verbose: log.write(" -Sliding window size:", str(windowsizekb) ," kb")
@@ -155,11 +172,9 @@ def getsig(insumstats,
155
172
  source=source,
156
173
  verbose=verbose)
157
174
 
158
- # Finishing
159
- if verbose: log.write("Finished extracting lead variants successfully!")
160
175
  # drop internal id
161
176
  output = output.drop("__ID",axis=1)
162
- gc.collect()
177
+ finished(log,verbose,_end_line)
163
178
  return output.copy()
164
179
 
165
180
 
@@ -329,7 +344,24 @@ def getnovel(insumstats,
329
344
  gwascatalog_source="NCBI",
330
345
  output_known=False,
331
346
  verbose=True):
332
- if verbose: log.write("Start to check if lead variants are known...")
347
+ ##start function with col checking##########################################################
348
+ _start_line = "check if lead variants are known"
349
+ _end_line = "checking if lead variants are known"
350
+ _start_cols = [chrom,pos]
351
+ _start_function = ".get_novel()"
352
+ _must_args ={}
353
+
354
+ is_enough_info = start_to(sumstats=insumstats,
355
+ log=log,
356
+ verbose=verbose,
357
+ start_line=_start_line,
358
+ end_line=_end_line,
359
+ start_cols=_start_cols,
360
+ start_function=_start_function,
361
+ **_must_args)
362
+ if is_enough_info == False: return None
363
+ ############################################################################################
364
+
333
365
  allsig = getsig(insumstats=insumstats,
334
366
  id=id,chrom=chrom,pos=pos,p=p,use_p=use_p,windowsizekb=windowsizekb,sig_level=sig_level,log=log,
335
367
  xymt=xymt,anno=anno,build=build, source=source,verbose=verbose)
@@ -438,8 +470,8 @@ def getnovel(insumstats,
438
470
 
439
471
  if verbose: log.write(" -Identified ",len(allsig)-sum(allsig["NOVEL"])," known vairants in current sumstats...")
440
472
  if verbose: log.write(" -Identified ",sum(allsig["NOVEL"])," novel vairants in current sumstats...")
441
- if verbose: log.write("Finished checking known or novel successfully!")
442
- gc.collect()
473
+
474
+ finished(log,verbose,_end_line)
443
475
 
444
476
  # how to return
445
477
  if only_novel is True:
@@ -5,7 +5,7 @@ from gwaslab.bd_common_data import get_chr_to_number
5
5
  from gwaslab.bd_common_data import get_number_to_chr
6
6
  from math import ceil
7
7
 
8
- def _quick_fix(sumstats, chr_dict=get_chr_to_number(), scaled=False, chrom="CHR", pos="POS", p="P", mlog10p="MLOG10P",verbose=True, log=Log()):
8
+ def _quick_fix(sumstats, chr_dict=get_chr_to_number(), scaled=False, chrom="CHR", pos="POS", p="P", mlog10p="MLOG10P",log=Log(), verbose=True):
9
9
  '''
10
10
  quick sanity check for input sumstats
11
11
  '''
@@ -45,10 +45,11 @@ def _quick_fix_p_value(sumstats, p="P", mlog10p="MLOG10P", scaled=False,verbose=
45
45
  return sumstats
46
46
 
47
47
 
48
- def _quick_fix_mlog10p(sumstats,p="P", mlog10p="MLOG10P", scaled=False, verbose=True, log=Log()):
48
+ def _quick_fix_mlog10p(insumstats,p="P", mlog10p="MLOG10P", scaled=False, log=Log(), verbose=True):
49
49
  '''
50
50
  drop variants with bad -log10(P) values
51
51
  '''
52
+ sumstats = insumstats.copy()
52
53
  if scaled != True:
53
54
  if verbose:log.write(" -Sumstats P values are being converted to -log10(P)...")
54
55
  sumstats["scaled_P"] = -np.log10(sumstats[p].astype("float64"))
@@ -63,7 +64,7 @@ def _quick_fix_mlog10p(sumstats,p="P", mlog10p="MLOG10P", scaled=False, verbose=
63
64
  return sumstats
64
65
 
65
66
 
66
- def _quick_fix_eaf(seires, verbose=True, log=Log()):
67
+ def _quick_fix_eaf(seires,log=Log(), verbose=True):
67
68
  '''
68
69
  conversion of eaf to maf
69
70
  '''
@@ -73,7 +74,7 @@ def _quick_fix_eaf(seires, verbose=True, log=Log()):
73
74
  return seires.copy()
74
75
 
75
76
 
76
- def _quick_fix_chr(seires, chr_dict, verbose=True, log=Log()):
77
+ def _quick_fix_chr(seires, chr_dict,log=Log(), verbose=True):
77
78
  '''
78
79
  conversion and check for chr
79
80
  '''
@@ -84,7 +85,7 @@ def _quick_fix_chr(seires, chr_dict, verbose=True, log=Log()):
84
85
  return seires
85
86
 
86
87
 
87
- def _quick_fix_pos(seires, verbose=True, log=Log()):
88
+ def _quick_fix_pos(seires,log=Log(), verbose=True):
88
89
  '''
89
90
  force conversion for pos
90
91
  '''
@@ -92,7 +93,7 @@ def _quick_fix_pos(seires, verbose=True, log=Log()):
92
93
  return seires
93
94
 
94
95
 
95
- def _get_largenumber(*args, log=Log()):
96
+ def _get_largenumber(*args,log=Log(), verbose=True):
96
97
  '''
97
98
  get a helper large number, >> max(pos)
98
99
  '''
@@ -108,7 +109,7 @@ def _get_largenumber(*args, log=Log()):
108
109
  return large_number
109
110
 
110
111
 
111
- def _quick_add_tchrpos(sumstats, chr="chr", pos="POS", large_number=10000000000, dropchrpos=False, verbose=True, log=Log()):
112
+ def _quick_add_tchrpos(sumstats, chr="chr", pos="POS", large_number=10000000000, dropchrpos=False,log=Log(), verbose=True):
112
113
  sumstats["TCHR+POS"] = sumstats["CHR"]*large_number + sumstats["POS"]
113
114
  sumstats["TCHR+POS"] = sumstats["TCHR+POS"].astype('Int64')
114
115
  if dropchrpos == True:
@@ -117,7 +118,7 @@ def _quick_add_tchrpos(sumstats, chr="chr", pos="POS", large_number=10000000000,
117
118
  return sumstats
118
119
 
119
120
 
120
- def _quick_merge_sumstats(sumstats1, sumstats2):
121
+ def _quick_merge_sumstats(sumstats1, sumstats2, log=Log(), verbose=True):
121
122
  merged_sumstats = pd.merge(sumstats1, sumstats2, on="TCHR+POS", how="outer", suffixes=('_1', '_2'))
122
123
  merged_sumstats["CHR"] = merged_sumstats["CHR_1"]
123
124
  merged_sumstats["POS"] = merged_sumstats["POS_1"]
@@ -126,7 +127,7 @@ def _quick_merge_sumstats(sumstats1, sumstats2):
126
127
  merged_sumstats = merged_sumstats.drop(labels=["CHR_1", "CHR_2", "POS_1", "POS_2"],axis=1)
127
128
  return merged_sumstats
128
129
 
129
- def _quick_assign_i(sumstats, chrom="CHR",pos="POS"):
130
+ def _quick_assign_i(sumstats, chrom="CHR",pos="POS",log=Log(), verbose=True):
130
131
  # sort by CHR an POS
131
132
  sumstats = sumstats.sort_values([chrom,pos])
132
133
  # set new id
@@ -158,7 +159,7 @@ def _quick_assign_i(sumstats, chrom="CHR",pos="POS"):
158
159
  sumstats["i"] = np.floor(pd.to_numeric(sumstats["i"], errors='coerce')).astype('Int64')
159
160
  return sumstats, chrom_df
160
161
 
161
- def _quick_assign_i_with_rank(sumstats, chrpad, use_rank=False, chrom="CHR",pos="POS",drop_chr_start=False,_posdiccul=None):
162
+ def _quick_assign_i_with_rank(sumstats, chrpad, use_rank=False, chrom="CHR",pos="POS",drop_chr_start=False,_posdiccul=None,log=Log(), verbose=True):
162
163
  # align all variants on a single axis (i)
163
164
  sumstats = sumstats.sort_values([chrom,pos])
164
165
  if use_rank is True:
@@ -218,7 +219,7 @@ def _quick_assign_i_with_rank(sumstats, chrpad, use_rank=False, chrom="CHR",pos=
218
219
  sumstats["i"] = np.floor(pd.to_numeric(sumstats["i"], errors='coerce')).astype('Int64')
219
220
  return sumstats, chrom_df
220
221
 
221
- def _quick_assign_marker_relative_size(series, sig_level = 5e-8, suggestive_sig_level=5e-6, lower_level=5e-4):
222
+ def _quick_assign_marker_relative_size(series, sig_level = 5e-8, suggestive_sig_level=5e-6, lower_level=5e-4,log=Log(), verbose=True):
222
223
  size_series = series.copy()
223
224
  size_series[:] = 1
224
225
 
@@ -231,7 +232,7 @@ def _quick_assign_marker_relative_size(series, sig_level = 5e-8, suggestive_sig_
231
232
  size_series[is_sig_level] = 4
232
233
  return size_series
233
234
 
234
- def _quick_assign_highlight_hue(sumstats,highlight,highlight_windowkb, snpid="SNPID",chrom="CHR",pos="POS",verbose=True, log=Log()):
235
+ def _quick_assign_highlight_hue(sumstats,highlight,highlight_windowkb, snpid="SNPID",chrom="CHR",pos="POS",log=Log(), verbose=True):
235
236
  to_highlight = sumstats.loc[sumstats[snpid].isin(highlight),:]
236
237
  #assign colors: 0 is hightlight color
237
238
  for i,row in to_highlight.iterrows():
@@ -243,7 +244,7 @@ def _quick_assign_highlight_hue(sumstats,highlight,highlight_windowkb, snpid="SN
243
244
  sumstats.loc[right_chr&up_pos&low_pos,"HUE"]="0"
244
245
  return sumstats
245
246
 
246
- def _quick_assign_highlight_hue_pair(sumstats, highlight1, highlight2, highlight_windowkb, chrom="CHR",pos="POS",verbose=True, log=Log()):
247
+ def _quick_assign_highlight_hue_pair(sumstats, highlight1, highlight2, highlight_windowkb, chrom="CHR",pos="POS",log=Log(), verbose=True):
247
248
  #assign colors: 0 is hightlight color
248
249
  to_highlight1 = pd.DataFrame()
249
250
  to_highlight2 = pd.DataFrame()
@@ -271,7 +272,7 @@ def _quick_assign_highlight_hue_pair(sumstats, highlight1, highlight2, highlight
271
272
  sumstats.loc[right_chr&up_pos&low_pos,"HUE2"]="0"
272
273
  return sumstats, to_highlight1, to_highlight2
273
274
 
274
- def _quick_extract_snp_in_region(sumstats, region, chrom="CHR",pos="POS",verbose=True, log=Log()):
275
+ def _quick_extract_snp_in_region(sumstats, region, chrom="CHR",pos="POS",log=Log(), verbose=True):
275
276
  region_chr = region[0]
276
277
  region_start = region[1]
277
278
  region_end = region[2]
@@ -281,15 +282,16 @@ def _quick_extract_snp_in_region(sumstats, region, chrom="CHR",pos="POS",verbose
281
282
  sumstats = sumstats.loc[is_in_region_snp,:]
282
283
  return sumstats
283
284
 
284
- def _cut(series, mode,cutfactor,cut,skip, ylabels, cut_log, verbose,lines_to_plot, log):
285
+ def _cut(series, mode,cutfactor,cut,skip, ylabels, cut_log, verbose, lines_to_plot, log):
286
+ log.write(" -Converting data above cut line...",verbose=verbose)
285
287
  if ylabels is not None:
286
288
  ylabels = pd.Series(ylabels)
287
289
  maxy = series.max()
288
290
  series = series.copy()
289
291
  if "b" not in mode:
290
- if verbose: log.write(" -Maximum -log10(P) values is "+str(maxy) +" .")
292
+ if verbose: log.write(" -Maximum -log10(P) value is "+str(maxy) +" .")
291
293
  elif "b" in mode:
292
- if verbose: log.write(" -Maximum DENSITY values is "+str(maxy) +" .")
294
+ if verbose: log.write(" -Maximum DENSITY value is "+str(maxy) +" .")
293
295
 
294
296
  maxticker=int(np.round(series.max(skipna=True)))
295
297
 
@@ -340,7 +342,7 @@ def _cut(series, mode,cutfactor,cut,skip, ylabels, cut_log, verbose,lines_to_plo
340
342
  #sumstats.loc[sumstats["scaled_P"]>cut,"scaled_P"] = (sumstats.loc[sumstats["scaled_P"]>cut,"scaled_P"]-cut)/cutfactor + cut
341
343
 
342
344
  maxy = (maxticker-cut)/cutfactor + cut
343
- if verbose: log.write("Finished data conversion and sanity check.")
345
+
344
346
  return series, maxy, maxticker, cut, cutfactor,ylabels,lines_to_plot
345
347
 
346
348
  #def _cut_line(level, mode,cutfactor,cut,skip, ylabels, cut_log, verbose, log):
@@ -362,9 +364,11 @@ def _set_yticklabels(cut,
362
364
  font_family,
363
365
  ytick3,
364
366
  ylabels,
365
- ylabels_converted
367
+ ylabels_converted,
368
+ log=Log(),
369
+ verbose=True
366
370
  ):
367
-
371
+ log.write(" -Processing Y tick lables...",verbose=verbose)
368
372
  # if no cut
369
373
  if cut == 0:
370
374
  ax1.set_ylim(skip, ceil(maxy*1.2) )
@@ -430,7 +434,8 @@ def _set_yticklabels(cut,
430
434
 
431
435
  return ax1
432
436
 
433
- def _jagged_y(cut,skip,ax1,mode,mqqratio,jagged_len,jagged_wid):
437
+ def _jagged_y(cut,skip,ax1,mode,mqqratio,jagged_len,jagged_wid, log=Log(), verbose=True):
438
+ log.write(" -Processing jagged Y axis...",verbose=verbose)
434
439
  tycut = cut +0.3 #(cut - skip)/ (ax1.get_ylim()[1] - skip) + 0.002
435
440
  dy= jagged_len * (cut - skip)
436
441
  x0 = 0
@@ -2,7 +2,7 @@ import pandas as pd
2
2
  import numpy as np
3
3
  from gwaslab.g_Log import Log
4
4
 
5
- def adjust_text_position(positions, yspan, repel_force=0.01, max_iter=100,amode="int",log=Log(),verbose=True):
5
+ def adjust_text_position(positions, yspan, repel_force=0.01, max_iter=100,amode="int",log=Log(),verbose=True, min_factor=None):
6
6
  # check the number of variants to annotate
7
7
  #if repel_force>0:
8
8
  # if 1/(repel_force*2 +0.01) < len(positions):
@@ -15,10 +15,12 @@ def adjust_text_position(positions, yspan, repel_force=0.01, max_iter=100,amode=
15
15
  if amode=="int":
16
16
  step = int(yspan*repel_force)
17
17
  elif amode=="log":
18
- min_factor = np.min(positions)
18
+ if min_factor is None:
19
+ min_factor = np.min(positions)
19
20
  #(1, max) -> (0, log(max)))
20
- positions = np.log(positions/min_factor)
21
+ positions = np.log2(positions/min_factor)
21
22
  step = max(positions)*repel_force
23
+
22
24
  else:
23
25
  step = yspan*repel_force
24
26
 
@@ -33,7 +35,8 @@ def adjust_text_position(positions, yspan, repel_force=0.01, max_iter=100,amode=
33
35
  if amode=="int":
34
36
  return np.floor(pd.to_numeric(positions, errors='coerce')).astype('Int64').copy()
35
37
  elif amode=="log":
36
- return np.exp(pd.to_numeric(positions, errors='coerce')) * min_factor
38
+
39
+ return np.power(2, pd.to_numeric(positions, errors='coerce'))* min_factor
37
40
  else:
38
41
  return pd.to_numeric(positions, errors='coerce')
39
42
  else:
@@ -4,23 +4,24 @@ import time
4
4
  import os.path
5
5
 
6
6
  def save_figure(fig, save, keyword, save_args=None, log = Log(), verbose=True):
7
+ log.write("Start to save figure..." ,verbose=verbose)
7
8
  if save_args is None:
8
9
  save_args = {}
9
10
  if save:
10
- if verbose: log.write("Saving plot:")
11
11
  if save==True:
12
12
  default_path = get_default_path(keyword)
13
13
  fig.savefig(default_path, bbox_inches="tight",**save_args)
14
- log.write(" -Saved to "+ default_path + " successfully!" )
14
+ log.write(" -Saved to "+ default_path + " successfully!" ,verbose=verbose)
15
15
  else:
16
16
  if os.path.exists(save):
17
17
  fig.savefig(save,bbox_inches="tight",**save_args)
18
- log.write(" -Saved to "+ save + " successfully! (overwrite)" )
18
+ log.write(" -Saved to "+ save + " successfully! (overwrite)" ,verbose=verbose)
19
19
  else:
20
20
  fig.savefig(save,bbox_inches="tight",**save_args)
21
- log.write(" -Saved to "+ save + " successfully!" )
21
+ log.write(" -Saved to "+ save + " successfully!" ,verbose=verbose)
22
22
  else:
23
- log.write(" -Skip saving figures!" )
23
+ log.write(" -Skip saving figure!" ,verbose=verbose)
24
+ log.write("Finished saving figure..." ,verbose=verbose)
24
25
 
25
26
  def get_default_path(keyword,fmt="png"):
26
27
  path_dictionary = {
@@ -72,11 +72,11 @@ def plotdaf(sumstats,
72
72
 
73
73
 
74
74
  sumstats = sumstats.loc[(~sumstats[eaf].isna())&(~sumstats[daf].isna()),[snpid,eaf,daf]+alleles].copy()
75
- sumstats.loc[:,daf] = sumstats.loc[:,daf].astype("float")
76
- sumstats.loc[:,eaf] = sumstats.loc[:,eaf].astype("float")
75
+ sumstats[daf] = sumstats[daf].astype("float")
76
+ sumstats[eaf] = sumstats[eaf].astype("float")
77
77
  if verbose: log.write(" -Plotting valriants:" + str(len(sumstats)))
78
78
 
79
- sumstats.loc[:,"RAF"]=sumstats[eaf] - sumstats[daf]
79
+ sumstats["RAF"]=sumstats[eaf] - sumstats[daf]
80
80
  sns.set_style("ticks")
81
81
  fig, (ax1, ax2) = plt.subplots(1, 2,**plt_args)
82
82
  ax1.scatter(sumstats["RAF"],sumstats[eaf],label="Non-outlier", **scatter_args)
@@ -119,9 +119,9 @@ def plotdaf(sumstats,
119
119
  ax1.set_ylim([0,1])
120
120
 
121
121
 
122
- sumstats.loc[:,"ID"] = sumstats.index
122
+ sumstats["ID"] = sumstats.index
123
123
 
124
- to_plot = pd.melt(sumstats,id_vars=['ID'], value_vars=['EAF',"RAF"], var_name='Types', value_name='Allele Frequency')
124
+ to_plot = pd.melt(sumstats,id_vars=['ID'], value_vars=['EAF',"RAF"], var_name='Types', value_name='Allele Frequency').dropna()
125
125
 
126
126
  sns.histplot(data=to_plot, x="Allele Frequency", hue="Types", fill=True, ax=ax2, legend = legend2 ,**histplot_args)
127
127
  ax2.set_xlabel("Allele Frequency",**font_args)
@@ -36,6 +36,7 @@ def compare_effect(path1,
36
36
  wc_correction=False,
37
37
  null_beta=0,
38
38
  is_q=False,
39
+ is_q_mc = False,
39
40
  include_all=True,
40
41
  q_level=0.05,
41
42
  sig_level=5e-8,
@@ -530,9 +531,10 @@ def compare_effect(path1,
530
531
  if (is_q is True):
531
532
  if verbose: log.write(" -Calculating Cochran's Q statistics and peform chisq test...")
532
533
  if mode=="beta" or mode=="BETA" or mode=="Beta":
533
- sig_list_merged = test_q(sig_list_merged,"EFFECT_1","SE_1","EFFECT_2_aligned","SE_2",q_level=q_level)
534
+ sig_list_merged = test_q(sig_list_merged,"EFFECT_1","SE_1","EFFECT_2_aligned","SE_2",q_level=q_level,is_q_mc=is_q_mc, log=log, verbose=verbose)
534
535
  else:
535
- sig_list_merged = test_q(sig_list_merged,"BETA_1","SE_1","BETA_2_aligned","SE_2",q_level=q_level)
536
+ sig_list_merged = test_q(sig_list_merged,"BETA_1","SE_1","BETA_2_aligned","SE_2",q_level=q_level,is_q_mc=is_q_mc, log=log, verbose=verbose)
537
+
536
538
  ######################### save ###############################################################
537
539
  ## save the merged data
538
540
  save_path = label[0]+"_"+label[1]+"_beta_sig_list_merged.tsv"
@@ -806,8 +808,15 @@ def compare_effect(path1,
806
808
  if legend_mode == "full" and is_q==True :
807
809
  title_proxy = Rectangle((0,0), 0, 0, color='w',label=legend_title)
808
810
  title_proxy2 = Rectangle((0,0), 0, 0, color='w',label=legend_title2)
809
- het_label_sig = r"$P_{het} < $" + "${}$".format(q_level)
810
- het_label_sig2 = r"$P_{het} > $" + "${}$".format(q_level)
811
+ if is_q_mc=="fdr":
812
+ het_label_sig = r"$FDR_{het} < $" + "${}$".format(q_level)
813
+ het_label_sig2 = r"$FDR_{het} > $" + "${}$".format(q_level)
814
+ elif is_q_mc=="bon":
815
+ het_label_sig = r"$P_{het,bon} < $" + "${}$".format(q_level)
816
+ het_label_sig2 = r"$P_{het,bon} > $" + "${}$".format(q_level)
817
+ else:
818
+ het_label_sig = r"$P_{het} < $" + "${}$".format(q_level)
819
+ het_label_sig2 = r"$P_{het} > $" + "${}$".format(q_level)
811
820
  het_sig = Rectangle((0,0), 0, 0, facecolor='#cccccc',edgecolor="black", linewidth=1, label=het_label_sig)
812
821
  het_nonsig = Rectangle((0,0), 0, 0, facecolor='#cccccc',edgecolor="white",linewidth=1, label=het_label_sig2)
813
822
 
@@ -876,7 +885,7 @@ def reorderLegend(ax=None, order=None, add=None):
876
885
  new_handles = [info[l] for l in order]
877
886
  return new_handles, order
878
887
 
879
- def test_q(df,beta1,se1,beta2,se2,q_level=0.05):
888
+ def test_q(df,beta1,se1,beta2,se2,q_level=0.05,is_q_mc=False, log=Log(), verbose=False):
880
889
  w1="Weight_1"
881
890
  w2="Weight_2"
882
891
  beta="BETA_FE"
@@ -891,6 +900,14 @@ def test_q(df,beta1,se1,beta2,se2,q_level=0.05):
891
900
  df[q] = df[w1]*(df[beta1]-df[beta])**2 + df[w2]*(df[beta2]-df[beta])**2
892
901
  df[pq] = ss.chi2.sf(df[q], 1)
893
902
  df["Edge_color"]="white"
903
+
904
+ if is_q_mc=="fdr":
905
+ if verbose: log.write(" -FDR correction applied...")
906
+ df[pq] = ss.false_discovery_control(df[pq])
907
+ elif is_q_mc=="bon":
908
+ if verbose: log.write(" -Bonferroni correction applied...")
909
+ df[pq] = df[pq] * len(df[pq])
910
+
894
911
  df.loc[df[pq]<q_level,"Edge_color"]="black"
895
912
  df.drop(columns=["Weight_1","Weight_2","BETA_FE"],inplace=True)
896
913
  # Huedo-Medina, T. B., Sánchez-Meca, J., Marín-Martínez, F., & Botella, J. (2006). Assessing heterogeneity in meta-analysis: Q statistic or I² index?. Psychological methods, 11(2), 193.
@@ -38,6 +38,7 @@ from gwaslab.g_Sumstats import Sumstats
38
38
  from gwaslab.viz_aux_save_figure import save_figure
39
39
  from gwaslab.viz_plot_mqqplot import mqqplot
40
40
  from gwaslab.g_version import _get_version
41
+
41
42
  def plot_miami2(
42
43
  path1=None,
43
44
  path2=None,
@@ -86,7 +87,7 @@ def plot_miami2(
86
87
  log=Log(),
87
88
  **mqq_args
88
89
  ):
89
-
90
+ log.write("Start to create miami plot {}:".format(_get_version()), verbose=verbose)
90
91
  ## figuring arguments ###########################################################################################################
91
92
  # figure columns to use
92
93
  if scaled == True:
@@ -98,6 +99,7 @@ def plot_miami2(
98
99
  cols = ["CHR","POS","MLOG10P"]
99
100
  else:
100
101
  cols = ["CHR","POS","P"]
102
+
101
103
  if cols1 is None:
102
104
  cols1 = cols.copy()
103
105
  if cols2 is None:
@@ -151,9 +153,8 @@ def plot_miami2(
151
153
  fig_args, scatter_args = _figure_args_for_vector_plot(save, fig_args, scatter_args)
152
154
 
153
155
  # add suffix if ids are the same
154
- id1, id2, mqq_args1, mqq_args2 = _solve_id_contradictory(id0, id1, id2, mqq_args1, mqq_args2)
155
-
156
- if verbose: log.write("Start to plot miami plot {}:".format(_get_version()))
156
+ id1_1, id2_2, mqq_args1, mqq_args2 = _solve_id_contradictory(id0, id1, id2, mqq_args1, mqq_args2)
157
+
157
158
  if dpi!=100:
158
159
  fig_args["dpi"] = dpi
159
160
  if xtickpad is None:
@@ -176,6 +177,7 @@ def plot_miami2(
176
177
  titles_pad_adjusted[0]= 1 + titles_pad[0]
177
178
  if "anno2" in mqq_args.keys():
178
179
  titles_pad_adjusted[1]= - titles_pad[1]
180
+
179
181
  if merged_sumstats is None:
180
182
  ## load sumstats1 ###########################################################################################################
181
183
  sumstats1 = _figure_type_load_sumstats(name="Sumstats1",
@@ -198,8 +200,8 @@ def plot_miami2(
198
200
  else:
199
201
  cols1[2] += suffixes[0]
200
202
  cols2[2] += suffixes[1]
201
- sumstats1 = merged_sumstats.loc[:,cols1].copy()
202
- sumstats2 = merged_sumstats.loc[:,cols2].copy()
203
+ sumstats1 = merged_sumstats[cols1].copy()
204
+ sumstats2 = merged_sumstats[cols2].copy()
203
205
 
204
206
  ## rename and quick fix ###########################################################################################################
205
207
  renaming_dict1 = {cols1[0]:"CHR",cols1[1]:"POS",cols1[2]:"P"}
@@ -217,7 +219,7 @@ def plot_miami2(
217
219
  ## create merge index ###########################################################################################################
218
220
  sumstats1 = _quick_add_tchrpos(sumstats1,large_number=large_number, dropchrpos=False, verbose=verbose, log=log)
219
221
  sumstats2 = _quick_add_tchrpos(sumstats2,large_number=large_number, dropchrpos=False, verbose=verbose, log=log)
220
- if verbose: log.write(" -Merging sumstats using chr and pos...")
222
+ log.write(" -Merging sumstats using chr and pos...",verbose=verbose)
221
223
 
222
224
  ###### merge #####################################################################################################
223
225
  merged_sumstats = _quick_merge_sumstats(sumstats1=sumstats1,sumstats2=sumstats2)
@@ -231,7 +233,9 @@ def plot_miami2(
231
233
  drop_chr_start=False)
232
234
 
233
235
  # P_1 scaled_P_1 P_2 scaled_P_2 TCHR+POS CHR POS
234
-
236
+ log.write(" -Columns in merged sumstats: {}".format(",".join(merged_sumstats.columns)), verbose=verbose)
237
+
238
+
235
239
  del(sumstats1)
236
240
  del(sumstats2)
237
241
  garbage_collect.collect()
@@ -243,13 +247,14 @@ def plot_miami2(
243
247
  plt.subplots_adjust(hspace=region_hspace)
244
248
  else:
245
249
  fig, ax1, ax5 = figax
246
-
250
+
251
+ log.write("Start to create Manhattan plot for sumstats1...", verbose=verbose)
247
252
  fig,log = mqqplot(merged_sumstats,
248
253
  chrom="CHR",
249
254
  pos="POS",
250
255
  p="P_1",
251
256
  mlog10p="scaled_P_1",
252
- snpid=id1,
257
+ snpid=id1_1,
253
258
  scaled=scaled1,
254
259
  log=log,
255
260
  mode=mode,
@@ -260,15 +265,16 @@ def plot_miami2(
260
265
  _if_quick_qc=False,
261
266
  **mqq_args1
262
267
  )
268
+ log.write("Finished creating Manhattan plot for sumstats1".format(_get_version()), verbose=verbose)
263
269
 
264
-
270
+ log.write("Start to create Manhattan plot for sumstats2...", verbose=verbose)
265
271
  fig,log = mqqplot(merged_sumstats,
266
272
  chrom="CHR",
267
273
  pos="POS",
268
274
  p="P_2",
269
275
  mlog10p="scaled_P_2",
270
276
  scaled=scaled2,
271
- snpid=id2,
277
+ snpid=id2_2,
272
278
  log=log,
273
279
  mode=mode,
274
280
  figax=(fig,ax5),
@@ -277,7 +283,8 @@ def plot_miami2(
277
283
  _invert=True,
278
284
  _if_quick_qc=False,
279
285
  **mqq_args2)
280
-
286
+ log.write("Finished creating Manhattan plot for sumstats2".format(_get_version()), verbose=verbose)
287
+
281
288
  if same_ylim==True:
282
289
  ylim1_converted = ax1.get_ylim()
283
290
  ylim2_converted = ax5.get_ylim()
@@ -285,8 +292,6 @@ def plot_miami2(
285
292
  ax5.set_ylim(ylim1_converted)
286
293
  else:
287
294
  ax1.set_ylim(ylim2_converted)
288
-
289
-
290
295
  #####################################################################################################################
291
296
 
292
297
  ax5.set_xlabel("")
@@ -337,24 +342,27 @@ def _sort_args_to_12(mqq_args):
337
342
  return mqq_args1, mqq_args2
338
343
 
339
344
  def _solve_id_contradictory(id0, id1, id2, mqq_args1, mqq_args2):
340
- if id1 is not None and id2 is not None:
345
+ if (id1 is not None) and (id2 is not None):
341
346
  if id1 == id2:
342
347
  id1_1 = id1 + "_1"
343
348
  id2_2 = id2 + "_2"
344
349
  if "anno" in mqq_args1.keys():
345
350
  if mqq_args1["anno"] == id1:
346
351
  mqq_args1["anno"] = id1_1
347
- if "anno" in mqq_args1.keys():
348
- if mqq_args1["anno"] == id2:
349
- mqq_args1["anno"] = id2_2
352
+ if "anno" in mqq_args2.keys():
353
+ if mqq_args2["anno"] == id2:
354
+ mqq_args2["anno"] = id2_2
350
355
  else:
351
356
  id1_1 = id1
352
357
  id2_2 = id2
358
+
353
359
  if id1 is None:
354
360
  id1_1 = id0
361
+
355
362
  if id2 is None:
356
363
  id2_2 = id0
357
- return id1_1, id2_2, mqq_args1, mqq_args2
364
+
365
+ return (id1_1, id2_2, mqq_args1, mqq_args2)
358
366
 
359
367
  def _figure_args_for_vector_plot(save, fig_args, scatter_kwargs ):
360
368
  if save is not None: