gwaslab 3.4.37__py3-none-any.whl → 3.4.39__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gwaslab might be problematic. Click here for more details.

Files changed (57)
  1. gwaslab/bd_common_data.py +6 -3
  2. gwaslab/bd_download.py +9 -9
  3. gwaslab/bd_get_hapmap3.py +43 -9
  4. gwaslab/data/formatbook.json +722 -721
  5. gwaslab/g_Log.py +22 -5
  6. gwaslab/g_Sumstats.py +110 -163
  7. gwaslab/g_SumstatsPair.py +76 -25
  8. gwaslab/g_SumstatsT.py +2 -2
  9. gwaslab/g_Sumstats_summary.py +3 -3
  10. gwaslab/g_version.py +10 -10
  11. gwaslab/hm_casting.py +36 -17
  12. gwaslab/hm_harmonize_sumstats.py +354 -221
  13. gwaslab/hm_rsid_to_chrpos.py +1 -1
  14. gwaslab/io_preformat_input.py +49 -43
  15. gwaslab/io_read_ldsc.py +49 -1
  16. gwaslab/io_to_formats.py +428 -295
  17. gwaslab/ldsc_irwls.py +198 -0
  18. gwaslab/ldsc_jackknife.py +514 -0
  19. gwaslab/ldsc_ldscore.py +417 -0
  20. gwaslab/ldsc_parse.py +294 -0
  21. gwaslab/ldsc_regressions.py +747 -0
  22. gwaslab/ldsc_sumstats.py +629 -0
  23. gwaslab/qc_check_datatype.py +3 -3
  24. gwaslab/qc_fix_sumstats.py +891 -778
  25. gwaslab/util_ex_calculate_ldmatrix.py +31 -13
  26. gwaslab/util_ex_gwascatalog.py +25 -25
  27. gwaslab/util_ex_ldproxyfinder.py +10 -10
  28. gwaslab/util_ex_ldsc.py +189 -0
  29. gwaslab/util_ex_process_ref.py +3 -3
  30. gwaslab/util_ex_run_coloc.py +26 -4
  31. gwaslab/util_in_calculate_gc.py +6 -6
  32. gwaslab/util_in_calculate_power.py +42 -43
  33. gwaslab/util_in_convert_h2.py +8 -8
  34. gwaslab/util_in_fill_data.py +30 -30
  35. gwaslab/util_in_filter_value.py +201 -74
  36. gwaslab/util_in_get_density.py +10 -10
  37. gwaslab/util_in_get_sig.py +445 -71
  38. gwaslab/viz_aux_annotate_plot.py +12 -12
  39. gwaslab/viz_aux_quickfix.py +42 -37
  40. gwaslab/viz_aux_reposition_text.py +10 -7
  41. gwaslab/viz_aux_save_figure.py +18 -8
  42. gwaslab/viz_plot_compare_af.py +32 -33
  43. gwaslab/viz_plot_compare_effect.py +63 -71
  44. gwaslab/viz_plot_miamiplot2.py +34 -26
  45. gwaslab/viz_plot_mqqplot.py +126 -75
  46. gwaslab/viz_plot_qqplot.py +11 -8
  47. gwaslab/viz_plot_regionalplot.py +36 -33
  48. gwaslab/viz_plot_rg_heatmap.py +28 -26
  49. gwaslab/viz_plot_stackedregional.py +40 -21
  50. gwaslab/viz_plot_trumpetplot.py +65 -61
  51. gwaslab-3.4.39.dist-info/LICENSE +674 -0
  52. {gwaslab-3.4.37.dist-info → gwaslab-3.4.39.dist-info}/METADATA +5 -4
  53. gwaslab-3.4.39.dist-info/RECORD +80 -0
  54. gwaslab-3.4.37.dist-info/RECORD +0 -72
  55. /gwaslab-3.4.37.dist-info/LICENSE → /gwaslab-3.4.39.dist-info/LICENSE_before_v3.4.39 +0 -0
  56. {gwaslab-3.4.37.dist-info → gwaslab-3.4.39.dist-info}/WHEEL +0 -0
  57. {gwaslab-3.4.37.dist-info → gwaslab-3.4.39.dist-info}/top_level.txt +0 -0
@@ -58,7 +58,7 @@ def annotate_single(
58
58
  annotation_col="CHR:POS"
59
59
  elif anno:
60
60
  annotation_col=anno
61
- if verbose: log.write(" -Annotating using column "+annotation_col+"...")
61
+ log.write(" -Annotating using column "+annotation_col+"...", verbose=verbose)
62
62
 
63
63
  ## calculate y span
64
64
  if region is not None:
@@ -66,7 +66,7 @@ def annotate_single(
66
66
  else:
67
67
  y_span = sumstats["i"].max()-sumstats["i"].min()
68
68
 
69
- if verbose: log.write(" -Adjusting text positions with repel_force={}...".format(repel_force))
69
+ log.write(" -Adjusting text positions with repel_force={}...".format(repel_force), verbose=verbose)
70
70
  if anno_style == "expand" :
71
71
  to_annotate.loc[:, "ADJUSTED_i"] = adjust_text_position(to_annotate["i"].values.copy(), y_span, repel_force,max_iter=anno_max_iter,log=log,amode=amode,verbose=verbose)
72
72
  ## iterate through variants to be annotated
@@ -214,7 +214,7 @@ def annotate_single(
214
214
  anno_count +=1
215
215
  #anno_adjust_keyargs = {"arrowprops":dict(arrowstyle='->', color='grey', linewidth=0.1,relpos=(0.5,0.5))}
216
216
  if anno_adjust==True:
217
- if verbose: log.write(" -Auto-adjusting text positions...")
217
+ log.write(" -Auto-adjusting text positions...", verbose=verbose)
218
218
  adjust_text(texts = anno_to_adjust_list,
219
219
  autoalign=False,
220
220
  only_move={'points':'x', 'text':'x', 'objects':'x'},
@@ -232,7 +232,7 @@ def annotate_single(
232
232
  )
233
233
 
234
234
  else:
235
- if verbose: log.write(" -Skip annotating")
235
+ log.write(" -Skip annotating", verbose=verbose)
236
236
 
237
237
  return ax1
238
238
 
@@ -275,7 +275,7 @@ def annotate_pair(
275
275
  for index,ax,to_annotate_df,anno_d, anno_alias in [(0,ax1,to_annotate1,anno_d1,anno_alias1),(1,ax5,to_annotate5,anno_d2,anno_alias2)]:
276
276
  ###################### annotate() args
277
277
  if to_annotate_df.empty is True:
278
- if verbose: log.write(" -Skipping annotation...")
278
+ log.write(" -Skipping annotation...", verbose=verbose)
279
279
  continue
280
280
 
281
281
  fontweight = "normal"
@@ -313,7 +313,7 @@ def annotate_pair(
313
313
  annotation_col=anno
314
314
  else:
315
315
  annotation_col=anno+"_"+str(index+1)
316
- if verbose: log.write(" -Annotating using column "+annotation_col+"...")
316
+ log.write(" -Annotating using column "+annotation_col+"...", verbose=verbose)
317
317
 
318
318
  ## calculate y span
319
319
  if region is not None:
@@ -453,7 +453,7 @@ def annotate_pair(
453
453
  anno_count +=1
454
454
 
455
455
  if anno_adjust==True:
456
- if verbose: log.write(" -Auto-adjusting text positions for plot {}...".format(index))
456
+ log.write(" -Auto-adjusting text positions for plot {}...".format(index), verbose=verbose)
457
457
  if index==0:
458
458
  va="bottom"
459
459
  ha='left'
@@ -476,7 +476,7 @@ def annotate_pair(
476
476
  lim =anno_max_iter
477
477
  )
478
478
  else:
479
- if verbose: log.write(" -Skip annotating")
479
+ log.write(" -Skip annotating", verbose=verbose)
480
480
  return ax1,ax5
481
481
 
482
482
 
@@ -521,7 +521,7 @@ def annotate_subtype(
521
521
  annotation_col="CHR:POS"
522
522
  elif anno:
523
523
  annotation_col=anno
524
- if verbose: log.write(" -Annotating using column "+annotation_col+"...")
524
+ log.write(" -Annotating using column "+annotation_col+"...", verbose=verbose)
525
525
 
526
526
  ## calculate y span
527
527
  if region is not None:
@@ -529,7 +529,7 @@ def annotate_subtype(
529
529
  else:
530
530
  y_span = sumstats["i"].max()-sumstats["i"].min()
531
531
 
532
- if verbose: log.write(" -Adjusting text positions with repel_force={}...".format(repel_force))
532
+ log.write(" -Adjusting text positions with repel_force={}...".format(repel_force), verbose=verbose)
533
533
  if anno_style == "expand" :
534
534
  to_annotate.loc[:, "ADJUSTED_i"] = adjust_text_position(to_annotate["i"].values.copy(), y_span, repel_force,max_iter=anno_max_iter,log=log,verbose=verbose)
535
535
  ## iterate through variants to be annotated
@@ -656,7 +656,7 @@ def annotate_subtype(
656
656
  anno_count +=1
657
657
  #anno_adjust_keyargs = {"arrowprops":dict(arrowstyle='->', color='grey', linewidth=0.1,relpos=(0.5,0.5))}
658
658
  if anno_adjust==True:
659
- if verbose: log.write(" -Auto-adjusting text positions...")
659
+ log.write(" -Auto-adjusting text positions...", verbose=verbose)
660
660
  adjust_text(texts = anno_to_adjust_list,
661
661
  autoalign=False,
662
662
  only_move={'points':'x', 'text':'x', 'objects':'x'},
@@ -674,7 +674,7 @@ def annotate_subtype(
674
674
  )
675
675
 
676
676
  else:
677
- if verbose: log.write(" -Skip annotating")
677
+ log.write(" -Skip annotating", verbose=verbose)
678
678
 
679
679
  return ax1
680
680
 
@@ -5,7 +5,7 @@ from gwaslab.bd_common_data import get_chr_to_number
5
5
  from gwaslab.bd_common_data import get_number_to_chr
6
6
  from math import ceil
7
7
 
8
- def _quick_fix(sumstats, chr_dict=get_chr_to_number(), scaled=False, chrom="CHR", pos="POS", p="P", mlog10p="MLOG10P",verbose=True, log=Log()):
8
+ def _quick_fix(sumstats, chr_dict=get_chr_to_number(), scaled=False, chrom="CHR", pos="POS", p="P", mlog10p="MLOG10P",log=Log(), verbose=True):
9
9
  '''
10
10
  quick sanity check for input sumstats
11
11
  '''
@@ -31,39 +31,40 @@ def _quick_fix_p_value(sumstats, p="P", mlog10p="MLOG10P", scaled=False,verbose=
31
31
  '''
32
32
  if scaled==True:
33
33
  # if scaled, add scaled P and P col
34
- if verbose:log.write(" -P values are already scaled...")
35
- if verbose:log.write(" -Sumstats -log10(P) values are being converted to P...")
34
+ log.write(" -P values are already scaled...", verbose=verbose)
35
+ log.write(" -Sumstats -log10(P) values are being converted to P...", verbose=verbose)
36
36
  sumstats["scaled_P"] = sumstats[mlog10p].copy()
37
37
  sumstats[p]= np.power(10,-sumstats[mlog10p].astype("float64"))
38
38
  return sumstats
39
39
  # bad p : na and outside (0,1]
40
40
  bad_p_value = (sumstats[p].isna()) | (sumstats[p] > 1) | (sumstats[p] <= 0)
41
- if verbose:
42
- log.write(" -Sanity check after conversion: " + str(sum(bad_p_value)) +
43
- " variants with P value outside of (0,1] will be removed...")
41
+
42
+ log.write(" -Sanity check after conversion: " + str(sum(bad_p_value)) +
43
+ " variants with P value outside of (0,1] will be removed...", verbose=verbose)
44
44
  sumstats = sumstats.loc[~bad_p_value, :]
45
45
  return sumstats
46
46
 
47
47
 
48
- def _quick_fix_mlog10p(sumstats,p="P", mlog10p="MLOG10P", scaled=False, verbose=True, log=Log()):
48
+ def _quick_fix_mlog10p(insumstats,p="P", mlog10p="MLOG10P", scaled=False, log=Log(), verbose=True):
49
49
  '''
50
50
  drop variants with bad -log10(P) values
51
51
  '''
52
+ sumstats = insumstats.copy()
52
53
  if scaled != True:
53
- if verbose:log.write(" -Sumstats P values are being converted to -log10(P)...")
54
+ log.write(" -Sumstats P values are being converted to -log10(P)...", verbose=verbose)
54
55
  sumstats["scaled_P"] = -np.log10(sumstats[p].astype("float64"))
55
56
 
56
57
  #with pd.option_context('mode.use_inf_as_na', True):
57
58
  # is_na = sumstats["scaled_P"].isna()
58
59
  if_inf_na = np.isinf(sumstats["scaled_P"]) | sumstats["scaled_P"].isna()
59
- if verbose:
60
- log.write(" -Sanity check: "+str(sum(if_inf_na)) +
61
- " na/inf/-inf variants will be removed...")
60
+
61
+ log.write(" -Sanity check: "+str(sum(if_inf_na)) +
62
+ " na/inf/-inf variants will be removed...", verbose=verbose)
62
63
  sumstats = sumstats.loc[~if_inf_na, :]
63
64
  return sumstats
64
65
 
65
66
 
66
- def _quick_fix_eaf(seires, verbose=True, log=Log()):
67
+ def _quick_fix_eaf(seires,log=Log(), verbose=True):
67
68
  '''
68
69
  conversion of eaf to maf
69
70
  '''
@@ -73,7 +74,7 @@ def _quick_fix_eaf(seires, verbose=True, log=Log()):
73
74
  return seires.copy()
74
75
 
75
76
 
76
- def _quick_fix_chr(seires, chr_dict, verbose=True, log=Log()):
77
+ def _quick_fix_chr(seires, chr_dict,log=Log(), verbose=True):
77
78
  '''
78
79
  conversion and check for chr
79
80
  '''
@@ -84,7 +85,7 @@ def _quick_fix_chr(seires, chr_dict, verbose=True, log=Log()):
84
85
  return seires
85
86
 
86
87
 
87
- def _quick_fix_pos(seires, verbose=True, log=Log()):
88
+ def _quick_fix_pos(seires,log=Log(), verbose=True):
88
89
  '''
89
90
  force conversion for pos
90
91
  '''
@@ -92,7 +93,7 @@ def _quick_fix_pos(seires, verbose=True, log=Log()):
92
93
  return seires
93
94
 
94
95
 
95
- def _get_largenumber(*args, log=Log()):
96
+ def _get_largenumber(*args,log=Log(), verbose=True):
96
97
  '''
97
98
  get a helper large number, >> max(pos)
98
99
  '''
@@ -104,11 +105,11 @@ def _get_largenumber(*args, log=Log()):
104
105
  else:
105
106
  break
106
107
  if i == 7:
107
- log.write(" -Warning: max POS is too large!")
108
+ log.warning("Max POS is too large!")
108
109
  return large_number
109
110
 
110
111
 
111
- def _quick_add_tchrpos(sumstats, chr="chr", pos="POS", large_number=10000000000, dropchrpos=False, verbose=True, log=Log()):
112
+ def _quick_add_tchrpos(sumstats, chr="chr", pos="POS", large_number=10000000000, dropchrpos=False,log=Log(), verbose=True):
112
113
  sumstats["TCHR+POS"] = sumstats["CHR"]*large_number + sumstats["POS"]
113
114
  sumstats["TCHR+POS"] = sumstats["TCHR+POS"].astype('Int64')
114
115
  if dropchrpos == True:
@@ -117,7 +118,7 @@ def _quick_add_tchrpos(sumstats, chr="chr", pos="POS", large_number=10000000000,
117
118
  return sumstats
118
119
 
119
120
 
120
- def _quick_merge_sumstats(sumstats1, sumstats2):
121
+ def _quick_merge_sumstats(sumstats1, sumstats2, log=Log(), verbose=True):
121
122
  merged_sumstats = pd.merge(sumstats1, sumstats2, on="TCHR+POS", how="outer", suffixes=('_1', '_2'))
122
123
  merged_sumstats["CHR"] = merged_sumstats["CHR_1"]
123
124
  merged_sumstats["POS"] = merged_sumstats["POS_1"]
@@ -126,7 +127,7 @@ def _quick_merge_sumstats(sumstats1, sumstats2):
126
127
  merged_sumstats = merged_sumstats.drop(labels=["CHR_1", "CHR_2", "POS_1", "POS_2"],axis=1)
127
128
  return merged_sumstats
128
129
 
129
- def _quick_assign_i(sumstats, chrom="CHR",pos="POS"):
130
+ def _quick_assign_i(sumstats, chrom="CHR",pos="POS",log=Log(), verbose=True):
130
131
  # sort by CHR an POS
131
132
  sumstats = sumstats.sort_values([chrom,pos])
132
133
  # set new id
@@ -158,7 +159,7 @@ def _quick_assign_i(sumstats, chrom="CHR",pos="POS"):
158
159
  sumstats["i"] = np.floor(pd.to_numeric(sumstats["i"], errors='coerce')).astype('Int64')
159
160
  return sumstats, chrom_df
160
161
 
161
- def _quick_assign_i_with_rank(sumstats, chrpad, use_rank=False, chrom="CHR",pos="POS",drop_chr_start=False,_posdiccul=None):
162
+ def _quick_assign_i_with_rank(sumstats, chrpad, use_rank=False, chrom="CHR",pos="POS",drop_chr_start=False,_posdiccul=None,log=Log(), verbose=True):
162
163
  # align all variants on a single axis (i)
163
164
  sumstats = sumstats.sort_values([chrom,pos])
164
165
  if use_rank is True:
@@ -218,7 +219,7 @@ def _quick_assign_i_with_rank(sumstats, chrpad, use_rank=False, chrom="CHR",pos=
218
219
  sumstats["i"] = np.floor(pd.to_numeric(sumstats["i"], errors='coerce')).astype('Int64')
219
220
  return sumstats, chrom_df
220
221
 
221
- def _quick_assign_marker_relative_size(series, sig_level = 5e-8, suggestive_sig_level=5e-6, lower_level=5e-4):
222
+ def _quick_assign_marker_relative_size(series, sig_level = 5e-8, suggestive_sig_level=5e-6, lower_level=5e-4,log=Log(), verbose=True):
222
223
  size_series = series.copy()
223
224
  size_series[:] = 1
224
225
 
@@ -231,7 +232,7 @@ def _quick_assign_marker_relative_size(series, sig_level = 5e-8, suggestive_sig_
231
232
  size_series[is_sig_level] = 4
232
233
  return size_series
233
234
 
234
- def _quick_assign_highlight_hue(sumstats,highlight,highlight_windowkb, snpid="SNPID",chrom="CHR",pos="POS",verbose=True, log=Log()):
235
+ def _quick_assign_highlight_hue(sumstats,highlight,highlight_windowkb, snpid="SNPID",chrom="CHR",pos="POS",log=Log(), verbose=True):
235
236
  to_highlight = sumstats.loc[sumstats[snpid].isin(highlight),:]
236
237
  #assign colors: 0 is hightlight color
237
238
  for i,row in to_highlight.iterrows():
@@ -243,7 +244,7 @@ def _quick_assign_highlight_hue(sumstats,highlight,highlight_windowkb, snpid="SN
243
244
  sumstats.loc[right_chr&up_pos&low_pos,"HUE"]="0"
244
245
  return sumstats
245
246
 
246
- def _quick_assign_highlight_hue_pair(sumstats, highlight1, highlight2, highlight_windowkb, chrom="CHR",pos="POS",verbose=True, log=Log()):
247
+ def _quick_assign_highlight_hue_pair(sumstats, highlight1, highlight2, highlight_windowkb, chrom="CHR",pos="POS",log=Log(), verbose=True):
247
248
  #assign colors: 0 is hightlight color
248
249
  to_highlight1 = pd.DataFrame()
249
250
  to_highlight2 = pd.DataFrame()
@@ -271,34 +272,35 @@ def _quick_assign_highlight_hue_pair(sumstats, highlight1, highlight2, highlight
271
272
  sumstats.loc[right_chr&up_pos&low_pos,"HUE2"]="0"
272
273
  return sumstats, to_highlight1, to_highlight2
273
274
 
274
- def _quick_extract_snp_in_region(sumstats, region, chrom="CHR",pos="POS",verbose=True, log=Log()):
275
+ def _quick_extract_snp_in_region(sumstats, region, chrom="CHR",pos="POS",log=Log(), verbose=True):
275
276
  region_chr = region[0]
276
277
  region_start = region[1]
277
278
  region_end = region[2]
278
- if verbose:log.write(" -Extract SNPs in region : chr"+str(region_chr)+":"+str(region[1])+"-"+str(region[2])+ "...")
279
+ log.write(" -Extract SNPs in region : chr"+str(region_chr)+":"+str(region[1])+"-"+str(region[2])+ "...", verbose=verbose)
279
280
  is_in_region_snp = (sumstats[chrom]==region_chr) &(sumstats[pos]<region_end) &(sumstats[pos]>region_start)
280
- if verbose:log.write(" -Extract SNPs in specified regions: "+str(sum(is_in_region_snp)))
281
+ log.write(" -Extract SNPs in specified regions: "+str(sum(is_in_region_snp)), verbose=verbose)
281
282
  sumstats = sumstats.loc[is_in_region_snp,:]
282
283
  return sumstats
283
284
 
284
- def _cut(series, mode,cutfactor,cut,skip, ylabels, cut_log, verbose,lines_to_plot, log):
285
+ def _cut(series, mode,cutfactor,cut,skip, ylabels, cut_log, verbose, lines_to_plot, log):
286
+ log.write(" -Converting data above cut line...",verbose=verbose)
285
287
  if ylabels is not None:
286
288
  ylabels = pd.Series(ylabels)
287
289
  maxy = series.max()
288
290
  series = series.copy()
289
291
  if "b" not in mode:
290
- if verbose: log.write(" -Maximum -log10(P) value is "+str(maxy) +" .")
292
+ log.write(" -Maximum -log10(P) value is "+str(maxy) +" .", verbose=verbose)
291
293
  elif "b" in mode:
292
- if verbose: log.write(" -Maximum DENSITY value is "+str(maxy) +" .")
294
+ log.write(" -Maximum DENSITY value is "+str(maxy) +" .", verbose=verbose)
293
295
 
294
296
  maxticker=int(np.round(series.max(skipna=True)))
295
297
 
296
298
  if cut:
297
299
  # auto mode : determine curline and cut factor
298
300
  if cut==True:
299
- if verbose: log.write(" -Cut Auto mode is activated...")
301
+ log.write(" -Cut Auto mode is activated...", verbose=verbose)
300
302
  if maxy<30:
301
- if verbose: log.write(" - maxy <30 , no need to cut.")
303
+ log.write(" - maxy <30 , no need to cut.", verbose=verbose)
302
304
  cut=0
303
305
  else:
304
306
  cut = 20
@@ -325,9 +327,9 @@ def _cut(series, mode,cutfactor,cut,skip, ylabels, cut_log, verbose,lines_to_plo
325
327
  else:
326
328
  # cut linear mode
327
329
  if "b" not in mode:
328
- if verbose: log.write(" -Minus log10(P) values above " + str(cut)+" will be shrunk with a shrinkage factor of " + str(cutfactor)+"...")
330
+ log.write(" -Minus log10(P) values above " + str(cut)+" will be shrunk with a shrinkage factor of " + str(cutfactor)+"...", verbose=verbose)
329
331
  else:
330
- if verbose: log.write(" -Minus DENSITY values above " + str(cut)+" will be shrunk with a shrinkage factor of " + str(cutfactor)+"...")
332
+ log.write(" -Minus DENSITY values above " + str(cut)+" will be shrunk with a shrinkage factor of " + str(cutfactor)+"...", verbose=verbose)
331
333
 
332
334
  maxticker=int(np.round(series.max(skipna=True)))
333
335
 
@@ -340,7 +342,7 @@ def _cut(series, mode,cutfactor,cut,skip, ylabels, cut_log, verbose,lines_to_plo
340
342
  #sumstats.loc[sumstats["scaled_P"]>cut,"scaled_P"] = (sumstats.loc[sumstats["scaled_P"]>cut,"scaled_P"]-cut)/cutfactor + cut
341
343
 
342
344
  maxy = (maxticker-cut)/cutfactor + cut
343
- if verbose: log.write("Finished data conversion and sanity check.")
345
+
344
346
  return series, maxy, maxticker, cut, cutfactor,ylabels,lines_to_plot
345
347
 
346
348
  #def _cut_line(level, mode,cutfactor,cut,skip, ylabels, cut_log, verbose, log):
@@ -362,9 +364,11 @@ def _set_yticklabels(cut,
362
364
  font_family,
363
365
  ytick3,
364
366
  ylabels,
365
- ylabels_converted
367
+ ylabels_converted,
368
+ log=Log(),
369
+ verbose=True
366
370
  ):
367
-
371
+ log.write(" -Processing Y tick lables...",verbose=verbose)
368
372
  # if no cut
369
373
  if cut == 0:
370
374
  ax1.set_ylim(skip, ceil(maxy*1.2) )
@@ -430,7 +434,8 @@ def _set_yticklabels(cut,
430
434
 
431
435
  return ax1
432
436
 
433
- def _jagged_y(cut,skip,ax1,mode,mqqratio,jagged_len,jagged_wid):
437
+ def _jagged_y(cut,skip,ax1,mode,mqqratio,jagged_len,jagged_wid, log=Log(), verbose=True):
438
+ log.write(" -Processing jagged Y axis...",verbose=verbose)
434
439
  tycut = cut +0.3 #(cut - skip)/ (ax1.get_ylim()[1] - skip) + 0.002
435
440
  dy= jagged_len * (cut - skip)
436
441
  x0 = 0
@@ -2,23 +2,25 @@ import pandas as pd
2
2
  import numpy as np
3
3
  from gwaslab.g_Log import Log
4
4
 
5
- def adjust_text_position(positions, yspan, repel_force=0.01, max_iter=100,amode="int",log=Log(),verbose=True):
5
+ def adjust_text_position(positions, yspan, repel_force=0.01, max_iter=100,amode="int",log=Log(),verbose=True, min_factor=None):
6
6
  # check the number of variants to annotate
7
7
  #if repel_force>0:
8
8
  # if 1/(repel_force*2 +0.01) < len(positions):
9
- # if verbose: log.write(" -Too many variants to annotate; maybe it is better to reduce the number of variants")
9
+ # log.write(" -Too many variants to annotate; maybe it is better to reduce the number of variants")
10
10
  #else:
11
11
  if len(positions)>30:
12
- if verbose: log.write(" -Too many variants to annotate; maybe it is better to reduce the number of variants")
12
+ log.write(" -Too many variants to annotate; maybe it is better to reduce the number of variants",verbose=verbose)
13
13
 
14
14
  # calculate the steps
15
15
  if amode=="int":
16
16
  step = int(yspan*repel_force)
17
17
  elif amode=="log":
18
- min_factor = np.min(positions)
18
+ if min_factor is None:
19
+ min_factor = np.min(positions)
19
20
  #(1, max) -> (0, log(max)))
20
- positions = np.log(positions/min_factor)
21
+ positions = np.log2(positions/min_factor)
21
22
  step = max(positions)*repel_force
23
+
22
24
  else:
23
25
  step = yspan*repel_force
24
26
 
@@ -33,7 +35,8 @@ def adjust_text_position(positions, yspan, repel_force=0.01, max_iter=100,amode=
33
35
  if amode=="int":
34
36
  return np.floor(pd.to_numeric(positions, errors='coerce')).astype('Int64').copy()
35
37
  elif amode=="log":
36
- return np.exp(pd.to_numeric(positions, errors='coerce')) * min_factor
38
+
39
+ return np.power(2, pd.to_numeric(positions, errors='coerce'))* min_factor
37
40
  else:
38
41
  return pd.to_numeric(positions, errors='coerce')
39
42
  else:
@@ -44,7 +47,7 @@ def adjust_text_position(positions, yspan, repel_force=0.01, max_iter=100,amode=
44
47
  move_position_from_center_float(positions, index, step)
45
48
 
46
49
  # when reaching maximum iteration, return anyway
47
- if verbose: log.write(" -Reaching maximum iteration: {}; Skipping...".format(max_iter))
50
+ log.write(" -Reaching maximum iteration: {}; Skipping...".format(max_iter),verbose=verbose)
48
51
  if amode=="int":
49
52
  return np.floor(pd.to_numeric(positions, errors='coerce')).astype('Int64').copy()
50
53
  elif amode=="log":
@@ -4,23 +4,33 @@ import time
4
4
  import os.path
5
5
 
6
6
  def save_figure(fig, save, keyword, save_args=None, log = Log(), verbose=True):
7
+ log.write("Start to save figure..." ,verbose=verbose)
7
8
  if save_args is None:
8
9
  save_args = {}
10
+
9
11
  if save:
10
- if verbose: log.write("Saving plot:")
11
12
  if save==True:
12
13
  default_path = get_default_path(keyword)
13
14
  fig.savefig(default_path, bbox_inches="tight",**save_args)
14
- log.write(" -Saved to "+ default_path + " successfully!" )
15
+ log.write(" -Saved to "+ default_path + " successfully!" ,verbose=verbose)
15
16
  else:
16
- if os.path.exists(save):
17
- fig.savefig(save,bbox_inches="tight",**save_args)
18
- log.write(" -Saved to "+ save + " successfully! (overwrite)" )
17
+ if save[-3:]=="pdf":
18
+ if os.path.exists(save):
19
+ fig.savefig(save, **save_args)
20
+ log.write(" -Saved to "+ save + " successfully! (pdf, overwrite)" ,verbose=verbose)
21
+ else:
22
+ fig.savefig(save, **save_args)
23
+ log.write(" -Saved to "+ save + " successfully! (pdf)" ,verbose=verbose)
19
24
  else:
20
- fig.savefig(save,bbox_inches="tight",**save_args)
21
- log.write(" -Saved to "+ save + " successfully!" )
25
+ if os.path.exists(save):
26
+ fig.savefig(save,bbox_inches="tight",**save_args)
27
+ log.write(" -Saved to "+ save + " successfully! (overwrite)" ,verbose=verbose)
28
+ else:
29
+ fig.savefig(save,bbox_inches="tight",**save_args)
30
+ log.write(" -Saved to "+ save + " successfully!" ,verbose=verbose)
22
31
  else:
23
- log.write(" -Skip saving figures!" )
32
+ log.write(" -Skip saving figure!" ,verbose=verbose)
33
+ log.write("Finished saving figure..." ,verbose=verbose)
24
34
 
25
35
  def get_default_path(keyword,fmt="png"):
26
36
  path_dictionary = {
@@ -10,6 +10,7 @@ from gwaslab.viz_aux_save_figure import save_figure
10
10
  def plotdaf(sumstats,
11
11
  eaf="EAF",
12
12
  daf="DAF",
13
+ raf="RAF",
13
14
  threshold=0.16,
14
15
  xlabel="Alternative Allele Frequency in Reference Population (RAF)",
15
16
  ylabel="Effect Allele Frequency in Sumstats (EAF)",
@@ -43,7 +44,7 @@ def plotdaf(sumstats,
43
44
  if plt_args is None:
44
45
  plt_args={"figsize":(8,4),"dpi":300}
45
46
  if histplot_args is None:
46
- histplot_args={"log_scale":(False,True)}
47
+ histplot_args={"log_scale":(False,False)}
47
48
  if reg_line_args is None:
48
49
  reg_line_args={"color":'#cccccc', "linestyle":'--'}
49
50
  if threshold_line_args is None:
@@ -55,8 +56,9 @@ def plotdaf(sumstats,
55
56
  if save_args is None:
56
57
  save_args = {}
57
58
 
58
- if verbose: log.write("Start to plot Reference frequency vs Effect allele frequency plot...")
59
- if not ((eaf in sumstats.columns) and (daf in sumstats.columns)):
59
+ log.write("Start to plot allele frequency comparison plot...", verbose=verbose)
60
+
61
+ if not ((eaf in sumstats.columns) and ((daf in sumstats.columns)) or (raf in sumstats.columns)):
60
62
  raise ValueError("EAF and/or DAF columns were not detected.")
61
63
 
62
64
  if "SNPID" in sumstats.columns:
@@ -70,40 +72,42 @@ def plotdaf(sumstats,
70
72
  if "NEA" in sumstats.columns:
71
73
  alleles.append("NEA")
72
74
 
75
+ if daf not in sumstats.columns:
76
+ sumstats[daf] = sumstats[eaf] - sumstats[raf]
73
77
 
74
78
  sumstats = sumstats.loc[(~sumstats[eaf].isna())&(~sumstats[daf].isna()),[snpid,eaf,daf]+alleles].copy()
75
- sumstats.loc[:,daf] = sumstats.loc[:,daf].astype("float")
76
- sumstats.loc[:,eaf] = sumstats.loc[:,eaf].astype("float")
77
- if verbose: log.write(" -Plotting valriants:" + str(len(sumstats)))
78
-
79
- sumstats.loc[:,"RAF"]=sumstats[eaf] - sumstats[daf]
79
+ sumstats[daf] = sumstats[daf].astype("float")
80
+ sumstats[eaf] = sumstats[eaf].astype("float")
81
+ log.write(" -Plotting valriants:" + str(len(sumstats)), verbose=verbose)
82
+ if raf not in sumstats.columns:
83
+ sumstats[raf] = sumstats[eaf] - sumstats[daf]
80
84
  sns.set_style("ticks")
81
- fig, (ax1, ax2) = plt.subplots(1, 2,**plt_args)
82
- ax1.scatter(sumstats["RAF"],sumstats[eaf],label="Non-outlier", **scatter_args)
85
+ fig, [ax1, ax2] = plt.subplots(1, 2,**plt_args)
86
+ ax1.scatter(sumstats[raf],sumstats[eaf],label="Non-outlier", **scatter_args)
83
87
 
84
88
  if is_threshold is True:
85
89
  is_outliers = sumstats[daf].abs() > threshold
86
90
  if sum(is_outliers)>0:
87
- ax1.scatter(sumstats.loc[is_outliers, "RAF"],sumstats.loc[is_outliers, eaf],label="Outlier", **scatter_args_outlier)
91
+ ax1.scatter(sumstats.loc[is_outliers, raf],sumstats.loc[is_outliers, eaf],label="Outlier", **scatter_args_outlier)
88
92
 
89
93
  if legend1 ==True:
90
94
  ax1.legend()
91
95
 
92
96
  if is_reg is True:
93
- if verbose: log.write(" -Plotting regression line...")
94
- reg = ss.linregress(sumstats["RAF"],sumstats[eaf])
95
- if verbose:log.write(" -Beta = ", reg[0])
96
- if verbose:log.write(" -Intercept = ", reg[1])
97
- if verbose:log.write(" -R2 = ", reg[2])
97
+ log.write(" -Plotting regression line...", verbose=verbose)
98
+ reg = ss.linregress(sumstats[raf],sumstats[eaf])
99
+ log.write(" -Beta = ", reg[0], verbose=verbose)
100
+ log.write(" -Intercept = ", reg[1], verbose=verbose)
101
+ log.write(" -R2 = ", reg[2], verbose=verbose)
98
102
  ax1.axline(xy1=(0,reg[1]),slope=reg[0],zorder=1,**reg_line_args)
99
103
  if r2 is True:
100
104
  ax1.text(0.98,0.02, "$R^2 = {:.3f}$".format(reg[2]), transform=ax1.transAxes, **r2_args)
101
105
 
102
106
  if is_threshold is True:
103
- if verbose: log.write(" -Threshold : " + str(threshold))
107
+ log.write(" -Threshold : " + str(threshold), verbose=verbose)
104
108
  num = sum(np.abs(sumstats[daf])>threshold )
105
- if verbose: log.write(" -Variants with relatively large DAF : ",num )
106
- if verbose: log.write(" -Percentage for variants with relatively large DAF : ",num/len(sumstats) )
109
+ log.write(" -Variants with relatively large DAF : ",num , verbose=verbose)
110
+ log.write(" -Percentage for variants with relatively large DAF : ",num/len(sumstats) , verbose=verbose)
107
111
  ax1.axline(xy1=(0,threshold),slope=1,zorder=1,**threshold_line_args)
108
112
  ax1.axline(xy1=(threshold,0),slope=1,zorder=1,**threshold_line_args)
109
113
 
@@ -119,25 +123,20 @@ def plotdaf(sumstats,
119
123
  ax1.set_ylim([0,1])
120
124
 
121
125
 
122
- sumstats.loc[:,"ID"] = sumstats.index
126
+ sumstats["ID"] = sumstats.index
123
127
 
124
- to_plot = pd.melt(sumstats,id_vars=['ID'], value_vars=['EAF',"RAF"], var_name='Types', value_name='Allele Frequency')
125
-
126
- sns.histplot(data=to_plot, x="Allele Frequency", hue="Types", fill=True, ax=ax2, legend = legend2 ,**histplot_args)
127
- ax2.set_xlabel("Allele Frequency",**font_args)
128
+ to_plot = pd.melt(sumstats,id_vars=['ID'], value_vars=[eaf,raf], var_name='Types', value_name='Allele Frequency').dropna()
128
129
 
130
+ sns.histplot(data=to_plot, x="Allele Frequency",
131
+ hue="Types", fill=True,
132
+ ax=ax2, legend = legend2,
133
+ **histplot_args)
134
+
135
+ ax2.set_xlabel("Allele Frequency",**font_args)
129
136
 
130
137
  plt.tight_layout()
131
138
  save_figure(fig, save, keyword="afc",save_args=save_args, log=log, verbose=verbose)
132
-
133
- #if save:
134
- # if verbose: log.write("Saving plot:")
135
- # if save==True:
136
- # fig.savefig("./allele_frequency_comparison.png",bbox_inches="tight",**save_args)
137
- # log.write(" -Saved to "+ "./allele_frequency_comparison.png" + " successfully!" )
138
- # else:
139
- # fig.savefig(save,bbox_inches="tight",**save_args)
140
- # log.write(" -Saved to "+ save + " successfully!" )
141
139
  sumstats = sumstats.drop(columns="ID")
140
+
142
141
  return fig, sumstats[is_outliers].copy()
143
142