gwaslab 3.4.37__py3-none-any.whl → 3.4.39__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of gwaslab might be problematic.
Files changed (57)
  1. gwaslab/bd_common_data.py +6 -3
  2. gwaslab/bd_download.py +9 -9
  3. gwaslab/bd_get_hapmap3.py +43 -9
  4. gwaslab/data/formatbook.json +722 -721
  5. gwaslab/g_Log.py +22 -5
  6. gwaslab/g_Sumstats.py +110 -163
  7. gwaslab/g_SumstatsPair.py +76 -25
  8. gwaslab/g_SumstatsT.py +2 -2
  9. gwaslab/g_Sumstats_summary.py +3 -3
  10. gwaslab/g_version.py +10 -10
  11. gwaslab/hm_casting.py +36 -17
  12. gwaslab/hm_harmonize_sumstats.py +354 -221
  13. gwaslab/hm_rsid_to_chrpos.py +1 -1
  14. gwaslab/io_preformat_input.py +49 -43
  15. gwaslab/io_read_ldsc.py +49 -1
  16. gwaslab/io_to_formats.py +428 -295
  17. gwaslab/ldsc_irwls.py +198 -0
  18. gwaslab/ldsc_jackknife.py +514 -0
  19. gwaslab/ldsc_ldscore.py +417 -0
  20. gwaslab/ldsc_parse.py +294 -0
  21. gwaslab/ldsc_regressions.py +747 -0
  22. gwaslab/ldsc_sumstats.py +629 -0
  23. gwaslab/qc_check_datatype.py +3 -3
  24. gwaslab/qc_fix_sumstats.py +891 -778
  25. gwaslab/util_ex_calculate_ldmatrix.py +31 -13
  26. gwaslab/util_ex_gwascatalog.py +25 -25
  27. gwaslab/util_ex_ldproxyfinder.py +10 -10
  28. gwaslab/util_ex_ldsc.py +189 -0
  29. gwaslab/util_ex_process_ref.py +3 -3
  30. gwaslab/util_ex_run_coloc.py +26 -4
  31. gwaslab/util_in_calculate_gc.py +6 -6
  32. gwaslab/util_in_calculate_power.py +42 -43
  33. gwaslab/util_in_convert_h2.py +8 -8
  34. gwaslab/util_in_fill_data.py +30 -30
  35. gwaslab/util_in_filter_value.py +201 -74
  36. gwaslab/util_in_get_density.py +10 -10
  37. gwaslab/util_in_get_sig.py +445 -71
  38. gwaslab/viz_aux_annotate_plot.py +12 -12
  39. gwaslab/viz_aux_quickfix.py +42 -37
  40. gwaslab/viz_aux_reposition_text.py +10 -7
  41. gwaslab/viz_aux_save_figure.py +18 -8
  42. gwaslab/viz_plot_compare_af.py +32 -33
  43. gwaslab/viz_plot_compare_effect.py +63 -71
  44. gwaslab/viz_plot_miamiplot2.py +34 -26
  45. gwaslab/viz_plot_mqqplot.py +126 -75
  46. gwaslab/viz_plot_qqplot.py +11 -8
  47. gwaslab/viz_plot_regionalplot.py +36 -33
  48. gwaslab/viz_plot_rg_heatmap.py +28 -26
  49. gwaslab/viz_plot_stackedregional.py +40 -21
  50. gwaslab/viz_plot_trumpetplot.py +65 -61
  51. gwaslab-3.4.39.dist-info/LICENSE +674 -0
  52. {gwaslab-3.4.37.dist-info → gwaslab-3.4.39.dist-info}/METADATA +5 -4
  53. gwaslab-3.4.39.dist-info/RECORD +80 -0
  54. gwaslab-3.4.37.dist-info/RECORD +0 -72
  55. /gwaslab-3.4.37.dist-info/LICENSE → /gwaslab-3.4.39.dist-info/LICENSE_before_v3.4.39 +0 -0
  56. {gwaslab-3.4.37.dist-info → gwaslab-3.4.39.dist-info}/WHEEL +0 -0
  57. {gwaslab-3.4.37.dist-info → gwaslab-3.4.39.dist-info}/top_level.txt +0 -0
@@ -21,24 +21,23 @@ def get_power(
  log=Log(),
  verbose=True
  ):
- if verbose: log.write(" Start to calculate statistical power...")
+ log.write(" Start to calculate statistical power...", verbose=verbose)
  if mode=="b":
- if verbose:
- log.write(" -Input settings (b mode):")
- log.write(" -Number of cases:{}".format(ncase))
- log.write(" -Number of controls:{}".format(ncontrol))
- if genotype_rr is not None:
- log.write(" -Risk allele RR:{:.3f}".format(genotype_rr))
- elif genotype_or is not None:
- log.write(" -Risk allele OR:{:.3f}".format(genotype_or))
- elif beta is not None:
- log.write(" -Risk allele beta:{:.3f}".format(beta))
- else:
- genotype_rr = 0.1
- log.write(" -Risk allele RR:{:.3f}".format(genotype_rr))
- log.write(" -Disease prevalence:{:.3f}".format(prevalence))
- log.write(" -Risk allele frequency: {:.3f}".format(daf))
- log.write(" -Significance level: {:.3e}".format(sig_level))
+ log.write(" -Input settings (b mode):", verbose=verbose)
+ log.write(" -Number of cases:{}".format(ncase), verbose=verbose)
+ log.write(" -Number of controls:{}".format(ncontrol), verbose=verbose)
+ if genotype_rr is not None:
+ log.write(" -Risk allele RR:{:.3f}".format(genotype_rr), verbose=verbose)
+ elif genotype_or is not None:
+ log.write(" -Risk allele OR:{:.3f}".format(genotype_or), verbose=verbose)
+ elif beta is not None:
+ log.write(" -Risk allele beta:{:.3f}".format(beta), verbose=verbose)
+ else:
+ genotype_rr = 0.1
+ log.write(" -Risk allele RR:{:.3f}".format(genotype_rr), verbose=verbose)
+ log.write(" -Disease prevalence:{:.3f}".format(prevalence), verbose=verbose)
+ log.write(" -Risk allele frequency: {:.3f}".format(daf), verbose=verbose)
+ log.write(" -Significance level: {:.3e}".format(sig_level), verbose=verbose)
  # Skol, A. D., Scott, L. J., Abecasis, G. R., & Boehnke, M. (2006). Joint analysis is more efficient than replication-based analysis for two-stage genome-wide association studies. Nature genetics, 38(2), 209-213.
  aaf = daf**2
  abf = 2 * (daf) * (1 - daf)
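The pattern in the hunk above repeats throughout this release: the `if verbose:` guard is removed from call sites and each message is passed a `verbose=` keyword instead, with the gating handled inside the logger (g_Log.py changes by +22 -5 in the file list above). A minimal sketch of a logger that supports this calling convention; this is an illustration only, not gwaslab's actual g_Log implementation:

    # Illustrative only: a logger whose write() accepts verbose= so call sites
    # no longer need "if verbose:" guards. Not gwaslab's real Log class.
    class Log:
        def __init__(self):
            self.log_text = ""

        def write(self, *message, verbose=True):
            line = " ".join(str(m) for m in message)
            self.log_text += line + "\n"   # always keep the text
            if verbose:                    # only printing is gated
                print(line)

    log = Log()
    log.write(" Start to calculate statistical power...", verbose=True)   # printed
    log.write(" -Input settings (b mode):", verbose=False)                # kept, not printed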
@@ -56,11 +55,11 @@ def get_power(
  # https://jamanetwork.com/journals/jama/fullarticle/188182

  if or_to_rr ==False:
- if verbose: log.write(" -Alogorithm: Skol, Andrew D., et al. Nature genetics 38.2 (2006): 209-213....")
- if verbose: log.write(" -GRR is approximated using OR. For prevalence < 10%, GRR is very similar to OR....")
+ log.write(" -Alogorithm: Skol, Andrew D., et al. Nature genetics 38.2 (2006): 209-213....", verbose=verbose)
+ log.write(" -GRR is approximated using OR. For prevalence < 10%, GRR is very similar to OR....", verbose=verbose)
  else:
- if verbose: log.write(" -OR is converted to GRR using base prevalence: {}".format(prevalence))
- if verbose: log.write(" -Alogorithm: Zhang, J., & Kai, F. Y. (1998). What's the relative risk?: A method of correcting the odds ratio in cohort studies of common outcomes. Jama, 280(19), 1690-1691.....")
+ log.write(" -OR is converted to GRR using base prevalence: {}".format(prevalence), verbose=verbose)
+ log.write(" -Alogorithm: Zhang, J., & Kai, F. Y. (1998). What's the relative risk?: A method of correcting the odds ratio in cohort studies of common outcomes. Jama, 280(19), 1690-1691.....", verbose=verbose)

  # additive
  x = [ 2*genotype_rr-1, genotype_rr, 1 ]
@@ -68,19 +67,19 @@ def get_power(
  aap= x[0] * prevalence / (x[0]*aaf + x[1]*abf + x[2]*bbf)
  abp= x[1] * prevalence / (x[0]*aaf + x[1]*abf + x[2]*bbf)
  bbp= x[2] * prevalence / (x[0]*aaf + x[1]*abf + x[2]*bbf)
- if verbose: log.write("Probability of disease :")
- if verbose: log.write(" - Individuals with AA genotype: {:.3f}".format(aap))
- if verbose: log.write(" - Individuals with AB genotype: {:.3f}".format(abp))
- if verbose: log.write(" - Individuals with BB genotype: {:.3f}".format(bbp))
+ log.write("Probability of disease :", verbose=verbose)
+ log.write(" - Individuals with AA genotype: {:.3f}".format(aap), verbose=verbose)
+ log.write(" - Individuals with AB genotype: {:.3f}".format(abp), verbose=verbose)
+ log.write(" - Individuals with BB genotype: {:.3f}".format(bbp), verbose=verbose)

  pcase= (aap * aaf + abp * abf*0.5) / prevalence
  pcontrol=((1-aap )* aaf + (1-abp )* abf*0.5) / (1 - prevalence)

  vcase = pcase *(1-pcase)
  vcontrol =pcontrol *(1-pcontrol)
- if verbose: log.write("Expected risk allele frequency:")
- if verbose: log.write(" - In cases: {:.3f}".format(pcase))
- if verbose: log.write(" - In controls: {:.3f}".format(pcontrol))
+ log.write("Expected risk allele frequency:", verbose=verbose)
+ log.write(" - In cases: {:.3f}".format(pcase), verbose=verbose)
+ log.write(" - In controls: {:.3f}".format(pcontrol), verbose=verbose)

  num= (pcase - pcontrol)
  den= np.sqrt( (vcase/ncase + vcontrol/ncontrol)*0.5 )
@@ -88,22 +87,22 @@ def get_power(

  c = ss.norm.isf(sig_level/2)
  power = 1 - ss.norm.cdf(c-u) + ss.norm.cdf(-c-u)
- if verbose: log.write("Expected power: {:.3f}".format(power))
+ log.write("Expected power: {:.3f}".format(power), verbose=verbose)

  elif mode=="q":
  if beta is None:
  beta = 0.1
- if verbose:
- log.write(" -Input settings (q mode):")
- log.write(" -Significance level: {}".format(sig_level))
- log.write(" -EAF: {}".format(eaf))
- log.write(" -BETA: {}".format(beta))
- log.write(" -N: {}".format(n))
- log.write(" -SNPR2: {}".format(2*eaf*(1-eaf)*(beta**2)))
+
+ log.write(" -Input settings (q mode):", verbose=verbose)
+ log.write(" -Significance level: {}".format(sig_level), verbose=verbose)
+ log.write(" -EAF: {}".format(eaf), verbose=verbose)
+ log.write(" -BETA: {}".format(beta), verbose=verbose)
+ log.write(" -N: {}".format(n), verbose=verbose)
+ log.write(" -SNPR2: {}".format(2*eaf*(1-eaf)*(beta**2)), verbose=verbose)
  c = ss.chi2.isf(sig_level,df=1)
  NCP = n * 2*eaf*(1-eaf)*(beta**2)/vary
  power = 1 - ss.ncx2.cdf(c, df=1, nc=NCP)
- if verbose: log.write("Finished calculating statistical power.")
+ log.write("Finished calculating statistical power.", verbose=verbose)
  return power

  def get_beta(
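In the quantitative-trait branch above ("q" mode), power is read off a 1-df noncentral chi-square: the threshold is c = chi2.isf(sig_level, df=1) and the noncentrality parameter is NCP = N * 2 * EAF * (1 - EAF) * BETA**2 / Var(y). A self-contained sketch of that calculation; the function name and defaults here are illustrative, not gwaslab's API:

    import scipy.stats as ss

    def power_quantitative(beta, eaf, n, sig_level=5e-8, vary=1.0):
        # Chi-square threshold for the chosen significance level (1 df).
        c = ss.chi2.isf(sig_level, df=1)
        # Noncentrality: sample size times per-SNP explained variance, scaled by Var(y).
        ncp = n * 2 * eaf * (1 - eaf) * beta**2 / vary
        # Power = probability that the noncentral chi-square exceeds the threshold.
        return 1 - ss.ncx2.cdf(c, df=1, nc=ncp)

    print(power_quantitative(beta=0.1, eaf=0.3, n=100000))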
@@ -137,11 +136,11 @@ def get_beta(
  eafs = np.linspace(eaf_range[1],eaf_range[0],n_matrix)
  betas = np.linspace(beta_range[0],beta_range[1],n_matrix)

- if verbose: log.write(" -Updating eaf-beta matrix...")
+ log.write(" -Updating eaf-beta matrix...", verbose=verbose)
  for i in range(n_matrix):
  eaf_beta_matrix[i,] = calculate_power_single(beta=betas,eaf=eafs[i],n=n,sig_level=sig_level,vary=vary)

- if verbose: log.write(" -Extracting eaf-beta combinations with power = {}...".format(t))
+ log.write(" -Extracting eaf-beta combinations with power = {}...".format(t), verbose=verbose)
  i,j=1,1
  eaf_beta = []
  while i<n_matrix-1 and j<n_matrix-1:
@@ -207,11 +206,11 @@ def get_beta_binary(
  eafs = np.linspace(eaf_range[1],eaf_range[0],n_matrix)
  betas = np.linspace(beta_range[0],beta_range[1],n_matrix)

- if verbose: log.write(" -Updating eaf-beta matrix...")
+ log.write(" -Updating eaf-beta matrix...", verbose=verbose)
  if or_to_rr ==False:
- if verbose: log.write(" -GRR is approximated using OR. For prevalence < 10%, GRR is very similar to OR....")
+ log.write(" -GRR is approximated using OR. For prevalence < 10%, GRR is very similar to OR....", verbose=verbose)
  else:
- if verbose: log.write(" -OR is converted to GRR using base prevalence: {}".format(prevalence))
+ log.write(" -OR is converted to GRR using base prevalence: {}".format(prevalence), verbose=verbose)

  for i in range(n_matrix):
  eaf_beta_matrix[i,] = calculate_power_single(beta=betas,
@@ -222,7 +221,7 @@ def get_beta_binary(
  sig_level=sig_level,
  or_to_rr=or_to_rr)

- if verbose: log.write(" -Extracting eaf-beta combinations with power = {}...".format(t))
+ log.write(" -Extracting eaf-beta combinations with power = {}...".format(t), verbose=verbose)
  i,j=1,1
  eaf_beta = []
  while i<n_matrix-1 and j<n_matrix-1:
@@ -65,7 +65,7 @@ def _get_per_snp_r2(sumstats,
  adjuested=False,
  verbose=True):
  # Pierce, B. L., Ahsan, H., & VanderWeele, T. J. (2011). Power and instrument strength requirements for Mendelian randomization studies using multiple genetic variants. International journal of epidemiology, 40(3), 740-752.
- if verbose: log.write("Start to calculate per-SNP heritibility...")
+ log.write("Start to calculate per-SNP heritibility...", verbose=verbose)
  if type(k) is int or type(k) is float:
  pass
  elif k =="all":
@@ -81,18 +81,18 @@ def _get_per_snp_r2(sumstats,
  # Var(e) = betase**2 * 2 * N * MAF * (1-MAF)
  # r2 = Var(beta * X) / Var(y)

- if verbose: log.write(" -Calculating per-SNP rsq by 2 * (BETA**2) * AF * (1-AF) / Var(y)...")
+ log.write(" -Calculating per-SNP rsq by 2 * (BETA**2) * AF * (1-AF) / Var(y)...", verbose=verbose)
  sumstats["_VAR(BETAX)"] = 2*(sumstats[beta]**2)*sumstats[af]*(1-sumstats[af])

  if type(vary) is int or type(vary) is float:
- if verbose: log.write(" -Var(y) is provided: {}...".format(vary))
+ log.write(" -Var(y) is provided: {}...".format(vary), verbose=verbose)
  sumstats["SNPR2"] = sumstats["_VAR(BETAX)"] / vary
  elif vary=="se":
- if verbose: log.write(" -Var(y) is estimated from VAR(BETA * X), N, MAF, SE: {}...".format(vary))
+ log.write(" -Var(y) is estimated from VAR(BETA * X), N, MAF, SE: {}...".format(vary), verbose=verbose)
  sumstats["_SIGMA2"] = sumstats[se]**2 * 2*(sumstats[n])*sumstats[af]*(1-sumstats[af])
  sumstats["SNPR2"] = sumstats["_VAR(BETAX)"] / (sumstats["_SIGMA2"] + sumstats["_VAR(BETAX)"])
  else:
- if verbose: log.write(" -Warning: Not enough informationfor calculation.")
+ log.warning("Not enough information for calculation.")

  if mode=="b":
  if ncase not in sumstats.columns:
@@ -117,11 +117,11 @@ def _get_per_snp_r2(sumstats,
  else:
  snpr2 = "SNPR2"
  if n in sumstats.columns:
- if verbose: log.write(" -Calculating F-statistic: F = [(N-k-1)/k] * (r2/1-r2)... where k = {}".format(k))
- if verbose: log.write(" -For r2, {} is used.".format(snpr2))
+ log.write(" -Calculating F-statistic: F = [(N-k-1)/k] * (r2/1-r2)... where k = {}".format(k), verbose=verbose)
+ log.write(" -For r2, {} is used.".format(snpr2), verbose=verbose)
  sumstats["F"] = sumstats[snpr2]*(sumstats[n]-1 -k)/((1-sumstats[snpr2]) * k)

- if verbose: log.write("Finished calculating per-SNP heritibility!")
+ log.write("Finished calculating per-SNP heritability!", verbose=verbose)
  return sumstats
  #
  def get_population_allele_frequency(af, prop, odds_ratio, prevalence,eps=1e-15):
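The _get_per_snp_r2 hunks above compute the per-SNP variance explained as r2 = 2 * BETA**2 * AF * (1 - AF) / Var(y) and the instrument-strength F-statistic as F = [(N - k - 1)/k] * r2 / (1 - r2), following Pierce et al. (2011). The same arithmetic on plain pandas columns, with hypothetical column names rather than gwaslab's internal arguments:

    import pandas as pd

    def per_snp_r2_and_f(df, vary=1.0, k=1):
        # Variance explained by each SNP under an additive model.
        var_betax = 2 * df["BETA"]**2 * df["EAF"] * (1 - df["EAF"])
        r2 = var_betax / vary
        # F-statistic for instrument strength with k instruments.
        f_stat = r2 * (df["N"] - 1 - k) / ((1 - r2) * k)
        return df.assign(SNPR2=r2, F=f_stat)

    demo = pd.DataFrame({"BETA": [0.05, 0.12], "EAF": [0.2, 0.4], "N": [50000, 50000]})
    print(per_snp_r2_and_f(demo))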
@@ -9,7 +9,7 @@ from gwaslab.g_version import _get_version
  from gwaslab.qc_check_datatype import check_datatype

  def filldata(
- sumstats,
+ insumstats,
  to_fill=None,
  df=None,
  overwrite=False,
@@ -23,32 +23,32 @@ def filldata(
  # if a string is passed to to_fill, convert it to list
  if type(to_fill) is str:
  to_fill = [to_fill]
-
- if verbose: log.write("Start filling data using existing columns...{}".format(_get_version()))
+ sumstats = insumstats.copy()
+ log.write("Start filling data using existing columns...{}".format(_get_version()), verbose=verbose)

  check_datatype(sumstats,verbose=verbose,log=log)

  # check dupication ##############################################################################################
  skip_cols=[]
- if verbose: log.write(" -Overwrite mode: ",overwrite)
+ log.write(" -Overwrite mode: ",overwrite, verbose=verbose)
  if overwrite is False:
  for i in to_fill:
  if i in sumstats.columns:
  skip_cols.append(i)
  for i in skip_cols:
  to_fill.remove(i)
- if verbose: log.write(" -Skipping columns: ",skip_cols)
+ log.write(" -Skipping columns: ",skip_cols, verbose=verbose)
  if len(set(to_fill) & set(["OR","OR_95L","OR_95U","BETA","SE","P","Z","CHISQ","MLOG10P","MAF"]))==0:
  log.write(" -No available columns to fill. Skipping.", verbose=verbose)
  log.write("Finished filling data using existing columns.", verbose=verbose)
  return sumstats
- if verbose: log.write(" -Filling columns: ",to_fill)
+ log.write(" -Filling columns: ",to_fill, verbose=verbose)
  fill_iteratively(sumstats,to_fill,log,only_sig,df,extreme,verbose,sig_level)

  # ###################################################################################
  #sumstats = sortcolumn(sumstats, verbose=verbose, log=log)
  gc.collect()
- if verbose: log.write("Finished filling data using existing columns.")
+ log.write("Finished filling data using existing columns.", verbose=verbose)
  return sumstats

  ##########################################################################################################################
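One behavioral change in the filldata hunks above: the parameter is renamed to insumstats and the function now works on insumstats.copy(), so filled columns are returned in a new DataFrame rather than written into the caller's object in place. Callers that relied on in-place mutation now need to keep the return value. A hedged illustration of the pandas pattern, not the gwaslab API itself:

    import numpy as np
    import pandas as pd

    def fill_example(insumstats):
        sumstats = insumstats.copy()               # leave the caller's DataFrame untouched
        sumstats["OR"] = np.exp(sumstats["BETA"])  # illustrative fill step
        return sumstats

    raw = pd.DataFrame({"BETA": [0.1, -0.2]})
    filled = fill_example(raw)
    print("OR" in raw.columns, "OR" in filled.columns)   # False True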
@@ -56,20 +56,20 @@ def filldata(
  def fill_p(sumstats,log,df=None,only_sig=False,sig_level=5e-8,overwrite=False,verbose=True,filled_count=0):
  # MLOG10P -> P
  if "MLOG10P" in sumstats.columns:
- if verbose: log.write(" - Filling P value using MLOG10P column...")
+ log.write(" - Filling P value using MLOG10P column...", verbose=verbose)
  sumstats["P"] = np.power(10,-sumstats["MLOG10P"])
  filled_count +=1

  # Z -> P
  elif "Z" in sumstats.columns:
- if verbose: log.write(" - Filling P value using Z column...")
+ log.write(" - Filling P value using Z column...", verbose=verbose)
  stats.chisqprob = lambda chisq, degree_of_freedom: stats.chi2.sf(chisq, degree_of_freedom)
  sumstats["P"] = ss.chisqprob(sumstats["Z"]**2,1)
  filled_count +=1

  elif "CHISQ" in sumstats.columns:
  #CHISQ -> P
- if verbose: log.write(" - Filling P value using CHISQ column...")
+ log.write(" - Filling P value using CHISQ column...", verbose=verbose)
  stats.chisqprob = lambda chisq, degree_of_freedom: stats.chi2.sf(chisq, degree_of_freedom)
  if df is None:
  if only_sig is True and overwrite is True:
@@ -80,11 +80,11 @@ def fill_p(sumstats,log,df=None,only_sig=False,sig_level=5e-8,overwrite=False,ve
  filled_count +=1
  else:
  if only_sig is True and overwrite is True:
- if verbose: log.write(" - Filling P value using CHISQ column for variants:" , sum(sumstats["P"]<sig_level))
+ log.write(" - Filling P value using CHISQ column for variants:" , sum(sumstats["P"]<sig_level), verbose=verbose)
  sumstats.loc[sumstats["P"]<sig_level,"P"] = stats.chisqprob(sumstats.loc[sumstats["P"]<sig_level,"CHISQ"],sumstats.loc[sumstats["P"]<sig_level,df].astype("int"))
  filled_count +=1
  else:
- if verbose: log.write(" - Filling P value using CHISQ column for all valid variants:")
+ log.write(" - Filling P value using CHISQ column for all valid variants:", verbose=verbose)
  sumstats["P"] = stats.chisqprob(sumstats["CHISQ"],sumstats[df].astype("int"))
  filled_count +=1
  else:
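fill_p above recovers P from whichever column is available: P = 10**(-MLOG10P), or the chi-square survival function of Z**2 with 1 degree of freedom, or the survival function of CHISQ with a per-variant df column. A compact sketch of those conversions using scipy directly, rather than the patched stats.chisqprob shim shown in the diff:

    import numpy as np
    import scipy.stats as ss

    def p_from_mlog10p(mlog10p):
        return np.power(10.0, -np.asarray(mlog10p))

    def p_from_z(z):
        # Two-sided p-value via the chi-square survival function of Z**2 (1 df).
        return ss.chi2.sf(np.asarray(z)**2, df=1)

    def p_from_chisq(chisq, df=1):
        return ss.chi2.sf(np.asarray(chisq), df=df)

    print(p_from_mlog10p([7.3]), p_from_z([5.45]), p_from_chisq([29.7]))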
@@ -94,7 +94,7 @@ def fill_p(sumstats,log,df=None,only_sig=False,sig_level=5e-8,overwrite=False,ve
  def fill_z(sumstats,log,verbose=True,filled_count=0):
  # BETA/SE -> Z
  if ("BETA" in sumstats.columns) and ("SE" in sumstats.columns):
- if verbose: log.write(" - Filling Z using BETA/SE column...")
+ log.write(" - Filling Z using BETA/SE column...", verbose=verbose)
  sumstats["Z"] = sumstats["BETA"]/sumstats["SE"]
  filled_count +=1
  else:
@@ -104,12 +104,12 @@ def fill_chisq(sumstats,log,verbose=True,filled_count=0):
  def fill_chisq(sumstats,log,verbose=True,filled_count=0):
  # Z -> CHISQ
  if "Z" in sumstats.columns:
- if verbose: log.write(" - Filling CHISQ using Z column...")
+ log.write(" - Filling CHISQ using Z column...", verbose=verbose)
  sumstats["CHISQ"] = (sumstats["Z"])**2
  filled_count +=1
  elif "P" in sumstats.columns:
  # P -> CHISQ
- if verbose: log.write(" - Filling CHISQ using P column...")
+ log.write(" - Filling CHISQ using P column...", verbose=verbose)
  sumstats["CHISQ"] = ss.chi2.isf(sumstats["P"], 1)
  filled_count +=1
  else:
@@ -119,13 +119,13 @@ def fill_chisq(sumstats,log,verbose=True,filled_count=0):
  def fill_or(sumstats,log,verbose=True,filled_count=0):
  # BETA -> OR
  if "BETA" in sumstats.columns:
- if verbose: log.write(" - Filling OR using BETA column...")
+ log.write(" - Filling OR using BETA column...", verbose=verbose)
  sumstats["OR"] = np.exp(sumstats["BETA"])
  filled_count +=1
  # BETA/SE -> OR_95L / OR_95U
  # get confidence interval 95
  if ("BETA" in sumstats.columns) and ("SE" in sumstats.columns):
- if verbose: log.write(" - Filling OR_95L/OR_95U using BETA/SE columns...")
+ log.write(" - Filling OR_95L/OR_95U using BETA/SE columns...", verbose=verbose)
  # beta - 1.96 x se , beta + 1.96 x se
  sumstats["OR_95L"] = np.exp(sumstats["BETA"]-ss.norm.ppf(0.975)*sumstats["SE"])
  sumstats["OR_95U"] = np.exp(sumstats["BETA"]+ss.norm.ppf(0.975)*sumstats["SE"])
@@ -136,7 +136,7 @@ def fill_or(sumstats,log,verbose=True,filled_count=0):
  def fill_or95(sumstats,log,verbose=True,filled_count=0):
  # get confidence interval 95
  if ("BETA" in sumstats.columns) and ("SE" in sumstats.columns):
- if verbose: log.write(" - Filling OR_95L/OR_95U using BETA/SE columns...")
+ log.write(" - Filling OR_95L/OR_95U using BETA/SE columns...", verbose=verbose)
  # beta - 1.96 x se , beta + 1.96 x se
  sumstats["OR_95L"] = np.exp(sumstats["BETA"]-ss.norm.ppf(0.975)*sumstats["SE"])
  sumstats["OR_95U"] = np.exp(sumstats["BETA"]+ss.norm.ppf(0.975)*sumstats["SE"])
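fill_or and fill_or95 above derive the odds ratio and its 95% confidence bounds from BETA and SE: OR = exp(BETA), OR_95L = exp(BETA - 1.96*SE) and OR_95U = exp(BETA + 1.96*SE), with 1.96 obtained as the 97.5% normal quantile. The same arithmetic as a standalone snippet with illustrative names:

    import numpy as np
    import scipy.stats as ss

    def or_with_ci(beta, se, level=0.95):
        z = ss.norm.ppf(0.5 + level / 2)   # about 1.96 for a 95% interval
        beta, se = np.asarray(beta), np.asarray(se)
        return np.exp(beta), np.exp(beta - z * se), np.exp(beta + z * se)

    odds_ratio, lower, upper = or_with_ci([0.25], [0.05])
    print(odds_ratio, lower, upper)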
@@ -148,7 +148,7 @@ def fill_or95(sumstats,log,verbose=True,filled_count=0):
  def fill_beta(sumstats,log,verbose=True,filled_count=0):
  # OR -> beta
  if "OR" in sumstats.columns:
- if verbose: log.write(" - Filling BETA value using OR column...")
+ log.write(" - Filling BETA value using OR column...", verbose=verbose)
  sumstats["BETA"] = np.log(sumstats["OR"])
  filled_count +=1
  else:
@@ -158,27 +158,27 @@ def fill_beta(sumstats,log,verbose=True,filled_count=0):
  def fill_se(sumstats,log,verbose=True,filled_count=0):
  # OR / OR_95L /OR_95U -> SE
  if ("P" in sumstats.columns) and ("BETA" in sumstats.columns):
- if verbose: log.write(" - Filling SE value using BETA and P column...")
+ log.write(" - Filling SE value using BETA and P column...", verbose=verbose)
  sumstats["SE"]= np.abs(sumstats["BETA"]/ ss.norm.ppf(1-sumstats["P"]/2))
  filled_count +=1
  elif ("OR" in sumstats.columns) and ("OR_95U" in sumstats.columns):
- if verbose: log.write(" - Filling SE value using OR/OR_95U column...")
+ log.write(" - Filling SE value using OR/OR_95U column...", verbose=verbose)
  #
  sumstats["SE"]=(np.log(sumstats["OR_95U"]) - np.log(sumstats["OR"]))/ss.norm.ppf(0.975)
  filled_count +=1
  elif ("OR" in sumstats.columns) and ("OR_95L" in sumstats.columns):
- if verbose: log.write(" - Filling SE value using OR/OR_95L column...")
+ log.write(" - Filling SE value using OR/OR_95L column...", verbose=verbose)
  sumstats["SE"]=(np.log(sumstats["OR"]) - np.log(sumstats["OR_95L"]))/ss.norm.ppf(0.975)
  filled_count +=1
  else:
- if verbose: log.write(" - Not enough information to fill SE...")
+ log.write(" - Not enough information to fill SE...", verbose=verbose)
  return 0,filled_count
  return 1,filled_count

  def fill_mlog10p(sumstats,log,verbose=True,filled_count=0):
  if "P" in sumstats.columns:
  # P -> MLOG10P
- if verbose: log.write(" - Filling MLOG10P using P column...")
+ log.write(" - Filling MLOG10P using P column...", verbose=verbose)
  sumstats["MLOG10P"] = -np.log10(sumstats["P"])
  filled_count +=1
  else:
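fill_se above inverts the same relationship when only a confidence bound is available: SE = (log(OR_95U) - log(OR)) / 1.96, or the analogous expression from OR_95L, with a fallback to |BETA / quantile(1 - P/2)| when BETA and P are present. A short sketch of the CI-based recovery:

    import numpy as np
    import scipy.stats as ss

    def se_from_or_ci(odds_ratio, or_95u):
        # Half-width of the CI on the log-odds scale, divided by the 97.5% normal quantile.
        return (np.log(or_95u) - np.log(odds_ratio)) / ss.norm.ppf(0.975)

    print(se_from_or_ci(1.28, 1.41))   # roughly the SE behind a 1.28 (1.16-1.41) interval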
@@ -188,14 +188,14 @@ def fill_extreme_mlog10p(sumstats,log,verbose=True,filled_count=0):
  # ref: https://stackoverflow.com/questions/46416027/how-to-compute-p-values-from-z-scores-in-r-when-the-z-score-is-large-pvalue-muc/46416222#46416222
  if "Z" in sumstats.columns:
  # P -> MLOG10P
- if verbose: log.write(" - Filling MLOG10P using Z column...")
+ log.write(" - Filling MLOG10P using Z column...", verbose=verbose)
  sumstats = fill_extreme_mlog10(sumstats, "Z")
  filled_count +=1
  elif "BETA" in sumstats.columns and "SE" in sumstats.columns:
- if verbose: log.write(" - Z column not available...")
- if verbose: log.write(" - Filling Z using BETA/SE column...")
+ log.write(" - Z column not available...", verbose=verbose)
+ log.write(" - Filling Z using BETA/SE column...", verbose=verbose)
  sumstats["Z"] = sumstats["BETA"]/sumstats["SE"]
- if verbose: log.write(" - Filling MLOG10P using Z column...")
+ log.write(" - Filling MLOG10P using Z column...", verbose=verbose)
  sumstats = fill_extreme_mlog10(sumstats, "Z")
  filled_count +=1
  else:
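fill_extreme_mlog10p above handles Z scores so large that the two-sided p-value underflows to zero in double precision (see the Stack Overflow reference in the hunk). One common way to obtain MLOG10P directly on the log scale, shown as a generic sketch rather than gwaslab's fill_extreme_mlog10 internals:

    import numpy as np
    import scipy.stats as ss

    def extreme_mlog10p_from_z(z):
        # log of the two-sided p-value, computed without ever forming p itself:
        # log(p) = log(2) + log(sf(|z|)); then convert natural log to -log10.
        logp = np.log(2) + ss.norm.logsf(np.abs(z))
        return -logp / np.log(10)

    print(extreme_mlog10p_from_z(40.0))   # about 349, while 2 * ss.norm.sf(40.0) underflows to 0.0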
@@ -205,7 +205,7 @@ def fill_extreme_mlog10p(sumstats,log,verbose=True,filled_count=0):
  def fill_maf(sumstats,log,verbose=True,filled_count=0):
  if "EAF" in sumstats.columns:
  # EAF -> MAF
- if verbose: log.write(" - Filling MAF using EAF column...")
+ log.write(" - Filling MAF using EAF column...", verbose=verbose)
  sumstats["MAF"] = sumstats["EAF"].apply(lambda x: min(x,1-x) if pd.notnull(x) else np.nan)
  filled_count +=1
  else:
@@ -226,7 +226,7 @@ def fill_extreme_mlog10(sumstats, z):
  ####################################################################################################################
  def fill_iteratively(sumstats,raw_to_fill,log,only_sig,df,extreme,verbose,sig_level):
  to_fill = raw_to_fill.copy()
- if verbose: log.write(" - Filling Columns iteratively...")
+ log.write(" - Filling Columns iteratively...", verbose=verbose)

  filled_count=0
  for i in range(len(to_fill)+1):