gwaslab 3.6.6__py3-none-any.whl → 3.6.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gwaslab might be problematic. Click here for more details.

Files changed (110)
  1. gwaslab/__init__.py +57 -47
  2. gwaslab/{bd_common_data.py → bd/bd_common_data.py} +10 -9
  3. gwaslab/bd/bd_config.py +28 -0
  4. gwaslab/{bd_download.py → bd/bd_download.py} +1 -1
  5. gwaslab/{bd_get_hapmap3.py → bd/bd_get_hapmap3.py} +9 -6
  6. gwaslab/bd/bd_path_manager.py +110 -0
  7. gwaslab/data/formatbook.json +805 -9
  8. gwaslab/{ldsc_irwls.py → extension/ldsc/ldsc_irwls.py} +1 -1
  9. gwaslab/{ldsc_regressions.py → extension/ldsc/ldsc_regressions.py} +2 -2
  10. gwaslab/{ldsc_sumstats.py → extension/ldsc/ldsc_sumstats.py} +2 -2
  11. gwaslab/{prscs_mcmc_gtb.py → extension/prscs/prscs_mcmc_gtb.py} +1 -1
  12. gwaslab/g_Sumstats.py +130 -96
  13. gwaslab/g_SumstatsMulti.py +69 -40
  14. gwaslab/g_SumstatsPair.py +54 -37
  15. gwaslab/g_SumstatsSet.py +88 -81
  16. gwaslab/g_SumstatsT.py +6 -6
  17. gwaslab/g_Sumstats_polars.py +84 -84
  18. gwaslab/g_meta_update.py +1 -1
  19. gwaslab/g_vchange_status.py +4 -4
  20. gwaslab/g_version.py +2 -2
  21. gwaslab/{hm_casting.py → hm/hm_casting.py} +4 -4
  22. gwaslab/{hm_casting_polars.py → hm/hm_casting_polars.py} +4 -4
  23. gwaslab/hm/hm_harmonize_sumstats.py +1635 -0
  24. gwaslab/hm_harmonize_sumstats.py +3 -8
  25. gwaslab/{io_load_ld.py → io/io_load_ld.py} +16 -13
  26. gwaslab/{io_preformat_input.py → io/io_preformat_input.py} +152 -73
  27. gwaslab/{io_preformat_input_polars.py → io/io_preformat_input_polars.py} +7 -7
  28. gwaslab/{io_read_pipcs.py → io/io_read_pipcs.py} +2 -2
  29. gwaslab/{io_read_tabular.py → io/io_read_tabular.py} +2 -2
  30. gwaslab/{io_to_formats.py → io/io_to_formats.py} +11 -8
  31. gwaslab/{io_to_pickle.py → io/io_to_pickle.py} +16 -1
  32. gwaslab/{qc_check_datatype_polars.py → qc/qc_check_datatype_polars.py} +2 -2
  33. gwaslab/{qc_fix_sumstats.py → qc/qc_fix_sumstats.py} +60 -33
  34. gwaslab/{qc_fix_sumstats_polars.py → qc/qc_fix_sumstats_polars.py} +15 -11
  35. gwaslab/{util_abf_finemapping.py → util/util_abf_finemapping.py} +2 -2
  36. gwaslab/{util_ex_calculate_ldmatrix.py → util/util_ex_calculate_ldmatrix.py} +18 -8
  37. gwaslab/{util_ex_calculate_prs.py → util/util_ex_calculate_prs.py} +2 -2
  38. gwaslab/{util_ex_ldproxyfinder.py → util/util_ex_ldproxyfinder.py} +6 -6
  39. gwaslab/{util_ex_ldsc.py → util/util_ex_ldsc.py} +18 -13
  40. gwaslab/{util_ex_match_ldmatrix.py → util/util_ex_match_ldmatrix.py} +8 -7
  41. gwaslab/util/util_ex_phewwas.py +117 -0
  42. gwaslab/{util_ex_process_h5.py → util/util_ex_process_h5.py} +2 -2
  43. gwaslab/{util_ex_process_ref.py → util/util_ex_process_ref.py} +2 -2
  44. gwaslab/{util_ex_run_2samplemr.py → util/util_ex_run_2samplemr.py} +18 -7
  45. gwaslab/{util_ex_run_ccgwas.py → util/util_ex_run_ccgwas.py} +4 -4
  46. gwaslab/{util_ex_run_clumping.py → util/util_ex_run_clumping.py} +28 -13
  47. gwaslab/{util_ex_run_coloc.py → util/util_ex_run_coloc.py} +22 -10
  48. gwaslab/{util_ex_run_hyprcoloc.py → util/util_ex_run_hyprcoloc.py} +4 -4
  49. gwaslab/{util_ex_run_magma.py → util/util_ex_run_magma.py} +21 -11
  50. gwaslab/{util_ex_run_mesusie.py → util/util_ex_run_mesusie.py} +3 -3
  51. gwaslab/{util_ex_run_mtag.py → util/util_ex_run_mtag.py} +50 -18
  52. gwaslab/{util_ex_run_prscs.py → util/util_ex_run_prscs.py} +3 -3
  53. gwaslab/{util_ex_run_scdrs.py → util/util_ex_run_scdrs.py} +10 -4
  54. gwaslab/{util_ex_run_susie.py → util/util_ex_run_susie.py} +49 -26
  55. gwaslab/{util_in_fill_data.py → util/util_in_fill_data.py} +1 -1
  56. gwaslab/{util_in_filter_value.py → util/util_in_filter_value.py} +18 -11
  57. gwaslab/{util_in_get_sig.py → util/util_in_get_sig.py} +15 -13
  58. gwaslab/{util_in_meta.py → util/util_in_meta.py} +1 -1
  59. gwaslab/{util_in_meta_polars.py → util/util_in_meta_polars.py} +1 -1
  60. gwaslab/{viz_aux_annotate_plot.py → viz/viz_aux_annotate_plot.py} +1 -1
  61. gwaslab/{viz_aux_quickfix.py → viz/viz_aux_quickfix.py} +2 -2
  62. gwaslab/{viz_plot_compare_af.py → viz/viz_plot_compare_af.py} +1 -1
  63. gwaslab/{viz_plot_compare_effect.py → viz/viz_plot_compare_effect.py} +16 -8
  64. gwaslab/{viz_plot_credible_sets.py → viz/viz_plot_credible_sets.py} +6 -6
  65. gwaslab/{viz_plot_effect.py → viz/viz_plot_effect.py} +37 -69
  66. gwaslab/{viz_plot_miamiplot.py → viz/viz_plot_miamiplot.py} +28 -20
  67. gwaslab/{viz_plot_miamiplot2.py → viz/viz_plot_miamiplot2.py} +27 -22
  68. gwaslab/{viz_plot_mqqplot.py → viz/viz_plot_mqqplot.py} +48 -38
  69. gwaslab/{viz_plot_phe_heatmap.py → viz/viz_plot_phe_heatmap.py} +18 -15
  70. gwaslab/{viz_plot_qqplot.py → viz/viz_plot_qqplot.py} +4 -2
  71. gwaslab/{viz_plot_regional2.py → viz/viz_plot_regional2.py} +11 -9
  72. gwaslab/{viz_plot_regionalplot.py → viz/viz_plot_regionalplot.py} +5 -4
  73. gwaslab/{viz_plot_rg_heatmap.py → viz/viz_plot_rg_heatmap.py} +1 -1
  74. gwaslab/{viz_plot_scatter_with_reg.py → viz/viz_plot_scatter_with_reg.py} +10 -7
  75. gwaslab/{viz_plot_stackedregional.py → viz/viz_plot_stackedregional.py} +67 -33
  76. gwaslab/{viz_plot_trumpetplot.py → viz/viz_plot_trumpetplot.py} +11 -9
  77. {gwaslab-3.6.6.dist-info → gwaslab-3.6.7.dist-info}/METADATA +1 -1
  78. gwaslab-3.6.7.dist-info/RECORD +123 -0
  79. gwaslab/bd_config.py +0 -18
  80. gwaslab-3.6.6.dist-info/RECORD +0 -120
  81. /gwaslab/{ldsc_jackknife.py → extension/ldsc/ldsc_jackknife.py} +0 -0
  82. /gwaslab/{ldsc_ldscore.py → extension/ldsc/ldsc_ldscore.py} +0 -0
  83. /gwaslab/{ldsc_parse.py → extension/ldsc/ldsc_parse.py} +0 -0
  84. /gwaslab/{prscs_gigrnd.py → extension/prscs/prscs_gigrnd.py} +0 -0
  85. /gwaslab/{prscs_parse_genet.py → extension/prscs/prscs_parse_genet.py} +0 -0
  86. /gwaslab/{hm_rsid_to_chrpos.py → hm/hm_rsid_to_chrpos.py} +0 -0
  87. /gwaslab/{io_process_args.py → io/io_process_args.py} +0 -0
  88. /gwaslab/{io_read_ldsc.py → io/io_read_ldsc.py} +0 -0
  89. /gwaslab/{qc_build.py → qc/qc_build.py} +0 -0
  90. /gwaslab/{qc_check_datatype.py → qc/qc_check_datatype.py} +0 -0
  91. /gwaslab/{util_ex_gwascatalog.py → util/util_ex_gwascatalog.py} +0 -0
  92. /gwaslab/{util_ex_infer_ancestry.py → util/util_ex_infer_ancestry.py} +0 -0
  93. /gwaslab/{util_ex_plink_filter.py → util/util_ex_plink_filter.py} +0 -0
  94. /gwaslab/{util_in_calculate_gc.py → util/util_in_calculate_gc.py} +0 -0
  95. /gwaslab/{util_in_calculate_power.py → util/util_in_calculate_power.py} +0 -0
  96. /gwaslab/{util_in_convert_h2.py → util/util_in_convert_h2.py} +0 -0
  97. /gwaslab/{util_in_correct_winnerscurse.py → util/util_in_correct_winnerscurse.py} +0 -0
  98. /gwaslab/{util_in_estimate_ess.py → util/util_in_estimate_ess.py} +0 -0
  99. /gwaslab/{util_in_get_density.py → util/util_in_get_density.py} +0 -0
  100. /gwaslab/{util_in_merge.py → util/util_in_merge.py} +0 -0
  101. /gwaslab/{util_in_snphwe.py → util/util_in_snphwe.py} +0 -0
  102. /gwaslab/{viz_aux_chromatin.py → viz/viz_aux_chromatin.py} +0 -0
  103. /gwaslab/{viz_aux_property.py → viz/viz_aux_property.py} +0 -0
  104. /gwaslab/{viz_aux_reposition_text.py → viz/viz_aux_reposition_text.py} +0 -0
  105. /gwaslab/{viz_aux_save_figure.py → viz/viz_aux_save_figure.py} +0 -0
  106. /gwaslab/{viz_plot_forestplot.py → viz/viz_plot_forestplot.py} +0 -0
  107. {gwaslab-3.6.6.dist-info → gwaslab-3.6.7.dist-info}/WHEEL +0 -0
  108. {gwaslab-3.6.6.dist-info → gwaslab-3.6.7.dist-info}/licenses/LICENSE +0 -0
  109. {gwaslab-3.6.6.dist-info → gwaslab-3.6.7.dist-info}/licenses/LICENSE_before_v3.4.39 +0 -0
  110. {gwaslab-3.6.6.dist-info → gwaslab-3.6.7.dist-info}/top_level.txt +0 -0
@@ -4,9 +4,9 @@ import gc
4
4
  import pandas as pd
5
5
  import numpy as np
6
6
  from gwaslab.g_Log import Log
7
- from gwaslab.util_in_filter_value import _exclude_hla
7
+ from gwaslab.util.util_in_filter_value import _exclude_hla
8
8
 
9
- def _run_magma(sumstats,
9
+ def _run_magma(gls,
10
10
  magma="magma",
11
11
  study="Study1",
12
12
  exclude_hla=True,
@@ -15,7 +15,7 @@ def _run_magma(sumstats,
15
15
  ref=None,
16
16
  ncbi=None,
17
17
  set_annot=None,
18
- out="./",
18
+ out=None,
19
19
  delete=True,
20
20
  ncol="N",
21
21
  build="19",
@@ -24,33 +24,42 @@ def _run_magma(sumstats,
24
24
 
25
25
  log.write(" Start to run magma from command line:", verbose=verbose)
26
26
 
27
+ sumstats = gls.data
28
+ gls.offload()
27
29
  if exclude_hla==True:
28
30
  sumstats = _exclude_hla(sumstats, build =build)
31
+
32
+ if out is None:
33
+ out = os.path.join("./", study)
34
+ else:
35
+ out = os.path.join(out, study)
29
36
 
30
- snploc="{}{}.rsid.chr.pos.tsv".format(out,study)
31
- pval="{}{}.rsid.p.n.tsv".format(out, study)
37
+ snploc="{}.rsid.chr.pos.tsv".format(out)
38
+ pval="{}.rsid.p.n.tsv".format(out)
32
39
 
33
40
  log.write(f" -writing temp file for --snp-loc:{snploc}", verbose=verbose)
34
- sumstats.dropna()[[id_to_use,"CHR","POS"]].rename(columns={id_to_use:"SNP"}).to_csv("{}{}.rsid.chr.pos.tsv".format(out,study),index=None, sep="\t")
41
+ sumstats.dropna()[[id_to_use,"CHR","POS"]].rename(columns={id_to_use:"SNP"}).to_csv("{}.rsid.chr.pos.tsv".format(out),index=None, sep="\t")
35
42
 
36
43
  log.write(f" -writing temp file for --pval:{pval}", verbose=verbose)
37
- sumstats.dropna()[[id_to_use,"P","N"]].rename(columns={id_to_use:"SNP"}).to_csv("{}{}.rsid.p.n.tsv".format(out,study),index=None, sep="\t")
44
+ sumstats.dropna()[[id_to_use,"P","N"]].rename(columns={id_to_use:"SNP"}).to_csv("{}.rsid.p.n.tsv".format(out),index=None, sep="\t")
38
45
 
39
46
  log.write(f" --annotate window: {window}", verbose=verbose)
40
47
  log.write(f" --gene-loc: {ncbi}", verbose=verbose)
41
48
  log.write(f" --bfile: {ref}", verbose=verbose)
42
49
  log.write(f" Output prefix: {out}", verbose=verbose)
43
-
50
+
51
+
52
+
44
53
  bash_script=f'''#!/bin/bash
45
54
 
46
- {magma} --annotate window={window} --snp-loc {snploc} --gene-loc {ncbi} --out {study}
55
+ {magma} --annotate window={window} --snp-loc {snploc} --gene-loc {ncbi} --out {out}
47
56
 
48
- {magma} --bfile {ref} --pval {pval} ncol={ncol} --gene-annot {study}.genes.annot --out {study}
57
+ {magma} --bfile {ref} --pval {pval} ncol={ncol} --gene-annot {out}.genes.annot --out {out}
49
58
  '''
50
59
 
51
60
  if set_annot is not None:
52
61
  bash_script+=f'''
53
- {magma} --gene-results {study}.genes.raw --set-annot {set_annot} --out {study}
62
+ {magma} --gene-results {out}.genes.raw --set-annot {set_annot} --out {out}
54
63
  '''
55
64
  log.write(f"Script: {bash_script}")
56
65
 
@@ -67,5 +76,6 @@ def _run_magma(sumstats,
67
76
  log.warning("ERROR!")
68
77
  log.write(e.output)
69
78
 
79
+ gls.reload()
70
80
  log.write("Finished running magma.", verbose=verbose)
71
81
 
@@ -6,9 +6,9 @@ import numpy as np
6
6
  from gwaslab.g_Log import Log
7
7
  from gwaslab.g_version import _checking_r_version
8
8
  from gwaslab.g_version import _check_susie_version
9
- from gwaslab.qc_fix_sumstats import start_to
10
- from gwaslab.qc_fix_sumstats import finished
11
- from gwaslab.viz_plot_stackedregional import _sort_args
9
+ from gwaslab.qc.qc_fix_sumstats import start_to
10
+ from gwaslab.qc.qc_fix_sumstats import finished
11
+ from gwaslab.viz.viz_plot_stackedregional import _sort_args
12
12
 
13
13
  def _run_mesusie(filepath,
14
14
  r="Rscript",
@@ -4,19 +4,22 @@ import gc
4
4
  import pandas as pd
5
5
  import numpy as np
6
6
  from gwaslab.g_Log import Log
7
+ from gwaslab.bd.bd_path_manager import _path
7
8
 
8
9
  def _run_mtag( sumstats_multi,
9
- python="Rscript",
10
+ python="python",
10
11
  mtag="",
11
12
  study="Group1",
13
+ special_flags="",
14
+ ld_ref_panel = None,
12
15
  traits=None,
13
16
  out_prefix=None,
14
- types=None,
17
+ perfect_gencov = False,
18
+ equal_h2 = False,
19
+ no_overlap = False,
20
+ fdr=False,
15
21
  n_min=0,
16
- loci=None,
17
22
  nstudy=2,
18
- windowsizekb=1000,
19
- build="99",
20
23
  log=Log(),
21
24
  verbose=True):
22
25
 
@@ -49,16 +52,37 @@ def _run_mtag( sumstats_multi,
49
52
  "N_{}".format( i+1) :"n",
50
53
 
51
54
  }
55
+ csv_path = _path(study = study,
56
+ trait = traits_to_form_string[i],
57
+ suffix="tsv.gz")
58
+
59
+ sumstats_multi.data[output_snp_info_cols+ output_stats_cols].rename(columns=rename_dict).to_csv(csv_path, index=None,sep="\t")
60
+ sumstats_paths.append(csv_path)
52
61
 
53
- sumstats_multi[output_snp_info_cols+ output_stats_cols].rename(columns=rename_dict).to_csv("{}_{}.tsv.gz".format(study, traits_to_form_string[i]), index=None,sep="\t")
54
- sumstats_paths.append("{}_{}.tsv.gz".format(study, traits_to_form_string[i]))
62
+ sumstats_multi.offload()
55
63
 
56
64
  python_log=""
57
65
  if out_prefix is None:
58
- out_prefix = "./{study}_{nstudy}studies".format(study=study, nstudy=nstudy)
59
-
66
+ out_prefix = _path(study=study,
67
+ nstudy = nstudy)
68
+
69
+ #out_prefix = "./{study}_{nstudy}studies".format(study=study, nstudy=nstudy)
70
+ if ld_ref_panel is not None:
71
+ ld_ref_flag = "--ld_ref_panel {}".format(ld_ref_panel)
72
+ else:
73
+ ld_ref_flag=""
74
+
75
+ if perfect_gencov == True:
76
+ special_flags += "--perfect_gencov "
77
+ if equal_h2 == True:
78
+ special_flags += "--equal_h2 "
79
+ if no_overlap == True:
80
+ special_flags += "--no_overlap "
81
+ if fdr == True:
82
+ special_flags += "--fdr "
83
+
60
84
  script='''
61
- {python} {mtag} \
85
+ {python} {mtag} {special_flags} {ld_ref_flag} \
62
86
  --sumstats {sumstats_paths_string} \
63
87
  --out {out_prefix} \
64
88
  --n_min {n_min} \
@@ -67,26 +91,34 @@ def _run_mtag( sumstats_multi,
67
91
  python=python,
68
92
  n_min=n_min,
69
93
  mtag=mtag,
94
+ special_flags=special_flags,
70
95
  out_prefix=out_prefix,
96
+ ld_ref_flag=ld_ref_flag,
71
97
  sumstats_paths_string = ",".join(sumstats_paths)
72
98
  )
73
- log.write(" MTAG script: {} ".format(script), verbose=verbose)
99
+ log.write("MTAG script: {} ".format(script), verbose=verbose)
74
100
 
75
-
76
- with open("_{}_gwaslab_mtag_temp.sh".format(study),"w") as file:
101
+ temp_script_path = _path(tmp=True,
102
+ study=study,
103
+ analysis="mtag",
104
+ suffix="sh"
105
+ )
106
+
107
+ with open(temp_script_path,"w") as file:
77
108
  file.write(script)
78
109
 
79
- os.chmod("_{}_gwaslab_mtag_temp.sh".format(study), 0o700)
80
-
81
- script_run = "./_{}_gwaslab_mtag_temp.sh".format(study)
110
+ os.chmod(temp_script_path, 0o700)
82
111
 
83
112
  try:
84
- log.write(" Running MTAG from command line...", verbose=verbose)
85
- output = subprocess.check_output(script_run, stderr=subprocess.STDOUT, shell=True,text=True)
113
+ log.write(" -Running MTAG from command line...", verbose=verbose)
114
+ output = subprocess.check_output(os.path.join(temp_script_path)
115
+ ,stderr=subprocess.STDOUT, shell=True,text=True)
86
116
  log.write(output)
87
117
  python_log+= output + "\n"
88
118
 
89
119
  except subprocess.CalledProcessError as e:
90
120
  log.write(e.output)
91
121
 
122
+ sumstats_multi.reload()
123
+
92
124
  log.write("Finished MTAG.", verbose=verbose)
@@ -20,9 +20,9 @@ import os
20
20
  import sys
21
21
  import getopt
22
22
 
23
- import gwaslab.prscs_parse_genet as parse_genet
24
- import gwaslab.prscs_mcmc_gtb as mcmc_gtb
25
- import gwaslab.prscs_gigrnd as gigrnd
23
+ import gwaslab.extension.prscs.prscs_parse_genet as parse_genet
24
+ import gwaslab.extension.prscs.prscs_mcmc_gtb as mcmc_gtb
25
+ import gwaslab.extension.prscs.prscs_gigrnd as gigrnd
26
26
 
27
27
 
28
28
  def _run_prscs(
@@ -5,7 +5,8 @@ import pandas as pd
5
5
  import numpy as np
6
6
  from gwaslab.g_Log import Log
7
7
 
8
- def _run_scdrs( scdrs="scdrs",
8
+ def _run_scdrs( gls,
9
+ scdrs="scdrs",
9
10
  python="python",
10
11
  study="Study1",
11
12
  conda_env=None,
@@ -32,16 +33,21 @@ def _run_scdrs( scdrs="scdrs",
32
33
 
33
34
  log.write(" Start to run scDRS from command line:", verbose=verbose)
34
35
 
36
+ log.write(f" Output prefix: {out}", verbose=verbose)
37
+ gls.offload()
35
38
  trait = study
39
+
36
40
  if out_file is None:
37
41
  out_file = f"./{trait}.gs"
42
+ out_file = os.path.join(out, out_file)
38
43
  if out_folder is None:
39
- out_folder = f"./"
44
+ out_folder = out
45
+
40
46
  if conda_env is not None:
41
47
  conda_env_string = f"conda init bash\n conda activate {conda_env}\n"
42
48
  else:
43
49
  conda_env_string=""
44
- log.write(f" Output prefix: {out}", verbose=verbose)
50
+
45
51
 
46
52
  if group_analysis is not None:
47
53
  analysis_string = f"--group-analysis {group_analysis} "
@@ -104,5 +110,5 @@ def _run_scdrs( scdrs="scdrs",
104
110
  except subprocess.CalledProcessError as e:
105
111
  log.warning("ERROR!")
106
112
  log.write(e.output)
107
-
113
+ gls.reload()
108
114
  log.write("Finished running scDRS.", verbose=verbose)
@@ -6,22 +6,23 @@ import numpy as np
6
6
  from gwaslab.g_Log import Log
7
7
  from gwaslab.g_version import _checking_r_version
8
8
  from gwaslab.g_version import _check_susie_version
9
- from gwaslab.qc_fix_sumstats import start_to
10
- from gwaslab.qc_fix_sumstats import finished
9
+ from gwaslab.qc.qc_fix_sumstats import start_to
10
+ from gwaslab.qc.qc_fix_sumstats import finished
11
11
 
12
- def _run_susie_rss(filepath,
12
+ def _run_susie_rss(gls,
13
+ filepath,
13
14
  r="Rscript",
14
15
  mode="bs",
15
- max_iter=100000,
16
- min_abs_corr=0.1,
17
- refine="TRUE",
16
+ out=None,
17
+ max_iter=100,
18
+ min_abs_corr=0.5,
19
+ refine="FALSE",
18
20
  L=10,
19
21
  fillldna=True,
20
22
  n=None,
21
23
  delete=False, #if delete output file
22
24
  susie_args="",
23
25
  log=Log(),
24
- main_sumstats=None,
25
26
  verbose=True):
26
27
  ##start function with col checking##########################################################
27
28
  _start_line = "run finemapping using SuSieR from command line"
@@ -44,7 +45,9 @@ def _run_susie_rss(filepath,
44
45
  log.write(" -File path is None.")
45
46
  log.write("Finished finemapping using SuSieR.")
46
47
  return pd.DataFrame()
47
-
48
+
49
+ gls.offload()
50
+
48
51
  filelist = pd.read_csv(filepath,sep="\t")
49
52
  r_log=""
50
53
  # write R script
@@ -52,38 +55,49 @@ def _run_susie_rss(filepath,
52
55
 
53
56
  log = _checking_r_version(r, log)
54
57
  log = _check_susie_version(r,log)
55
-
58
+
56
59
  for index, row in filelist.iterrows():
57
60
  gc.collect()
58
61
  study = row["STUDY"]
59
62
  ld_r_matrix = row["LD_R_MATRIX"] #ld matrix path
60
63
  sumstats = row["LOCUS_SUMSTATS"] #sumsttas path
61
- output_prefix = sumstats.replace(".sumstats.gz","")
64
+
65
+ # out: directory for output files
66
+ if out is None:
67
+ output_prefix = sumstats.replace(".sumstats.gz","")
68
+ else:
69
+ output_prefix = os.path.join(out, os.path.basename(sumstats.replace(".sumstats.gz","")))
70
+
62
71
  log.write(" -Running for: {} - {}".format(row["SNPID"],row["STUDY"] ))
63
72
  log.write(" -Locus sumstats:{}".format(sumstats))
64
73
  log.write(" -LD r matrix:{}".format(ld_r_matrix))
65
74
  log.write(" -output_prefix:{}".format(output_prefix))
66
75
 
67
76
  rscript='''
68
- library(susieR)
69
-
70
- sumstats <- read.csv("{}",sep="\t")
71
-
72
- R <- as.matrix(read.csv("{}",sep="\t",header=FALSE))
73
- {}
77
+ library(susieR)
78
+
79
+ sumstats <- read.csv("{}",sep="\t")
80
+
81
+ R <- as.matrix(read.csv("{}",sep="\t",header=FALSE))
82
+ {}
83
+
84
+ n <- floor(mean(sumstats$N))
74
85
 
75
- n <- floor(mean(sumstats$N))
86
+ fitted_rss1 <- susie_rss({}, n = {}, R = R, max_iter = {}, min_abs_corr={}, refine = {}, L = {}{})
76
87
 
77
- fitted_rss1 <- susie_rss({}, n = {}, R = R, max_iter = {}, min_abs_corr={}, refine = {}, L = {}{})
88
+ susie_fitted_summary <- summary(fitted_rss1)
78
89
 
79
- susie_fitted_summary <- summary(fitted_rss1)
90
+ output <- susie_fitted_summary$vars
91
+ output$SNPID <- sumstats$SNPID[susie_fitted_summary$vars$variable]
92
+ output$LOCUS <- "{}"
93
+ output$STUDY <- "{}"
80
94
 
81
- output <- susie_fitted_summary$vars
82
- output$SNPID <- sumstats$SNPID[susie_fitted_summary$vars$variable]
83
- output$LOCUS <- "{}"
84
- output$STUDY <- "{}"
95
+ write.csv(output, "{}.pipcs", row.names = FALSE)
85
96
 
86
- write.csv(output, "{}.pipcs", row.names = FALSE)
97
+ png(filename="{}_diagnostic.png")
98
+ diagnostic <- kriging_rss({}, R, n=n)
99
+ diagnostic$plot
100
+ dev.off()
87
101
  '''.format(sumstats,
88
102
  ld_r_matrix,
89
103
  "R[is.na(R)] <- 0" if fillldna==True else "",
@@ -96,7 +110,9 @@ def _run_susie_rss(filepath,
96
110
  susie_args,
97
111
  row["SNPID"],
98
112
  row["STUDY"],
99
- output_prefix)
113
+ output_prefix,
114
+ output_prefix,
115
+ "sumstats$Z" if mode=="z" else "sumstats$BETA/sumstats$SE")
100
116
  susier_line = "susie_rss({}, n = {}, R = R, max_iter = {}, min_abs_corr={}, refine = {}, L = {}{})".format("z= sumstats$Z," if mode=="z" else "bhat = sumstats$BETA,shat = sumstats$SE,",
101
117
  n if n is not None else "n",
102
118
  max_iter,
@@ -106,7 +122,12 @@ def _run_susie_rss(filepath,
106
122
  susie_args)
107
123
  log.write(" -SuSieR script: {}".format(susier_line))
108
124
 
125
+ # temporary R script path
109
126
  temp_r_path = "_{}_{}_{}_gwaslab_susie_temp.R".format(study,row["SNPID"],id(sumstats))
127
+ if out is not None:
128
+ temp_r_path = os.path.join(out, temp_r_path)
129
+
130
+
110
131
  log.write(" -Createing temp R script: {}".format(temp_r_path))
111
132
  with open(temp_r_path,"w") as file:
112
133
  file.write(rscript)
@@ -140,8 +161,10 @@ def _run_susie_rss(filepath,
140
161
  os.remove(temp_r_path)
141
162
  log.write(" -Removing temp R script: {}".format(temp_r_path))
142
163
 
164
+ gls.reload()
165
+
143
166
  locus_pip_cs = locus_pip_cs.rename(columns={"variable":"N_SNP","variable_prob":"PIP","cs":"CREDIBLE_SET_INDEX"})
144
- locus_pip_cs = pd.merge(locus_pip_cs, main_sumstats, on="SNPID",how="left")
167
+ locus_pip_cs = pd.merge(locus_pip_cs, gls.data[["SNPID","CHR","POS"]], on="SNPID",how="left")
145
168
 
146
169
  finished(log=log, verbose=verbose, end_line=_end_line)
147
170
  return locus_pip_cs
@@ -7,7 +7,7 @@ from gwaslab.g_Log import Log
7
7
  import gc
8
8
  #from gwaslab.qc_fix_sumstats import sortcolumn
9
9
  from gwaslab.g_version import _get_version
10
- from gwaslab.qc_check_datatype import check_datatype
10
+ from gwaslab.qc.qc_check_datatype import check_datatype
11
11
 
12
12
 
13
13
  def filldata(
@@ -1,17 +1,21 @@
1
1
  import re
2
- #import modin.pandas as pd
3
2
  import pandas as pd
4
3
  import numpy as np
5
4
  from os import path
6
- from gwaslab.bd_common_data import get_high_ld
7
- from gwaslab.bd_common_data import get_chr_to_number
5
+ from pathlib import Path
6
+
8
7
  from gwaslab.g_Log import Log
9
8
  from gwaslab.g_vchange_status import vchange_status
10
- from gwaslab.qc_fix_sumstats import sortcoordinate
11
- from gwaslab.qc_fix_sumstats import start_to
12
- from gwaslab.qc_fix_sumstats import finished
13
- from gwaslab.qc_fix_sumstats import _process_build
14
- from gwaslab.hm_harmonize_sumstats import is_palindromic
9
+
10
+ from gwaslab.qc.qc_fix_sumstats import sortcoordinate
11
+ from gwaslab.qc.qc_fix_sumstats import start_to
12
+ from gwaslab.qc.qc_fix_sumstats import finished
13
+ from gwaslab.qc.qc_fix_sumstats import _process_build
14
+
15
+ from gwaslab.bd.bd_common_data import get_high_ld
16
+ from gwaslab.bd.bd_common_data import get_chr_to_number
17
+
18
+ from gwaslab.hm.hm_harmonize_sumstats import is_palindromic
15
19
 
16
20
  import gc
17
21
  def filtervalues(sumstats,expr,remove=False,verbose=True,log=Log()):
@@ -221,6 +225,8 @@ def inferbuild(sumstats,status="STATUS",chrom="CHR", pos="POS",
221
225
  ea="EA", nea="NEA",build="19",
222
226
  change_status=True,
223
227
  verbose=True,log=Log()):
228
+
229
+
224
230
  ##start function with col checking##########################################################
225
231
  _start_line = "infer genome build version using hapmap3 SNPs"
226
232
  _end_line = "inferring genome build version using hapmap3 SNPs"
@@ -241,8 +247,10 @@ def inferbuild(sumstats,status="STATUS",chrom="CHR", pos="POS",
241
247
 
242
248
  inferred_build="Unknown"
243
249
  log.write("Start to infer genome build version using hapmap3 SNPs...", verbose=verbose)
244
- data_path_19 = path.dirname(__file__) + '/data/hapmap3_SNPs/hapmap3_db150_hg19.snplist.gz'
245
- data_path_38 = path.dirname(__file__) + '/data/hapmap3_SNPs/hapmap3_db151_hg38.snplist.gz'
250
+
251
+ data_path_19 = path.join( Path(__file__).parents[1], "data","hapmap3_SNPs","hapmap3_db150_hg19.snplist.gz")
252
+ data_path_38 = path.join( Path(__file__).parents[1], "data","hapmap3_SNPs","hapmap3_db151_hg38.snplist.gz")
253
+
246
254
  log.write(" -Loading Hapmap3 variants data...", verbose=verbose)
247
255
  hapmap3_ref_19 = pd.read_csv(data_path_19,sep="\s+",usecols=["#CHROM","POS"],dtype={"#CHROM":"string","POS":"string"})
248
256
  hapmap3_ref_38 = pd.read_csv(data_path_38,sep="\s+",usecols=["#CHROM","POS"],dtype={"#CHROM":"string","POS":"string"})
@@ -266,7 +274,6 @@ def inferbuild(sumstats,status="STATUS",chrom="CHR", pos="POS",
266
274
  log.write(" -Since num_hg19 >> num_hg38, assigning genome build hg19...", verbose=verbose)
267
275
  if change_status==True:
268
276
  sumstats[status] = vchange_status(sumstats[status],1,"9","1")
269
- sumstats[status] = vchange_status(sumstats[status],2,"9","9")
270
277
  inferred_build="19"
271
278
  elif match_count_for_19 < match_count_for_38:
272
279
  log.write(" -Since num_hg19 << num_hg38, assigning genome build hg38...", verbose=verbose)
@@ -5,20 +5,22 @@ import gc
5
5
  from pyensembl import EnsemblRelease
6
6
  from pyensembl import Genome
7
7
  from os import path
8
- from gwaslab.util_in_fill_data import fill_p
9
8
  from gwaslab.g_Log import Log
10
- from gwaslab.bd_common_data import get_chr_to_number
11
- from gwaslab.bd_common_data import get_number_to_chr
12
- from gwaslab.bd_common_data import get_chr_to_NC
13
- from gwaslab.bd_common_data import gtf_to_protein_coding
14
- from gwaslab.bd_common_data import gtf_to_all_gene
15
- from gwaslab.bd_download import check_and_download
16
- from gwaslab.util_ex_gwascatalog import gwascatalog_trait
17
- from gwaslab.qc_fix_sumstats import check_dataframe_shape
18
- from gwaslab.qc_fix_sumstats import start_to
19
- from gwaslab.qc_fix_sumstats import finished
20
- from gwaslab.qc_build import _check_build
21
- from gwaslab.util_in_correct_winnerscurse import wc_correct
9
+
10
+ from gwaslab.bd.bd_common_data import get_chr_to_number
11
+ from gwaslab.bd.bd_common_data import get_number_to_chr
12
+ from gwaslab.bd.bd_common_data import get_chr_to_NC
13
+ from gwaslab.bd.bd_common_data import gtf_to_protein_coding
14
+ from gwaslab.bd.bd_common_data import gtf_to_all_gene
15
+ from gwaslab.bd.bd_download import check_and_download
16
+
17
+ from gwaslab.qc.qc_fix_sumstats import check_dataframe_shape
18
+ from gwaslab.qc.qc_fix_sumstats import start_to
19
+ from gwaslab.qc.qc_fix_sumstats import finished
20
+ from gwaslab.qc.qc_build import _check_build
21
+ from gwaslab.util.util_in_correct_winnerscurse import wc_correct
22
+ from gwaslab.util.util_ex_gwascatalog import gwascatalog_trait
23
+ from gwaslab.util.util_in_fill_data import fill_p
22
24
  # getsig
23
25
  # closest_gene
24
26
  # annogene
@@ -4,7 +4,7 @@ import numpy as np
4
4
  from scipy.stats.distributions import chi2
5
5
  from scipy.stats import norm
6
6
  from gwaslab.g_Log import Log
7
- from gwaslab.io_to_pickle import load_data_from_pickle
7
+ from gwaslab.io.io_to_pickle import load_data_from_pickle
8
8
  from gwaslab.g_Sumstats import Sumstats
9
9
  import gc
10
10
 
@@ -4,7 +4,7 @@ import numpy as np
4
4
  from scipy.stats.distributions import chi2
5
5
  from scipy.stats import norm
6
6
  from gwaslab.g_Log import Log
7
- from gwaslab.io_to_pickle import load_data_from_pickle
7
+ from gwaslab.io.io_to_pickle import load_data_from_pickle
8
8
  from gwaslab.g_Sumstats import Sumstats
9
9
  import polars as pl
10
10
  ########################################################################################################################################################################################################################################################################################################################################################
@@ -9,7 +9,7 @@ from scipy import stats
9
9
  from mpl_toolkits.axes_grid1.inset_locator import inset_axes
10
10
  from adjustText import adjust_text
11
11
  from gwaslab.g_Log import Log
12
- from gwaslab.viz_aux_reposition_text import adjust_text_position
12
+ from gwaslab.viz.viz_aux_reposition_text import adjust_text_position
13
13
  from pandas.api.types import is_string_dtype
14
14
 
15
15
  # single mqqplot
@@ -3,8 +3,8 @@ import numpy as np
3
3
  from gwaslab.g_Log import Log
4
4
  from matplotlib import ticker
5
5
  import matplotlib.pyplot as plt
6
- from gwaslab.bd_common_data import get_chr_to_number
7
- from gwaslab.bd_common_data import get_number_to_chr
6
+ from gwaslab.bd.bd_common_data import get_chr_to_number
7
+ from gwaslab.bd.bd_common_data import get_number_to_chr
8
8
  from math import ceil
9
9
 
10
10
  def _quick_fix(sumstats, chr_dict=get_chr_to_number(), scaled=False, chrom="CHR", pos="POS", p="P", mlog10p="MLOG10P",log=Log(), verbose=True):
@@ -4,7 +4,7 @@ import matplotlib.pyplot as plt
4
4
  import scipy.stats as ss
5
5
  import seaborn as sns
6
6
  from gwaslab.g_Log import Log
7
- from gwaslab.viz_aux_save_figure import save_figure
7
+ from gwaslab.viz.viz_aux_save_figure import save_figure
8
8
 
9
9
  ################################################################################################################################
10
10
  def plotdaf(sumstats,
@@ -8,15 +8,18 @@ import math
8
8
  import scipy.stats as ss
9
9
  from matplotlib.patches import Rectangle
10
10
  from adjustText import adjust_text
11
- from gwaslab.viz_aux_save_figure import save_figure
12
- from gwaslab.util_in_get_sig import getsig
13
- from gwaslab.util_in_get_sig import annogene
14
11
  from gwaslab.g_Log import Log
15
- from gwaslab.util_in_correct_winnerscurse import wc_correct
16
- from gwaslab.util_in_correct_winnerscurse import wc_correct_test
17
12
  from gwaslab.g_Sumstats import Sumstats
18
- from gwaslab.io_process_args import _merge_and_sync_dic
19
- from gwaslab.io_process_args import _extract_kwargs
13
+
14
+ from gwaslab.viz.viz_aux_save_figure import save_figure
15
+
16
+ from gwaslab.util.util_in_get_sig import getsig
17
+ from gwaslab.util.util_in_get_sig import annogene
18
+ from gwaslab.util.util_in_correct_winnerscurse import wc_correct
19
+ from gwaslab.util.util_in_correct_winnerscurse import wc_correct_test
20
+
21
+ from gwaslab.io.io_process_args import _merge_and_sync_dic
22
+ from gwaslab.io.io_process_args import _extract_kwargs
20
23
  #20220422
21
24
  def compare_effect(path1,
22
25
  path2,
@@ -91,13 +94,17 @@ def compare_effect(path1,
91
94
  exponent = math.floor(math.log10(sig_level))
92
95
  mantissa = sig_level / 10**exponent
93
96
 
94
- legend_title = '$\mathregular{ P < {} x 10^{{{}}}}$ in:'.format(mantissa, exponent)
97
+ legend_title = '$\mathregular{{ P < {} x 10^{{{}}} }}$ in:'.format(mantissa, exponent)
95
98
 
99
+ # what method to use for correction
96
100
  if is_q_mc=="fdr" or is_q_mc=="bon":
97
101
  is_q = True
102
+
103
+ # if heterogeneity test
98
104
  if is_q == True:
99
105
  if is_q_mc not in [False,"fdr","bon","non"]:
100
106
  raise ValueError('Please select either "fdr" or "bon" or "non"/False for is_q_mc.')
107
+
101
108
  if save_args is None:
102
109
  save_args = {"dpi":300,"facecolor":"white"}
103
110
  if reg_box is None:
@@ -985,6 +992,7 @@ def test_q(df,beta1,se1,beta2,se2,q_level=0.05,is_q_mc=False, log=Log(), verbose
985
992
  log.write(" -Bonferroni correction applied...", verbose=verbose)
986
993
  df[rawpq] = df[pq]
987
994
  df[pq] = df[pq] * len(df[pq])
995
+ # P value upper bound -> 1
988
996
  df.loc[df[pq]>1,pq] = 1
989
997
 
990
998
  df.loc[df[pq]<q_level,"Edge_color"]="black"
@@ -3,12 +3,12 @@ import matplotlib.pyplot as plt
3
3
  import pandas as pd
4
4
  import seaborn as sns
5
5
  from gwaslab.g_Log import Log
6
- from gwaslab.viz_aux_quickfix import _quick_assign_i_with_rank
7
- from gwaslab.viz_plot_mqqplot import _process_xtick
8
- from gwaslab.viz_plot_mqqplot import _process_xlabel
9
- from gwaslab.bd_common_data import get_number_to_chr
10
- from gwaslab.util_in_filter_value import _filter_region
11
- from gwaslab.io_process_args import _extract_kwargs
6
+ from gwaslab.viz.viz_aux_quickfix import _quick_assign_i_with_rank
7
+ from gwaslab.viz.viz_plot_mqqplot import _process_xtick
8
+ from gwaslab.viz.viz_plot_mqqplot import _process_xlabel
9
+ from gwaslab.bd.bd_common_data import get_number_to_chr
10
+ from gwaslab.util.util_in_filter_value import _filter_region
11
+ from gwaslab.io.io_process_args import _extract_kwargs
12
12
  import copy
13
13
 
14
14
  def _plot_cs(pipcs_raw,