gwaslab 3.6.6__py3-none-any.whl → 3.6.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gwaslab might be problematic. Click here for more details.

Files changed (110) hide show
  1. gwaslab/__init__.py +57 -47
  2. gwaslab/{bd_common_data.py → bd/bd_common_data.py} +10 -9
  3. gwaslab/bd/bd_config.py +28 -0
  4. gwaslab/{bd_download.py → bd/bd_download.py} +1 -1
  5. gwaslab/{bd_get_hapmap3.py → bd/bd_get_hapmap3.py} +9 -6
  6. gwaslab/bd/bd_path_manager.py +110 -0
  7. gwaslab/data/formatbook.json +805 -9
  8. gwaslab/{ldsc_irwls.py → extension/ldsc/ldsc_irwls.py} +1 -1
  9. gwaslab/{ldsc_regressions.py → extension/ldsc/ldsc_regressions.py} +2 -2
  10. gwaslab/{ldsc_sumstats.py → extension/ldsc/ldsc_sumstats.py} +2 -2
  11. gwaslab/{prscs_mcmc_gtb.py → extension/prscs/prscs_mcmc_gtb.py} +1 -1
  12. gwaslab/g_Sumstats.py +130 -96
  13. gwaslab/g_SumstatsMulti.py +69 -40
  14. gwaslab/g_SumstatsPair.py +54 -37
  15. gwaslab/g_SumstatsSet.py +88 -81
  16. gwaslab/g_SumstatsT.py +6 -6
  17. gwaslab/g_Sumstats_polars.py +84 -84
  18. gwaslab/g_meta_update.py +1 -1
  19. gwaslab/g_vchange_status.py +4 -4
  20. gwaslab/g_version.py +2 -2
  21. gwaslab/{hm_casting.py → hm/hm_casting.py} +4 -4
  22. gwaslab/{hm_casting_polars.py → hm/hm_casting_polars.py} +4 -4
  23. gwaslab/hm/hm_harmonize_sumstats.py +1635 -0
  24. gwaslab/hm_harmonize_sumstats.py +3 -8
  25. gwaslab/{io_load_ld.py → io/io_load_ld.py} +16 -13
  26. gwaslab/{io_preformat_input.py → io/io_preformat_input.py} +152 -73
  27. gwaslab/{io_preformat_input_polars.py → io/io_preformat_input_polars.py} +7 -7
  28. gwaslab/{io_read_pipcs.py → io/io_read_pipcs.py} +2 -2
  29. gwaslab/{io_read_tabular.py → io/io_read_tabular.py} +2 -2
  30. gwaslab/{io_to_formats.py → io/io_to_formats.py} +11 -8
  31. gwaslab/{io_to_pickle.py → io/io_to_pickle.py} +16 -1
  32. gwaslab/{qc_check_datatype_polars.py → qc/qc_check_datatype_polars.py} +2 -2
  33. gwaslab/{qc_fix_sumstats.py → qc/qc_fix_sumstats.py} +60 -33
  34. gwaslab/{qc_fix_sumstats_polars.py → qc/qc_fix_sumstats_polars.py} +15 -11
  35. gwaslab/{util_abf_finemapping.py → util/util_abf_finemapping.py} +2 -2
  36. gwaslab/{util_ex_calculate_ldmatrix.py → util/util_ex_calculate_ldmatrix.py} +18 -8
  37. gwaslab/{util_ex_calculate_prs.py → util/util_ex_calculate_prs.py} +2 -2
  38. gwaslab/{util_ex_ldproxyfinder.py → util/util_ex_ldproxyfinder.py} +6 -6
  39. gwaslab/{util_ex_ldsc.py → util/util_ex_ldsc.py} +18 -13
  40. gwaslab/{util_ex_match_ldmatrix.py → util/util_ex_match_ldmatrix.py} +8 -7
  41. gwaslab/util/util_ex_phewwas.py +127 -0
  42. gwaslab/{util_ex_process_h5.py → util/util_ex_process_h5.py} +2 -2
  43. gwaslab/{util_ex_process_ref.py → util/util_ex_process_ref.py} +2 -2
  44. gwaslab/{util_ex_run_2samplemr.py → util/util_ex_run_2samplemr.py} +18 -7
  45. gwaslab/{util_ex_run_ccgwas.py → util/util_ex_run_ccgwas.py} +4 -4
  46. gwaslab/{util_ex_run_clumping.py → util/util_ex_run_clumping.py} +28 -13
  47. gwaslab/{util_ex_run_coloc.py → util/util_ex_run_coloc.py} +22 -10
  48. gwaslab/{util_ex_run_hyprcoloc.py → util/util_ex_run_hyprcoloc.py} +4 -4
  49. gwaslab/{util_ex_run_magma.py → util/util_ex_run_magma.py} +21 -11
  50. gwaslab/{util_ex_run_mesusie.py → util/util_ex_run_mesusie.py} +3 -3
  51. gwaslab/{util_ex_run_mtag.py → util/util_ex_run_mtag.py} +50 -18
  52. gwaslab/{util_ex_run_prscs.py → util/util_ex_run_prscs.py} +3 -3
  53. gwaslab/{util_ex_run_scdrs.py → util/util_ex_run_scdrs.py} +10 -4
  54. gwaslab/{util_ex_run_susie.py → util/util_ex_run_susie.py} +49 -26
  55. gwaslab/{util_in_fill_data.py → util/util_in_fill_data.py} +1 -1
  56. gwaslab/{util_in_filter_value.py → util/util_in_filter_value.py} +18 -11
  57. gwaslab/{util_in_get_sig.py → util/util_in_get_sig.py} +15 -13
  58. gwaslab/{util_in_meta.py → util/util_in_meta.py} +1 -1
  59. gwaslab/{util_in_meta_polars.py → util/util_in_meta_polars.py} +1 -1
  60. gwaslab/{viz_aux_annotate_plot.py → viz/viz_aux_annotate_plot.py} +1 -1
  61. gwaslab/{viz_aux_quickfix.py → viz/viz_aux_quickfix.py} +2 -2
  62. gwaslab/{viz_plot_compare_af.py → viz/viz_plot_compare_af.py} +1 -1
  63. gwaslab/{viz_plot_compare_effect.py → viz/viz_plot_compare_effect.py} +16 -8
  64. gwaslab/{viz_plot_credible_sets.py → viz/viz_plot_credible_sets.py} +6 -6
  65. gwaslab/{viz_plot_effect.py → viz/viz_plot_effect.py} +37 -69
  66. gwaslab/{viz_plot_miamiplot.py → viz/viz_plot_miamiplot.py} +28 -20
  67. gwaslab/{viz_plot_miamiplot2.py → viz/viz_plot_miamiplot2.py} +27 -22
  68. gwaslab/{viz_plot_mqqplot.py → viz/viz_plot_mqqplot.py} +100 -46
  69. gwaslab/{viz_plot_phe_heatmap.py → viz/viz_plot_phe_heatmap.py} +18 -15
  70. gwaslab/{viz_plot_qqplot.py → viz/viz_plot_qqplot.py} +12 -28
  71. gwaslab/{viz_plot_regional2.py → viz/viz_plot_regional2.py} +11 -9
  72. gwaslab/{viz_plot_regionalplot.py → viz/viz_plot_regionalplot.py} +5 -4
  73. gwaslab/{viz_plot_rg_heatmap.py → viz/viz_plot_rg_heatmap.py} +1 -1
  74. gwaslab/{viz_plot_scatter_with_reg.py → viz/viz_plot_scatter_with_reg.py} +10 -7
  75. gwaslab/{viz_plot_stackedregional.py → viz/viz_plot_stackedregional.py} +67 -33
  76. gwaslab/{viz_plot_trumpetplot.py → viz/viz_plot_trumpetplot.py} +15 -9
  77. {gwaslab-3.6.6.dist-info → gwaslab-3.6.8.dist-info}/METADATA +1 -1
  78. gwaslab-3.6.8.dist-info/RECORD +123 -0
  79. gwaslab/bd_config.py +0 -18
  80. gwaslab-3.6.6.dist-info/RECORD +0 -120
  81. /gwaslab/{ldsc_jackknife.py → extension/ldsc/ldsc_jackknife.py} +0 -0
  82. /gwaslab/{ldsc_ldscore.py → extension/ldsc/ldsc_ldscore.py} +0 -0
  83. /gwaslab/{ldsc_parse.py → extension/ldsc/ldsc_parse.py} +0 -0
  84. /gwaslab/{prscs_gigrnd.py → extension/prscs/prscs_gigrnd.py} +0 -0
  85. /gwaslab/{prscs_parse_genet.py → extension/prscs/prscs_parse_genet.py} +0 -0
  86. /gwaslab/{hm_rsid_to_chrpos.py → hm/hm_rsid_to_chrpos.py} +0 -0
  87. /gwaslab/{io_process_args.py → io/io_process_args.py} +0 -0
  88. /gwaslab/{io_read_ldsc.py → io/io_read_ldsc.py} +0 -0
  89. /gwaslab/{qc_build.py → qc/qc_build.py} +0 -0
  90. /gwaslab/{qc_check_datatype.py → qc/qc_check_datatype.py} +0 -0
  91. /gwaslab/{util_ex_gwascatalog.py → util/util_ex_gwascatalog.py} +0 -0
  92. /gwaslab/{util_ex_infer_ancestry.py → util/util_ex_infer_ancestry.py} +0 -0
  93. /gwaslab/{util_ex_plink_filter.py → util/util_ex_plink_filter.py} +0 -0
  94. /gwaslab/{util_in_calculate_gc.py → util/util_in_calculate_gc.py} +0 -0
  95. /gwaslab/{util_in_calculate_power.py → util/util_in_calculate_power.py} +0 -0
  96. /gwaslab/{util_in_convert_h2.py → util/util_in_convert_h2.py} +0 -0
  97. /gwaslab/{util_in_correct_winnerscurse.py → util/util_in_correct_winnerscurse.py} +0 -0
  98. /gwaslab/{util_in_estimate_ess.py → util/util_in_estimate_ess.py} +0 -0
  99. /gwaslab/{util_in_get_density.py → util/util_in_get_density.py} +0 -0
  100. /gwaslab/{util_in_merge.py → util/util_in_merge.py} +0 -0
  101. /gwaslab/{util_in_snphwe.py → util/util_in_snphwe.py} +0 -0
  102. /gwaslab/{viz_aux_chromatin.py → viz/viz_aux_chromatin.py} +0 -0
  103. /gwaslab/{viz_aux_property.py → viz/viz_aux_property.py} +0 -0
  104. /gwaslab/{viz_aux_reposition_text.py → viz/viz_aux_reposition_text.py} +0 -0
  105. /gwaslab/{viz_aux_save_figure.py → viz/viz_aux_save_figure.py} +0 -0
  106. /gwaslab/{viz_plot_forestplot.py → viz/viz_plot_forestplot.py} +0 -0
  107. {gwaslab-3.6.6.dist-info → gwaslab-3.6.8.dist-info}/WHEEL +0 -0
  108. {gwaslab-3.6.6.dist-info → gwaslab-3.6.8.dist-info}/licenses/LICENSE +0 -0
  109. {gwaslab-3.6.6.dist-info → gwaslab-3.6.8.dist-info}/licenses/LICENSE_before_v3.4.39 +0 -0
  110. {gwaslab-3.6.6.dist-info → gwaslab-3.6.8.dist-info}/top_level.txt +0 -0
@@ -11,18 +11,22 @@ from gwaslab.g_vchange_status_polars import vchange_statusp
11
11
  from gwaslab.g_vchange_status import status_match
12
12
  from gwaslab.g_vchange_status import change_status
13
13
  from gwaslab.g_Log import Log
14
- from gwaslab.bd_common_data import get_chr_to_number
15
- from gwaslab.bd_common_data import get_number_to_chr
16
- from gwaslab.bd_common_data import get_chr_list
17
- from gwaslab.qc_check_datatype import check_datatype
18
- from gwaslab.qc_check_datatype import check_dataframe_shape
19
- from gwaslab.qc_build import _process_build
20
- from gwaslab.qc_build import _set_build
21
14
  from gwaslab.g_version import _get_version
22
- from gwaslab.util_in_fill_data import _convert_betase_to_mlog10p
23
- from gwaslab.util_in_fill_data import _convert_betase_to_p
24
- from gwaslab.util_in_fill_data import _convert_mlog10p_to_p
25
- from gwaslab.bd_common_data import get_chain
15
+
16
+ from gwaslab.bd.bd_common_data import get_chr_to_number
17
+ from gwaslab.bd.bd_common_data import get_number_to_chr
18
+ from gwaslab.bd.bd_common_data import get_chr_list
19
+ from gwaslab.bd.bd_common_data import get_chain
20
+
21
+ from gwaslab.qc.qc_check_datatype import check_datatype
22
+ from gwaslab.qc.qc_check_datatype import check_dataframe_shape
23
+ from gwaslab.qc.qc_build import _process_build
24
+ from gwaslab.qc.qc_build import _set_build
25
+
26
+ from gwaslab.util.util_in_fill_data import _convert_betase_to_mlog10p
27
+ from gwaslab.util.util_in_fill_data import _convert_betase_to_p
28
+ from gwaslab.util.util_in_fill_data import _convert_mlog10p_to_p
29
+
26
30
  import polars as pl
27
31
  ###############################################################################################################
28
32
  # 20220426
@@ -1,8 +1,8 @@
1
1
  import pandas as pd
2
2
  import numpy as np
3
3
  from gwaslab.g_Log import Log
4
- from gwaslab.util_in_filter_value import _get_flanking_by_chrpos
5
- from gwaslab.util_in_filter_value import _get_flanking_by_id
4
+ from gwaslab.util.util_in_filter_value import _get_flanking_by_chrpos
5
+ from gwaslab.util.util_in_filter_value import _get_flanking_by_id
6
6
 
7
7
  # Calculate PIP based on approximate Bayesian factor (ABF)
8
8
  # Wakefield, J. A bayesian measure of the probability of false discovery in genetic epidemiology studies. Am J Hum Genet 81, 208–227 (2007).
@@ -4,14 +4,14 @@ import gc
4
4
  import pandas as pd
5
5
  import numpy as np
6
6
  from gwaslab.g_Log import Log
7
- from gwaslab.qc_fix_sumstats import start_to
8
- from gwaslab.qc_fix_sumstats import finished
9
- from gwaslab.util_in_get_sig import getsig
10
- from gwaslab.util_ex_process_ref import _process_plink_input_files
7
+ from gwaslab.qc.qc_fix_sumstats import start_to
8
+ from gwaslab.qc.qc_fix_sumstats import finished
9
+ from gwaslab.util.util_in_get_sig import getsig
10
+ from gwaslab.util.util_ex_process_ref import _process_plink_input_files
11
+ from gwaslab.util.util_in_filter_value import _exclude_hla
11
12
  from gwaslab.g_version import _checking_plink_version
12
- from gwaslab.util_in_filter_value import _exclude_hla
13
13
 
14
- def tofinemapping(sumstats,
14
+ def tofinemapping(gls,
15
15
  study=None,
16
16
  bfile=None,
17
17
  vcf=None,
@@ -39,6 +39,9 @@ def tofinemapping(sumstats,
39
39
  _start_cols =["SNPID","CHR","POS","EA","NEA"]
40
40
  _start_function = ".calculate_ld_matrix()"
41
41
  _must_args ={}
42
+
43
+ sumstats = gls.data
44
+ gls.offload()
42
45
 
43
46
  is_enough_info = start_to(sumstats=sumstats,
44
47
  log=log,
@@ -114,7 +117,8 @@ def tofinemapping(sumstats,
114
117
  locus_sumstats=locus_sumstats,
115
118
  ref_bim=ref_bim[0],
116
119
  log=log,suffixes=suffixes)
117
-
120
+ del locus_sumstats
121
+ gc.collect()
118
122
  #########################################################################################################
119
123
  # create matched snp list
120
124
  matched_snp_list_path,matched_sumstats_path=_export_snplist_and_locus_sumstats(matched_sumstats=matched_sumstats,
@@ -144,7 +148,8 @@ def tofinemapping(sumstats,
144
148
  extra_plink_option=extra_plink_option,
145
149
  ref_allele_path = matched_sumstats_path,
146
150
  verbose=verbose)
147
-
151
+ del matched_sumstats
152
+ gc.collect()
148
153
 
149
154
  # print file list
150
155
  row_dict={}
@@ -166,7 +171,12 @@ def tofinemapping(sumstats,
166
171
  output_file_list_path=None
167
172
  log.write(" -No avaialable lead variants.",verbose=verbose)
168
173
  log.write(" -Stopped LD matrix calculation.",verbose=verbose)
174
+
175
+ del sumstats
176
+
169
177
  finished(log=log, verbose=verbose, end_line=_end_line)
178
+ gls.reload()
179
+
170
180
  return output_file_list_path, output_file_list, plink_log
171
181
 
172
182
 
@@ -4,8 +4,8 @@ import gc
4
4
  import pandas as pd
5
5
  import numpy as np
6
6
  from gwaslab.g_Log import Log
7
- from gwaslab.util_in_get_sig import getsig
8
- from gwaslab.util_ex_process_ref import _process_plink_input_files
7
+ from gwaslab.util.util_in_get_sig import getsig
8
+ from gwaslab.util.util_ex_process_ref import _process_plink_input_files
9
9
  from gwaslab.g_version import _checking_plink_version
10
10
 
11
11
  def _calculate_prs(sumstats,
@@ -17,12 +17,12 @@ from mpl_toolkits.axes_grid1.inset_locator import mark_inset
17
17
  from adjustText import adjust_text
18
18
  from gtfparse import read_gtf
19
19
  from gwaslab.g_Log import Log
20
- from gwaslab.bd_common_data import get_chr_to_number
21
- from gwaslab.bd_common_data import get_number_to_chr
22
- from gwaslab.bd_common_data import get_recombination_rate
23
- from gwaslab.bd_common_data import get_gtf
24
- from gwaslab.util_in_filter_value import _get_flanking
25
- from gwaslab.hm_harmonize_sumstats import auto_check_vcf_chr_dict
20
+ from gwaslab.bd.bd_common_data import get_chr_to_number
21
+ from gwaslab.bd.bd_common_data import get_number_to_chr
22
+ from gwaslab.bd.bd_common_data import get_recombination_rate
23
+ from gwaslab.bd.bd_common_data import get_gtf
24
+ from gwaslab.util.util_in_filter_value import _get_flanking
25
+ from gwaslab.hm.hm_harmonize_sumstats import auto_check_vcf_chr_dict
26
26
  # unmatched SNP list 1
27
27
 
28
28
  # for each SNP in unmatched SNP list 1:
@@ -1,17 +1,22 @@
1
- from gwaslab.ldsc_sumstats import estimate_h2
2
- from gwaslab.ldsc_sumstats import estimate_rg
3
- from gwaslab.ldsc_sumstats import cell_type_specific
4
- from gwaslab.g_Log import Log
5
- from gwaslab.qc_fix_sumstats import start_to
6
- from gwaslab.qc_fix_sumstats import finished
7
- from gwaslab.qc_fix_sumstats import skipped
8
- from gwaslab.io_read_ldsc import parse_ldsc_summary
9
- from gwaslab.io_read_ldsc import parse_partitioned_ldsc_summary
10
- from gwaslab.util_in_filter_value import filtervalues
11
- from gwaslab.util_in_filter_value import _filter_palindromic
12
- from gwaslab.util_in_filter_value import _exclude_hla
13
- from gwaslab.util_in_filter_value import _exclude_sexchr
14
1
  import copy
2
+ from gwaslab.g_Log import Log
3
+
4
+ from gwaslab.extension.ldsc.ldsc_sumstats import estimate_h2
5
+ from gwaslab.extension.ldsc.ldsc_sumstats import estimate_rg
6
+ from gwaslab.extension.ldsc.ldsc_sumstats import cell_type_specific
7
+
8
+ from gwaslab.qc.qc_fix_sumstats import start_to
9
+ from gwaslab.qc.qc_fix_sumstats import finished
10
+ from gwaslab.qc.qc_fix_sumstats import skipped
11
+
12
+ from gwaslab.io.io_read_ldsc import parse_ldsc_summary
13
+ from gwaslab.io.io_read_ldsc import parse_partitioned_ldsc_summary
14
+
15
+ from gwaslab.util.util_in_filter_value import filtervalues
16
+ from gwaslab.util.util_in_filter_value import _filter_palindromic
17
+ from gwaslab.util.util_in_filter_value import _exclude_hla
18
+ from gwaslab.util.util_in_filter_value import _exclude_sexchr
19
+
15
20
 
16
21
  class ARGS():
17
22
  def __init__(self, kwargs=None):
@@ -1,7 +1,7 @@
1
1
  import scipy.sparse as sparse
2
2
  import numpy as np
3
3
  import pandas as pd
4
- from gwaslab.hm_casting import _merge_mold_with_sumstats_by_chrpos
4
+
5
5
  import subprocess
6
6
  import os
7
7
  import re
@@ -9,16 +9,17 @@ import gc
9
9
  import pandas as pd
10
10
  import numpy as np
11
11
  from gwaslab.g_Log import Log
12
- from gwaslab.qc_fix_sumstats import start_to
13
- from gwaslab.qc_fix_sumstats import finished
14
- from gwaslab.util_in_get_sig import getsig
15
- from gwaslab.util_ex_process_ref import _process_plink_input_files
16
12
  from gwaslab.g_version import _checking_plink_version
17
- from gwaslab.util_in_filter_value import _exclude_hla
18
- from gwaslab.util_ex_calculate_ldmatrix import _extract_variants_in_locus
19
13
 
14
+ from gwaslab.hm.hm_casting import _merge_mold_with_sumstats_by_chrpos
20
15
 
16
+ from gwaslab.qc.qc_fix_sumstats import start_to
17
+ from gwaslab.qc.qc_fix_sumstats import finished
21
18
 
19
+ from gwaslab.util.util_in_get_sig import getsig
20
+ from gwaslab.util.util_ex_process_ref import _process_plink_input_files
21
+ from gwaslab.util.util_in_filter_value import _exclude_hla
22
+ from gwaslab.util.util_ex_calculate_ldmatrix import _extract_variants_in_locus
22
23
 
23
24
  def tofinemapping_m(sumstats,
24
25
  studies=None,
@@ -0,0 +1,127 @@
1
+ import pandas as pd
2
+ from gwaslab.g_Log import Log
3
+ import pandas as pd
4
+ import numpy as np
5
+
6
def _extract_associations(sumstats, rsid="rsID", log = Log(), verbose=True):
    """Collect GWAS Catalog associations for the variants in ``sumstats``.

    Associations, traits, studies and variant annotations are obtained from
    ``get_associations_from_gwascatalog`` and joined on ``associationId``.

    Parameters
    ----------
    sumstats : pandas.DataFrame
        Table holding the variant identifiers to look up.
    rsid : str
        Name of the column in ``sumstats`` that holds the identifiers. The
        same name is used for the identifier column of the result.
    log : Log
        Logger that receives progress messages.
    verbose : bool
        Forwarded to every ``log.write`` call.

    Returns
    -------
    tuple
        ``(full_table, summary_table)``; ``(None, None)`` when no association
        was retrieved.
    """
    assoc, traits, studies, variants = get_associations_from_gwascatalog(
        sumstats, rsid=rsid, log=log, verbose=verbose
    )

    if len(assoc) == 0:
        # Respect the caller's verbosity like every other message here.
        log.write("No associations!", verbose=verbose)
        return None, None

    assoc = _fix_beta(assoc)

    # One row per association; several traits are collapsed into one
    # comma-separated string. Missing values are dropped so that str.join
    # never sees a non-string.
    traits_agg = (
        traits.groupby("associationId")[["trait", "shortForm"]]
        .agg(lambda x: ",".join(x.dropna().astype(str)))
        .reset_index()
    )

    assoc_traits_agg = pd.merge(assoc, traits_agg, on="associationId", how="left")
    assoc_traits_agg = pd.merge(assoc_traits_agg, studies, on="associationId", how="left")
    assoc_traits_agg = pd.merge(assoc_traits_agg, variants, on="associationId", how="left")

    assoc_traits_agg = assoc_traits_agg.rename(columns={"trait": "GWASCATALOG_TRAIT",
                                                        "riskFrequency": "RAF",
                                                        "betaNum": "Beta",
                                                        "pvalue": "P-value"
                                                        })

    # The identifier column is created under the caller-supplied name, so it
    # must be selected under that name as well (it equals 'rsID' by default).
    summary_columns = ['GWASCATALOG_TRAIT', 'associationId', rsid, "geneName",
                       'RA', 'RAF', 'Beta', 'P-value', 'cohort', 'initialSampleSize',
                       'publicationInfo.pubmedId',
                       "functionalClass", "gene.geneName"]

    missing_columns = [col for col in summary_columns if col not in assoc_traits_agg.columns]
    if missing_columns:
        # Keep the summary layout stable instead of failing on an absent
        # optional field; absent columns are filled with NA.
        log.write(" -Columns not found in the retrieved records (filled with NA): {}".format(missing_columns),
                  verbose=verbose)
        assoc_traits_agg_summary = assoc_traits_agg.reindex(columns=summary_columns)
    else:
        assoc_traits_agg_summary = assoc_traits_agg[summary_columns]

    return assoc_traits_agg, assoc_traits_agg_summary
38
+
39
def get_associations_from_gwascatalog(sumstats, rsid="rsID", log=Log(), verbose=True):
    """Query the GWAS Catalog (through ``pandasgwas``) for each variant.

    Every unique, non-missing identifier in ``sumstats[rsid]`` is looked up
    with ``get_associations``. For each retrieved ``associationId`` the
    traits, studies and variants are then fetched as well.

    Parameters
    ----------
    sumstats : pandas.DataFrame
        Table holding the variant identifiers.
    rsid : str
        Column of ``sumstats`` with the identifiers; a column of the same
        name is added to the returned association table.
    log : Log
        Logger that receives progress messages.
    verbose : bool
        Forwarded to every ``log.write`` call.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(association, traits, studies, variants)``. All four are empty
        frames when no association was retrieved.
    """
    from pandasgwas import get_associations
    from pandasgwas import get_traits
    from pandasgwas import get_studies
    from pandasgwas import get_variants

    def _concat(frames):
        # pd.concat rejects an empty list, so fall back to an empty frame.
        return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

    association_frames = []
    risk_allele_frames = []
    gene_frames = []
    # Accumulated over the whole loop so the final message lists every
    # variant without hits (and is defined even when there is nothing to query).
    empty = []

    unique_sumstats = sumstats.dropna(subset=[rsid]).drop_duplicates(subset=[rsid])

    for variant_id in unique_sumstats[rsid]:
        log.write(f"Getting associations from GWAS Catalog for {variant_id}...",verbose=verbose)

        df = get_associations(variant_id = variant_id)

        if len(df.associations) == 0:
            empty.append(variant_id)
            continue

        df.associations[rsid] = variant_id
        association_frames.append(df.associations)

        df.strongest_risk_alleles[rsid] = variant_id
        risk_allele_frames.append(df.strongest_risk_alleles)

        # Author-reported genes are optional; skip them when the response
        # does not carry a usable table.
        genes = getattr(df, "author_reported_genes", None)
        if isinstance(genes, pd.DataFrame):
            gene_frames.append(genes)
        log.write("", show_time=False, verbose=verbose)

    log.write(f"No associations: {empty}", verbose=verbose)

    association = _concat(association_frames)

    if len(association) == 0:
        # Nothing was retrieved: return empty tables instead of failing on
        # the missing 'associationId' column further down.
        log.write(f"Retrieved {len(association)} associations from GWAS Catalog...", verbose=verbose)
        return pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame()

    strongest_risk_alleles = _concat(risk_allele_frames)
    if len(strongest_risk_alleles) > 0:
        # The risk allele is the part after the last '-' of riskAlleleName.
        strongest_risk_alleles["RA"] = strongest_risk_alleles["riskAlleleName"].str.split("-").str[-1]
        association = pd.merge(association, strongest_risk_alleles[["associationId","RA"]],on="associationId",how="left")
    else:
        association["RA"] = pd.NA

    author_reported_genes = _concat(gene_frames)
    if {"associationId", "geneName"}.issubset(author_reported_genes.columns):
        gene_names = (
            author_reported_genes.groupby("associationId")["geneName"]
            .agg(lambda x: ",".join(x.dropna().astype(str)))
            .reset_index()
        )
        association = pd.merge(association, gene_names, on="associationId", how="left")
    else:
        association["geneName"] = pd.NA

    log.write(f"Retrieved {len(association)} associations from GWAS Catalog...", verbose=verbose)

    trait_frames = []
    study_frames = []
    variant_frames = []

    for association_id in association["associationId"].drop_duplicates():
        log.write(f'Getting traits/studies/variants from GWAS Catalog for associationId: {association_id}...',verbose=verbose)

        df = get_traits(association_id = association_id)
        df.efo_traits["associationId"] = association_id
        trait_frames.append(df.efo_traits)

        df = get_studies(association_id = association_id)
        df.studies["associationId"] = association_id
        study_frames.append(df.studies)

        df = get_variants(association_id = association_id)
        df.variants["associationId"] = association_id
        # Keep only the gene(s) at the smallest reported distance and join
        # their names per variant.
        min_distance = df.genomic_contexts["distance"].min()
        is_nearest = df.genomic_contexts["distance"] == min_distance
        nearest_genes = (
            df.genomic_contexts.loc[is_nearest, :]
            .drop_duplicates("gene.geneName")
            .groupby("rsId")["gene.geneName"]
            .agg(lambda x: ",".join(x))
        )
        annotated = pd.merge(df.variants[["rsId","functionalClass","associationId"]], nearest_genes, on="rsId")
        variant_frames.append(annotated[["associationId","functionalClass","gene.geneName"]])

    traits = _concat(trait_frames)
    studies = _concat(study_frames)
    variants = _concat(variant_frames)

    return association, traits, studies, variants
107
+
108
def _fix_beta(association):
    """Fill missing ``betaNum`` values from ``orPerCopyNum`` or ``range``.

    The table is modified in place and also returned. Columns ``betaNum``,
    ``orPerCopyNum`` and ``range`` are created (filled with NA) when absent.
    Where ``betaNum`` is missing:

    * if ``orPerCopyNum`` is present, ``betaNum`` becomes its natural log;
    * otherwise, if ``range`` is present, ``betaNum`` becomes
      ``parse_range(range)``.

    Parameters
    ----------
    association : pandas.DataFrame
        Association table to complete.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    for column in ("betaNum", "orPerCopyNum", "range"):
        if column not in association:
            association[column] = pd.NA

    # Both masks are taken from the state before any value is filled in.
    beta_missing = association["betaNum"].isna()
    or_missing = association["orPerCopyNum"].isna()
    is_or_available = beta_missing & ~or_missing
    is_range_available = beta_missing & or_missing & association["range"].notna()

    if is_or_available.any():
        # The column may be object dtype (e.g. mixed with None), on which
        # np.log fails; convert to a numeric Series first.
        odds_ratio = pd.to_numeric(association.loc[is_or_available, "orPerCopyNum"], errors="coerce")
        association.loc[is_or_available, "betaNum"] = np.log(odds_ratio)

    if is_range_available.any():
        association.loc[is_range_available, "betaNum"] = association.loc[is_range_available, "range"].apply(parse_range)

    return association
121
+
122
def parse_range(x):
    """Return the log-scale midpoint of a ``"[low-high]"`` range string.

    Surrounding ``[``, ``|`` and ``]`` characters are stripped, the rest is
    split on ``-`` and both bounds are converted to ``float`` before taking
    the natural log.

    Parameters
    ----------
    x : str
        Range text such as ``"[1.08-1.24]"`` (presumably an odds-ratio
        interval; confirm against the data source).

    Returns
    -------
    float
        ``(log(high) + log(low)) / 2``. ``nan`` is returned when ``x`` is not
        a string, does not split into exactly two numbers, or a bound is not
        positive.
    """
    if not isinstance(x, str):
        return np.nan

    range_list = x.strip().strip("[|]").split("-")
    if len(range_list) != 2:
        return np.nan

    try:
        # The bounds are text fragments; np.log cannot be applied to str.
        low = float(range_list[0])
        high = float(range_list[1])
    except ValueError:
        return np.nan

    if low <= 0 or high <= 0:
        # The logarithm is undefined for non-positive bounds.
        return np.nan

    beta = (np.log(high) + np.log(low)) / 2
    return beta
@@ -2,8 +2,8 @@ import pandas as pd
2
2
  import os
3
3
  import numpy as np
4
4
  from gwaslab.g_Log import Log
5
- from gwaslab.qc_fix_sumstats import start_to
6
- from gwaslab.qc_fix_sumstats import finished
5
+ from gwaslab.qc.qc_fix_sumstats import start_to
6
+ from gwaslab.qc.qc_fix_sumstats import finished
7
7
 
8
8
  def process_vcf_to_hfd5(vcf,
9
9
  directory=None,
@@ -4,8 +4,8 @@ import subprocess
4
4
  from gwaslab.g_Log import Log
5
5
  import os
6
6
  from gwaslab.g_version import _checking_plink_version
7
- from gwaslab.qc_fix_sumstats import start_to
8
- from gwaslab.qc_fix_sumstats import finished
7
+ from gwaslab.qc.qc_fix_sumstats import start_to
8
+ from gwaslab.qc.qc_fix_sumstats import finished
9
9
 
10
10
  def _process_plink_input_files(chrlist,
11
11
  bfile=None,
@@ -7,13 +7,14 @@ import numpy as np
7
7
  from gwaslab.g_Log import Log
8
8
  from gwaslab.g_version import _checking_r_version
9
9
  from gwaslab.g_version import _check_susie_version
10
- from gwaslab.util_in_convert_h2 import _get_per_snp_r2
11
- from gwaslab.qc_fix_sumstats import start_to
12
- from gwaslab.qc_fix_sumstats import finished
10
+ from gwaslab.util.util_in_convert_h2 import _get_per_snp_r2
11
+ from gwaslab.qc.qc_fix_sumstats import start_to
12
+ from gwaslab.qc.qc_fix_sumstats import finished
13
13
 
14
14
 
15
15
  def _run_two_sample_mr(sumstatspair_object,
16
16
  r,
17
+ out="./",
17
18
  clump=False,
18
19
  f_check=10,
19
20
  exposure1="Trait1",
@@ -77,9 +78,15 @@ def _run_two_sample_mr(sumstatspair_object,
77
78
  # Clumping
78
79
 
79
80
  prefix = "{exposure}_{outcome}_{memory_id}".format(exposure = exposure1, outcome= outcome2, memory_id = id(sumstatspair))
80
- temp_sumstats_path = "twosample_mr_{exposure}_{outcome}_{memory_id}.csv.gz".format(exposure = exposure1, outcome= outcome2, memory_id = id(sumstatspair))
81
- sumstatspair.to_csv(temp_sumstats_path ,index=None)
82
-
81
+ prefix = "{}{}".format(out.rstrip('/') + "/",prefix)
82
+ temp_sumstats_path = "{out}twosample_mr_{exposure}_{outcome}_{memory_id}.csv.gz".format(out=out.rstrip('/') + "/",
83
+ exposure = exposure1,
84
+ outcome= outcome2,
85
+ memory_id = id(sumstatspair))
86
+ if len(sumstatspair)>0:
87
+ sumstatspair.to_csv(temp_sumstats_path ,index=None)
88
+ else:
89
+ return 0
83
90
  ###
84
91
  calculate_r_script = ""
85
92
 
@@ -173,7 +180,10 @@ def _run_two_sample_mr(sumstatspair_object,
173
180
  directionality_test = directionality_test_script
174
181
  )
175
182
 
176
- temp_r_script_path = "_{}_{}_{}_gwaslab_2smr_temp.R".format(exposure1,outcome2,id(sumstatspair))
183
+ temp_r_script_path = "{}_{}_{}_{}_gwaslab_2smr_temp.R".format(out.rstrip('/') + "/",
184
+ exposure1,
185
+ outcome2,
186
+ id(sumstatspair))
177
187
  with open(temp_r_script_path,"w") as file:
178
188
  file.write(rscript)
179
189
 
@@ -200,6 +210,7 @@ def _run_two_sample_mr(sumstatspair_object,
200
210
  log.write(rscript)
201
211
  log.write(e.output)
202
212
  os.remove(temp_r_script_path)
213
+ log.write(" Finished running MR using twosampleMR from command line.")
203
214
 
204
215
 
205
216
 
@@ -6,10 +6,10 @@ import numpy as np
6
6
  from gwaslab.g_Log import Log
7
7
  from gwaslab.g_version import _checking_r_version
8
8
  from gwaslab.g_version import _check_susie_version
9
- from gwaslab.qc_fix_sumstats import start_to
10
- from gwaslab.qc_fix_sumstats import finished
11
- from gwaslab.util_ex_calculate_ldmatrix import _extract_variants_in_locus
12
- from gwaslab.util_in_get_sig import getsig
9
+ from gwaslab.qc.qc_fix_sumstats import start_to
10
+ from gwaslab.qc.qc_fix_sumstats import finished
11
+ from gwaslab.util.util_ex_calculate_ldmatrix import _extract_variants_in_locus
12
+ from gwaslab.util.util_in_get_sig import getsig
13
13
 
14
14
  def _run_ccgwas( sumstats_pair,
15
15
  r="Rscript",
@@ -3,12 +3,12 @@ import numpy as np
3
3
  import os
4
4
  import pandas as pd
5
5
  from gwaslab.g_Log import Log
6
- from gwaslab.qc_fix_sumstats import start_to
7
- from gwaslab.qc_fix_sumstats import finished
8
- from gwaslab.util_ex_process_ref import _process_plink_input_files
6
+ from gwaslab.qc.qc_fix_sumstats import start_to
7
+ from gwaslab.qc.qc_fix_sumstats import finished
8
+ from gwaslab.util.util_ex_process_ref import _process_plink_input_files
9
9
  from gwaslab.g_version import _checking_plink_version
10
10
 
11
- def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
11
+ def _clump(gls, vcf=None, scaled=False, out="clumping_plink2",
12
12
  p="P",mlog10p="MLOG10P", overwrite=False, study=None, bfile=None, pfile=None,
13
13
  n_cores=1, memory=None, chrom=None, clump_p1=5e-8, clump_p2=5e-8, clump_r2=0.01, clump_kb=250,
14
14
  log=Log(),verbose=True,plink="plink",plink2="plink2"):
@@ -18,8 +18,16 @@ def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
18
18
  _start_cols =["SNPID","CHR","POS"]
19
19
  _start_function = ".clump()"
20
20
  _must_args ={}
21
+
22
+ if out is None:
23
+ out = f"./{study}_clumpping".lstrip('/')
24
+ else:
25
+ out = out.lstrip('/')
26
+ sumstats_id = gls.id
27
+ sumstats = gls.data
28
+ gls.offload()
21
29
 
22
- is_enough_info = start_to(sumstats=insumstats,
30
+ is_enough_info = start_to(sumstats=sumstats,
23
31
  log=log,
24
32
  verbose=verbose,
25
33
  start_line=_start_line,
@@ -42,11 +50,18 @@ def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
42
50
  clump_log10_p2=-np.log10(clump_p2)
43
51
  log.write(" -clump_log10_p1 : {}...".format(clump_log10_p1),verbose=verbose)
44
52
  log.write(" -clump_log10_p2 : {}...".format(clump_log10_p2),verbose=verbose)
45
- sumstats = insumstats.loc[insumstats[mlog10p]>min(clump_log10_p1,clump_log10_p2),:].copy()
53
+ sumstats = sumstats.loc[sumstats[mlog10p]>min(clump_log10_p1,clump_log10_p2),:].copy()
46
54
  # extract lead variants
47
55
  else:
48
56
  log.write(" -Clumping will be performed using {}".format(p),verbose=verbose)
49
- sumstats = insumstats.loc[insumstats[p]<max(clump_p1,clump_p2),:].copy()
57
+ sumstats = sumstats.loc[sumstats[p]<max(clump_p1,clump_p2),:].copy()
58
+
59
+ if len(sumstats)==0:
60
+ log.write(" -No significant variants after filtering.")
61
+ finished(log=log, verbose=verbose, end_line=_end_line)
62
+ gls.reload()
63
+ return pd.DataFrame(), pd.DataFrame(), ""
64
+
50
65
  log.write(" -Significant variants on CHR: ",list(sumstats["CHR"].unique()),verbose=verbose)
51
66
 
52
67
  plink_log=""
@@ -88,9 +103,9 @@ def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
88
103
  is_avaialable_variant = (sumstats["CHR"]==i) & (is_on_both)
89
104
 
90
105
  if scaled == True:
91
- sumstats.loc[is_avaialable_variant,["SNPID",mlog10p]].to_csv("_gwaslab_tmp.{}.SNPIDP".format(i),index=False,sep="\t")
106
+ sumstats.loc[is_avaialable_variant,["SNPID",mlog10p]].to_csv("{}_gwaslab_tmp.{}.{}.SNPIDP".format(out, sumstats_id, i),index=False,sep="\t")
92
107
  else:
93
- sumstats.loc[is_avaialable_variant,["SNPID",p]].to_csv("_gwaslab_tmp.{}.SNPIDP".format(i),index=False,sep="\t")
108
+ sumstats.loc[is_avaialable_variant,["SNPID",p]].to_csv("{}_gwaslab_tmp.{}.{}.SNPIDP".format(out, sumstats_id,i),index=False,sep="\t")
94
109
  except:
95
110
  log.write(" -Not available for: {}...".format(i),verbose=verbose)
96
111
 
@@ -102,7 +117,7 @@ def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
102
117
  for i in sumstats["CHR"].unique():
103
118
  chrom = i
104
119
  # temp file
105
- clump = "_gwaslab_tmp.{}.SNPIDP".format(chrom)
120
+ clump = "{}_gwaslab_tmp.{}.{}.SNPIDP".format(out,sumstats_id,chrom)
106
121
  # output prefix
107
122
  out_single_chr= out + ".{}".format(chrom)
108
123
 
@@ -173,10 +188,10 @@ def _clump(insumstats, vcf=None, scaled=False, out="clumping_plink2",
173
188
  os.remove(clump)
174
189
 
175
190
  results = results.sort_values(by=["#CHROM","POS"]).rename(columns={"#CHROM":"CHR","ID":"SNPID"})
176
- log.write("Finished clumping.",verbose=verbose)
177
- results_sumstats = insumstats.loc[insumstats["SNPID"].isin(results["SNPID"]),:].copy()
191
+ results_sumstats = sumstats.loc[sumstats["SNPID"].isin(results["SNPID"]),:].copy()
178
192
  finished(log=log, verbose=verbose, end_line=_end_line)
179
-
193
+ gls.reload()
194
+
180
195
  return results_sumstats, results, plink_log
181
196
 
182
197
 
@@ -6,10 +6,12 @@ import numpy as np
6
6
  from gwaslab.g_Log import Log
7
7
  from gwaslab.g_version import _checking_r_version
8
8
  from gwaslab.g_version import _check_susie_version
9
- from gwaslab.qc_fix_sumstats import start_to
10
- from gwaslab.qc_fix_sumstats import finished
9
+ from gwaslab.qc.qc_fix_sumstats import start_to
10
+ from gwaslab.qc.qc_fix_sumstats import finished
11
11
 
12
- def _run_coloc_susie(filepath, r="Rscript",
12
+ def _run_coloc_susie(glsp,
13
+ filepath,
14
+ r="Rscript",
13
15
  types=None, ns=None,
14
16
  fillldna=True, delete=False,
15
17
  coloc_args="",
@@ -17,10 +19,18 @@ def _run_coloc_susie(filepath, r="Rscript",
17
19
  ncols=None,
18
20
  d1_args="",
19
21
  d2_args="",
22
+ out=None,
20
23
  log=Log(),
21
24
  verbose=True):
22
25
 
23
- log.write(" Start to run coloc.susie from command line:", verbose=verbose)
26
+ log.write("Start to run coloc.susie from command line:", verbose=verbose)
27
+
28
+ if filepath is None:
29
+ log.write(" -File path is None.", verbose=verbose)
30
+ log.write("Finished finemapping using SuSieR.", verbose=verbose)
31
+ return pd.DataFrame()
32
+
33
+ glsp.offload()
24
34
 
25
35
  if types is None:
26
36
  types = ("cc","cc")
@@ -31,11 +41,6 @@ def _run_coloc_susie(filepath, r="Rscript",
31
41
  ns = ncols
32
42
  log.write(" -Ns: {} and {}".format(ns[0],ns[1]), verbose=verbose)
33
43
 
34
- if filepath is None:
35
- log.write(" -File path is None.", verbose=verbose)
36
- log.write("Finished finemapping using SuSieR.", verbose=verbose)
37
- return pd.DataFrame()
38
-
39
44
  filelist = pd.read_csv(filepath,sep="\t")
40
45
  r_log=""
41
46
  # write R script
@@ -49,7 +54,12 @@ def _run_coloc_susie(filepath, r="Rscript",
49
54
  study = row["STUDY"]
50
55
  ld_r_matrix = row["LD_R_MATRIX"]
51
56
  sumstats = row["LOCUS_SUMSTATS"]
52
- output_prefix = sumstats.replace(".sumstats.gz","")
57
+
58
+ if out is None:
59
+ output_prefix = sumstats.replace(".sumstats.gz","")
60
+ else:
61
+ output_prefix = os.path.join(out, os.path.basename(sumstats.replace(".sumstats.gz","")))
62
+
53
63
  log.write(" -Running for: {} - {}".format(row["SNPID"],row["STUDY"] ), verbose=verbose)
54
64
  log.write(" -Locus sumstats:{}".format(sumstats), verbose=verbose)
55
65
  log.write(" -LD r matrix:{}".format(ld_r_matrix), verbose=verbose)
@@ -141,5 +151,7 @@ def _run_coloc_susie(filepath, r="Rscript",
141
151
  except subprocess.CalledProcessError as e:
142
152
  log.write(e.output)
143
153
  os.remove("_{}_{}_gwaslab_coloc_susie_temp.R".format(study,row["SNPID"]))
154
+
144
155
  log.write("Finished clocalization using coloc and SuSiE.", verbose=verbose)
156
+ glsp.reload()
145
157
  return locus_pip_cs
@@ -6,10 +6,10 @@ import numpy as np
6
6
  from gwaslab.g_Log import Log
7
7
  from gwaslab.g_version import _checking_r_version
8
8
  from gwaslab.g_version import _check_susie_version
9
- from gwaslab.qc_fix_sumstats import start_to
10
- from gwaslab.qc_fix_sumstats import finished
11
- from gwaslab.util_ex_calculate_ldmatrix import _extract_variants_in_locus
12
- from gwaslab.util_in_get_sig import getsig
9
+ from gwaslab.qc.qc_fix_sumstats import start_to
10
+ from gwaslab.qc.qc_fix_sumstats import finished
11
+ from gwaslab.util.util_ex_calculate_ldmatrix import _extract_variants_in_locus
12
+ from gwaslab.util.util_in_get_sig import getsig
13
13
 
14
14
  def _run_hyprcoloc( sumstats_multi,
15
15
  r="Rscript",