gwaslab 3.6.5__py3-none-any.whl → 3.6.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gwaslab might be problematic. Click here for more details.

Files changed (110) hide show
  1. gwaslab/__init__.py +57 -47
  2. gwaslab/{bd_common_data.py → bd/bd_common_data.py} +10 -9
  3. gwaslab/bd/bd_config.py +28 -0
  4. gwaslab/{bd_download.py → bd/bd_download.py} +1 -1
  5. gwaslab/{bd_get_hapmap3.py → bd/bd_get_hapmap3.py} +9 -6
  6. gwaslab/bd/bd_path_manager.py +110 -0
  7. gwaslab/data/formatbook.json +805 -9
  8. gwaslab/{ldsc_irwls.py → extension/ldsc/ldsc_irwls.py} +1 -1
  9. gwaslab/{ldsc_regressions.py → extension/ldsc/ldsc_regressions.py} +2 -2
  10. gwaslab/{ldsc_sumstats.py → extension/ldsc/ldsc_sumstats.py} +2 -2
  11. gwaslab/{prscs_mcmc_gtb.py → extension/prscs/prscs_mcmc_gtb.py} +1 -1
  12. gwaslab/g_Sumstats.py +130 -96
  13. gwaslab/g_SumstatsMulti.py +69 -40
  14. gwaslab/g_SumstatsPair.py +54 -37
  15. gwaslab/g_SumstatsSet.py +88 -81
  16. gwaslab/g_SumstatsT.py +6 -6
  17. gwaslab/g_Sumstats_polars.py +84 -84
  18. gwaslab/g_meta_update.py +1 -1
  19. gwaslab/g_vchange_status.py +4 -4
  20. gwaslab/g_version.py +2 -2
  21. gwaslab/{hm_casting.py → hm/hm_casting.py} +4 -4
  22. gwaslab/{hm_casting_polars.py → hm/hm_casting_polars.py} +4 -4
  23. gwaslab/hm/hm_harmonize_sumstats.py +1635 -0
  24. gwaslab/hm_harmonize_sumstats.py +3 -8
  25. gwaslab/{io_load_ld.py → io/io_load_ld.py} +16 -13
  26. gwaslab/{io_preformat_input.py → io/io_preformat_input.py} +152 -73
  27. gwaslab/{io_preformat_input_polars.py → io/io_preformat_input_polars.py} +7 -7
  28. gwaslab/{io_read_pipcs.py → io/io_read_pipcs.py} +6 -5
  29. gwaslab/{io_read_tabular.py → io/io_read_tabular.py} +2 -2
  30. gwaslab/{io_to_formats.py → io/io_to_formats.py} +13 -9
  31. gwaslab/{io_to_pickle.py → io/io_to_pickle.py} +16 -1
  32. gwaslab/{qc_check_datatype_polars.py → qc/qc_check_datatype_polars.py} +2 -2
  33. gwaslab/{qc_fix_sumstats.py → qc/qc_fix_sumstats.py} +60 -33
  34. gwaslab/{qc_fix_sumstats_polars.py → qc/qc_fix_sumstats_polars.py} +15 -11
  35. gwaslab/{util_abf_finemapping.py → util/util_abf_finemapping.py} +2 -2
  36. gwaslab/{util_ex_calculate_ldmatrix.py → util/util_ex_calculate_ldmatrix.py} +18 -8
  37. gwaslab/{util_ex_calculate_prs.py → util/util_ex_calculate_prs.py} +2 -2
  38. gwaslab/{util_ex_ldproxyfinder.py → util/util_ex_ldproxyfinder.py} +6 -6
  39. gwaslab/{util_ex_ldsc.py → util/util_ex_ldsc.py} +18 -13
  40. gwaslab/{util_ex_match_ldmatrix.py → util/util_ex_match_ldmatrix.py} +8 -7
  41. gwaslab/util/util_ex_phewwas.py +117 -0
  42. gwaslab/{util_ex_process_h5.py → util/util_ex_process_h5.py} +2 -2
  43. gwaslab/{util_ex_process_ref.py → util/util_ex_process_ref.py} +2 -2
  44. gwaslab/{util_ex_run_2samplemr.py → util/util_ex_run_2samplemr.py} +18 -7
  45. gwaslab/{util_ex_run_ccgwas.py → util/util_ex_run_ccgwas.py} +4 -4
  46. gwaslab/{util_ex_run_clumping.py → util/util_ex_run_clumping.py} +28 -13
  47. gwaslab/{util_ex_run_coloc.py → util/util_ex_run_coloc.py} +22 -10
  48. gwaslab/{util_ex_run_hyprcoloc.py → util/util_ex_run_hyprcoloc.py} +4 -4
  49. gwaslab/{util_ex_run_magma.py → util/util_ex_run_magma.py} +21 -11
  50. gwaslab/{util_ex_run_mesusie.py → util/util_ex_run_mesusie.py} +3 -3
  51. gwaslab/{util_ex_run_mtag.py → util/util_ex_run_mtag.py} +50 -18
  52. gwaslab/{util_ex_run_prscs.py → util/util_ex_run_prscs.py} +3 -3
  53. gwaslab/{util_ex_run_scdrs.py → util/util_ex_run_scdrs.py} +10 -4
  54. gwaslab/{util_ex_run_susie.py → util/util_ex_run_susie.py} +49 -26
  55. gwaslab/{util_in_fill_data.py → util/util_in_fill_data.py} +1 -1
  56. gwaslab/{util_in_filter_value.py → util/util_in_filter_value.py} +18 -11
  57. gwaslab/{util_in_get_sig.py → util/util_in_get_sig.py} +15 -13
  58. gwaslab/{util_in_meta.py → util/util_in_meta.py} +1 -1
  59. gwaslab/{util_in_meta_polars.py → util/util_in_meta_polars.py} +1 -1
  60. gwaslab/{viz_aux_annotate_plot.py → viz/viz_aux_annotate_plot.py} +1 -1
  61. gwaslab/{viz_aux_quickfix.py → viz/viz_aux_quickfix.py} +2 -2
  62. gwaslab/{viz_plot_compare_af.py → viz/viz_plot_compare_af.py} +1 -1
  63. gwaslab/{viz_plot_compare_effect.py → viz/viz_plot_compare_effect.py} +16 -8
  64. gwaslab/{viz_plot_credible_sets.py → viz/viz_plot_credible_sets.py} +6 -6
  65. gwaslab/{viz_plot_effect.py → viz/viz_plot_effect.py} +37 -69
  66. gwaslab/{viz_plot_miamiplot.py → viz/viz_plot_miamiplot.py} +28 -20
  67. gwaslab/{viz_plot_miamiplot2.py → viz/viz_plot_miamiplot2.py} +27 -22
  68. gwaslab/{viz_plot_mqqplot.py → viz/viz_plot_mqqplot.py} +48 -38
  69. gwaslab/{viz_plot_phe_heatmap.py → viz/viz_plot_phe_heatmap.py} +18 -15
  70. gwaslab/{viz_plot_qqplot.py → viz/viz_plot_qqplot.py} +4 -2
  71. gwaslab/{viz_plot_regional2.py → viz/viz_plot_regional2.py} +11 -9
  72. gwaslab/{viz_plot_regionalplot.py → viz/viz_plot_regionalplot.py} +5 -4
  73. gwaslab/{viz_plot_rg_heatmap.py → viz/viz_plot_rg_heatmap.py} +1 -1
  74. gwaslab/{viz_plot_scatter_with_reg.py → viz/viz_plot_scatter_with_reg.py} +10 -7
  75. gwaslab/{viz_plot_stackedregional.py → viz/viz_plot_stackedregional.py} +67 -33
  76. gwaslab/{viz_plot_trumpetplot.py → viz/viz_plot_trumpetplot.py} +11 -9
  77. {gwaslab-3.6.5.dist-info → gwaslab-3.6.7.dist-info}/METADATA +1 -1
  78. gwaslab-3.6.7.dist-info/RECORD +123 -0
  79. gwaslab/bd_config.py +0 -18
  80. gwaslab-3.6.5.dist-info/RECORD +0 -120
  81. /gwaslab/{ldsc_jackknife.py → extension/ldsc/ldsc_jackknife.py} +0 -0
  82. /gwaslab/{ldsc_ldscore.py → extension/ldsc/ldsc_ldscore.py} +0 -0
  83. /gwaslab/{ldsc_parse.py → extension/ldsc/ldsc_parse.py} +0 -0
  84. /gwaslab/{prscs_gigrnd.py → extension/prscs/prscs_gigrnd.py} +0 -0
  85. /gwaslab/{prscs_parse_genet.py → extension/prscs/prscs_parse_genet.py} +0 -0
  86. /gwaslab/{hm_rsid_to_chrpos.py → hm/hm_rsid_to_chrpos.py} +0 -0
  87. /gwaslab/{io_process_args.py → io/io_process_args.py} +0 -0
  88. /gwaslab/{io_read_ldsc.py → io/io_read_ldsc.py} +0 -0
  89. /gwaslab/{qc_build.py → qc/qc_build.py} +0 -0
  90. /gwaslab/{qc_check_datatype.py → qc/qc_check_datatype.py} +0 -0
  91. /gwaslab/{util_ex_gwascatalog.py → util/util_ex_gwascatalog.py} +0 -0
  92. /gwaslab/{util_ex_infer_ancestry.py → util/util_ex_infer_ancestry.py} +0 -0
  93. /gwaslab/{util_ex_plink_filter.py → util/util_ex_plink_filter.py} +0 -0
  94. /gwaslab/{util_in_calculate_gc.py → util/util_in_calculate_gc.py} +0 -0
  95. /gwaslab/{util_in_calculate_power.py → util/util_in_calculate_power.py} +0 -0
  96. /gwaslab/{util_in_convert_h2.py → util/util_in_convert_h2.py} +0 -0
  97. /gwaslab/{util_in_correct_winnerscurse.py → util/util_in_correct_winnerscurse.py} +0 -0
  98. /gwaslab/{util_in_estimate_ess.py → util/util_in_estimate_ess.py} +0 -0
  99. /gwaslab/{util_in_get_density.py → util/util_in_get_density.py} +0 -0
  100. /gwaslab/{util_in_merge.py → util/util_in_merge.py} +0 -0
  101. /gwaslab/{util_in_snphwe.py → util/util_in_snphwe.py} +0 -0
  102. /gwaslab/{viz_aux_chromatin.py → viz/viz_aux_chromatin.py} +0 -0
  103. /gwaslab/{viz_aux_property.py → viz/viz_aux_property.py} +0 -0
  104. /gwaslab/{viz_aux_reposition_text.py → viz/viz_aux_reposition_text.py} +0 -0
  105. /gwaslab/{viz_aux_save_figure.py → viz/viz_aux_save_figure.py} +0 -0
  106. /gwaslab/{viz_plot_forestplot.py → viz/viz_plot_forestplot.py} +0 -0
  107. {gwaslab-3.6.5.dist-info → gwaslab-3.6.7.dist-info}/WHEEL +0 -0
  108. {gwaslab-3.6.5.dist-info → gwaslab-3.6.7.dist-info}/licenses/LICENSE +0 -0
  109. {gwaslab-3.6.5.dist-info → gwaslab-3.6.7.dist-info}/licenses/LICENSE_before_v3.4.39 +0 -0
  110. {gwaslab-3.6.5.dist-info → gwaslab-3.6.7.dist-info}/top_level.txt +0 -0
@@ -7,22 +7,29 @@ from multiprocessing import Pool
7
7
  from liftover import get_lifter
8
8
  from liftover import ChainFile
9
9
  from functools import partial
10
+
10
11
  from gwaslab.g_vchange_status import vchange_status
11
12
  from gwaslab.g_vchange_status import status_match
12
13
  from gwaslab.g_vchange_status import change_status
13
14
  from gwaslab.g_Log import Log
14
- from gwaslab.bd_common_data import get_chr_to_number
15
- from gwaslab.bd_common_data import get_number_to_chr
16
- from gwaslab.bd_common_data import get_chr_list
17
- from gwaslab.qc_check_datatype import check_datatype
18
- from gwaslab.qc_check_datatype import check_dataframe_shape
19
- from gwaslab.qc_build import _process_build
20
- from gwaslab.qc_build import _set_build
21
15
  from gwaslab.g_version import _get_version
22
- from gwaslab.util_in_fill_data import _convert_betase_to_mlog10p
23
- from gwaslab.util_in_fill_data import _convert_betase_to_p
24
- from gwaslab.util_in_fill_data import _convert_mlog10p_to_p
25
- from gwaslab.bd_common_data import get_chain
16
+ from gwaslab.g_vchange_status import STATUS_CATEGORIES
17
+
18
+ from gwaslab.bd.bd_common_data import get_chr_to_number
19
+ from gwaslab.bd.bd_common_data import get_number_to_chr
20
+ from gwaslab.bd.bd_common_data import get_chr_list
21
+ from gwaslab.bd.bd_common_data import get_chain
22
+ from gwaslab.bd.bd_common_data import NA_STRINGS
23
+
24
+ from gwaslab.qc.qc_check_datatype import check_datatype
25
+ from gwaslab.qc.qc_check_datatype import check_dataframe_shape
26
+ from gwaslab.qc.qc_build import _process_build
27
+ from gwaslab.qc.qc_build import _set_build
28
+
29
+ from gwaslab.util.util_in_fill_data import _convert_betase_to_mlog10p
30
+ from gwaslab.util.util_in_fill_data import _convert_betase_to_p
31
+ from gwaslab.util.util_in_fill_data import _convert_mlog10p_to_p
32
+
26
33
  #process build
27
34
  #setbuild
28
35
  #fixID
@@ -69,7 +76,7 @@ from gwaslab.bd_common_data import get_chain
69
76
 
70
77
  def fixID(sumstats,
71
78
  snpid="SNPID",rsid="rsID",chrom="CHR",pos="POS",nea="NEA",ea="EA",status="STATUS",fixprefix=False,
72
- fixchrpos=False,fixid=False,fixeanea=False,fixeanea_flip=False,fixsep=False,
79
+ fixchrpos=False,fixid=False,fixeanea=False,fixeanea_flip=False,fixsep=False, reversea=False,
73
80
  overwrite=False,verbose=True,forcefixid=False,log=Log()):
74
81
  '''
75
82
  1. fx SNPid
@@ -120,7 +127,21 @@ def fixID(sumstats,
120
127
  except:
121
128
  log.write(" -Force converting SNPID to pd.string data type...",verbose=verbose)
122
129
  sumstats[snpid] = sumstats[snpid].astype("string")
130
+ ############################ checking string NA ###################################################
131
+ log.write(" -Checking NA strings :{}".format(",".join(NA_STRINGS)),verbose=verbose)
132
+ if snpid in sumstats.columns:
133
+ log.write(" -Checking if SNPID contains NA strings...",verbose=verbose)
134
+ is_snpid_string_na = sumstats[snpid].isin(NA_STRINGS)
135
+ if sum(is_snpid_string_na) >0:
136
+ log.write(" -Converting {} NA strings in SNPID to pd.NA...".format(sum(is_snpid_string_na)),verbose=verbose)
137
+ sumstats.loc[is_snpid_string_na ,snpid] = pd.NA
123
138
 
139
+ if rsid in sumstats.columns:
140
+ log.write(" -Checking if rsID contains NA strings...",verbose=verbose)
141
+ is_rsid_string_na = sumstats[rsid].isin(NA_STRINGS)
142
+ if sum(is_rsid_string_na) >0:
143
+ log.write(" -Converting {} NA strings in rsID to pd.NA...".format(sum(is_rsid_string_na)),verbose=verbose)
144
+ sumstats.loc[is_rsid_string_na ,rsid] = pd.NA
124
145
  ############################ checking ###################################################
125
146
  if snpid in sumstats.columns:
126
147
  log.write(" -Checking if SNPID is CHR:POS:NEA:EA...(separator: - ,: , _)",verbose=verbose)
@@ -148,7 +169,15 @@ def fixID(sumstats,
148
169
  log.write(" -A look at the unrecognized rsID :",set(sumstats.loc[(~is_rsid)&(~is_rs_chrpos),rsid].head()),"...", verbose=verbose)
149
170
 
150
171
  ############################ fixing chr pos###################################################
151
-
172
+ if reversea == True:
173
+ if snpid in sumstats.columns:
174
+ log.write(" -Reversing Alleles in SNPID...", verbose=verbose)
175
+ to_fix = is_chrposrefalt
176
+ to_fix_num = sum(to_fix)
177
+ if to_fix_num>0 and verbose: log.write(" -Number of variants could be reversed: "+str(to_fix_num)+" ...")
178
+ extracted = sumstats.loc[to_fix, snpid].str.extract(r'(^\w+[:_-]\d+[:_-])([ATCG]+)([:_-])([ATCG]+$)', flags=re.IGNORECASE)
179
+ sumstats.loc[to_fix, snpid] = extracted[0] + extracted[3] + extracted[2] + extracted[1]
180
+
152
181
  if fixchrpos == True:
153
182
  # from snpid or rsid, extract CHR:POS to fix CHR and POS
154
183
  if snpid in sumstats.columns:
@@ -537,24 +566,24 @@ def removedup(sumstats,mode="dm",chrom="CHR",pos="POS",snpid="SNPID",ea="EA",nea
537
566
  pre_number =len(sumstats)
538
567
  specified_columns = []
539
568
  if "d" in mode:
540
- specified_columns.append(rsid)
541
- specified_columns.append(snpid)
542
- specified_columns.append(chrom)
543
- specified_columns.append(pos)
544
- specified_columns.append(ea)
545
- specified_columns.append(nea)
569
+ if rsid in sumstats.columns: specified_columns.append(rsid)
570
+ if snpid in sumstats.columns: specified_columns.append(snpid)
571
+ if chrom in sumstats.columns: specified_columns.append(chrom)
572
+ if pos in sumstats.columns: specified_columns.append(pos)
573
+ if ea in sumstats.columns: specified_columns.append(ea)
574
+ if nea in sumstats.columns: specified_columns.append(nea)
546
575
  if "r" in mode:
547
- specified_columns.append(rsid)
576
+ if rsid in sumstats.columns:specified_columns.append(rsid)
548
577
  if "s" in mode:
549
- specified_columns.append(snpid)
578
+ if snpid in sumstats.columns:specified_columns.append(snpid)
550
579
  if "m" in mode:
551
- specified_columns.append(chrom)
552
- specified_columns.append(pos)
580
+ if chrom in sumstats.columns:specified_columns.append(chrom)
581
+ if pos in sumstats.columns:specified_columns.append(pos)
553
582
  if "c" in mode:
554
- specified_columns.append(chrom)
555
- specified_columns.append(pos)
556
- specified_columns.append(ea)
557
- specified_columns.append(nea)
583
+ if chrom in sumstats.columns:specified_columns.append(chrom)
584
+ if pos in sumstats.columns:specified_columns.append(pos)
585
+ if ea in sumstats.columns:specified_columns.append(ea)
586
+ if nea in sumstats.columns:specified_columns.append(nea)
558
587
  sumstats = sumstats.loc[~sumstats[specified_columns].isna().any(axis=1),:]
559
588
  after_number=len(sumstats)
560
589
  log.write(" -Removed ",pre_number -after_number," variants with NA values in {} .".format(set(specified_columns)), verbose=verbose)
@@ -1123,19 +1152,17 @@ def check_range(sumstats, var_range, header, coltocheck, cols_to_check, log, ver
1123
1152
  cols_to_check.append(header)
1124
1153
  if header=="STATUS":
1125
1154
  log.write(" -Checking STATUS and converting STATUS to categories....", verbose=verbose)
1126
- categories = {str(j+i) for j in [1300000,1800000,1900000,3800000,9700000,9800000,9900000] for i in range(0,100000)}
1127
- sumstats[header] = pd.Categorical(sumstats[header],categories=categories)
1155
+ sumstats[header] = pd.Categorical(sumstats[header],categories=STATUS_CATEGORIES)
1128
1156
  return sumstats
1129
1157
 
1130
1158
  if dtype in ["Int64","Int32","int","int32","in64"]:
1131
1159
  log.write(" -Checking if {} <= {} <= {} ...".format( var_range[0] ,header, var_range[1]), verbose=verbose)
1132
1160
  sumstats[header] = np.floor(pd.to_numeric(sumstats[header], errors='coerce')).astype(dtype)
1133
-
1161
+ is_valid = (sumstats[header]>=var_range[0]) & (sumstats[header]<=var_range[1])
1134
1162
  elif dtype in ["Float64","Float32","float","float64","float32"]:
1135
1163
  log.write(" -Checking if {} < {} < {} ...".format( var_range[0] ,header, var_range[1]),verbose=verbose)
1136
1164
  sumstats[header] = pd.to_numeric(sumstats[header], errors='coerce').astype(dtype)
1137
-
1138
- is_valid = (sumstats[header]>=var_range[0]) & (sumstats[header]<=var_range[1])
1165
+ is_valid = (sumstats[header]>var_range[0]) & (sumstats[header]<var_range[1])
1139
1166
  is_valid = is_valid.fillna(False)
1140
1167
 
1141
1168
  if header=="P":
@@ -1835,4 +1862,4 @@ def check_col(df_col_names, verbose=True, log=Log(), cols=None, function=None):
1835
1862
  ###############################################################################################################
1836
1863
  def _df_split(dataframe, n):
1837
1864
  k, m = divmod(len(dataframe), n)
1838
- return [dataframe.iloc[i*k+min(i, m):(i+1)*k+min(i+1, m)] for i in range(n)]
1865
+ return [dataframe.iloc[i*k+min(i, m):(i+1)*k+min(i+1, m)] for i in range(n)]
@@ -11,18 +11,22 @@ from gwaslab.g_vchange_status_polars import vchange_statusp
11
11
  from gwaslab.g_vchange_status import status_match
12
12
  from gwaslab.g_vchange_status import change_status
13
13
  from gwaslab.g_Log import Log
14
- from gwaslab.bd_common_data import get_chr_to_number
15
- from gwaslab.bd_common_data import get_number_to_chr
16
- from gwaslab.bd_common_data import get_chr_list
17
- from gwaslab.qc_check_datatype import check_datatype
18
- from gwaslab.qc_check_datatype import check_dataframe_shape
19
- from gwaslab.qc_build import _process_build
20
- from gwaslab.qc_build import _set_build
21
14
  from gwaslab.g_version import _get_version
22
- from gwaslab.util_in_fill_data import _convert_betase_to_mlog10p
23
- from gwaslab.util_in_fill_data import _convert_betase_to_p
24
- from gwaslab.util_in_fill_data import _convert_mlog10p_to_p
25
- from gwaslab.bd_common_data import get_chain
15
+
16
+ from gwaslab.bd.bd_common_data import get_chr_to_number
17
+ from gwaslab.bd.bd_common_data import get_number_to_chr
18
+ from gwaslab.bd.bd_common_data import get_chr_list
19
+ from gwaslab.bd.bd_common_data import get_chain
20
+
21
+ from gwaslab.qc.qc_check_datatype import check_datatype
22
+ from gwaslab.qc.qc_check_datatype import check_dataframe_shape
23
+ from gwaslab.qc.qc_build import _process_build
24
+ from gwaslab.qc.qc_build import _set_build
25
+
26
+ from gwaslab.util.util_in_fill_data import _convert_betase_to_mlog10p
27
+ from gwaslab.util.util_in_fill_data import _convert_betase_to_p
28
+ from gwaslab.util.util_in_fill_data import _convert_mlog10p_to_p
29
+
26
30
  import polars as pl
27
31
  ###############################################################################################################
28
32
  # 20220426
@@ -1,8 +1,8 @@
1
1
  import pandas as pd
2
2
  import numpy as np
3
3
  from gwaslab.g_Log import Log
4
- from gwaslab.util_in_filter_value import _get_flanking_by_chrpos
5
- from gwaslab.util_in_filter_value import _get_flanking_by_id
4
+ from gwaslab.util.util_in_filter_value import _get_flanking_by_chrpos
5
+ from gwaslab.util.util_in_filter_value import _get_flanking_by_id
6
6
 
7
7
  # Calculate PIP based on approximate Bayesian factor (ABF)
8
8
  # Wakefield, J. A bayesian measure of the probability of false discovery in genetic epidemiology studies. Am J Hum Genet 81, 208–227 (2007).
@@ -4,14 +4,14 @@ import gc
4
4
  import pandas as pd
5
5
  import numpy as np
6
6
  from gwaslab.g_Log import Log
7
- from gwaslab.qc_fix_sumstats import start_to
8
- from gwaslab.qc_fix_sumstats import finished
9
- from gwaslab.util_in_get_sig import getsig
10
- from gwaslab.util_ex_process_ref import _process_plink_input_files
7
+ from gwaslab.qc.qc_fix_sumstats import start_to
8
+ from gwaslab.qc.qc_fix_sumstats import finished
9
+ from gwaslab.util.util_in_get_sig import getsig
10
+ from gwaslab.util.util_ex_process_ref import _process_plink_input_files
11
+ from gwaslab.util.util_in_filter_value import _exclude_hla
11
12
  from gwaslab.g_version import _checking_plink_version
12
- from gwaslab.util_in_filter_value import _exclude_hla
13
13
 
14
- def tofinemapping(sumstats,
14
+ def tofinemapping(gls,
15
15
  study=None,
16
16
  bfile=None,
17
17
  vcf=None,
@@ -39,6 +39,9 @@ def tofinemapping(sumstats,
39
39
  _start_cols =["SNPID","CHR","POS","EA","NEA"]
40
40
  _start_function = ".calculate_ld_matrix()"
41
41
  _must_args ={}
42
+
43
+ sumstats = gls.data
44
+ gls.offload()
42
45
 
43
46
  is_enough_info = start_to(sumstats=sumstats,
44
47
  log=log,
@@ -114,7 +117,8 @@ def tofinemapping(sumstats,
114
117
  locus_sumstats=locus_sumstats,
115
118
  ref_bim=ref_bim[0],
116
119
  log=log,suffixes=suffixes)
117
-
120
+ del locus_sumstats
121
+ gc.collect()
118
122
  #########################################################################################################
119
123
  # create matched snp list
120
124
  matched_snp_list_path,matched_sumstats_path=_export_snplist_and_locus_sumstats(matched_sumstats=matched_sumstats,
@@ -144,7 +148,8 @@ def tofinemapping(sumstats,
144
148
  extra_plink_option=extra_plink_option,
145
149
  ref_allele_path = matched_sumstats_path,
146
150
  verbose=verbose)
147
-
151
+ del matched_sumstats
152
+ gc.collect()
148
153
 
149
154
  # print file list
150
155
  row_dict={}
@@ -166,7 +171,12 @@ def tofinemapping(sumstats,
166
171
  output_file_list_path=None
167
172
  log.write(" -No avaialable lead variants.",verbose=verbose)
168
173
  log.write(" -Stopped LD matrix calculation.",verbose=verbose)
174
+
175
+ del sumstats
176
+
169
177
  finished(log=log, verbose=verbose, end_line=_end_line)
178
+ gls.reload()
179
+
170
180
  return output_file_list_path, output_file_list, plink_log
171
181
 
172
182
 
@@ -4,8 +4,8 @@ import gc
4
4
  import pandas as pd
5
5
  import numpy as np
6
6
  from gwaslab.g_Log import Log
7
- from gwaslab.util_in_get_sig import getsig
8
- from gwaslab.util_ex_process_ref import _process_plink_input_files
7
+ from gwaslab.util.util_in_get_sig import getsig
8
+ from gwaslab.util.util_ex_process_ref import _process_plink_input_files
9
9
  from gwaslab.g_version import _checking_plink_version
10
10
 
11
11
  def _calculate_prs(sumstats,
@@ -17,12 +17,12 @@ from mpl_toolkits.axes_grid1.inset_locator import mark_inset
17
17
  from adjustText import adjust_text
18
18
  from gtfparse import read_gtf
19
19
  from gwaslab.g_Log import Log
20
- from gwaslab.bd_common_data import get_chr_to_number
21
- from gwaslab.bd_common_data import get_number_to_chr
22
- from gwaslab.bd_common_data import get_recombination_rate
23
- from gwaslab.bd_common_data import get_gtf
24
- from gwaslab.util_in_filter_value import _get_flanking
25
- from gwaslab.hm_harmonize_sumstats import auto_check_vcf_chr_dict
20
+ from gwaslab.bd.bd_common_data import get_chr_to_number
21
+ from gwaslab.bd.bd_common_data import get_number_to_chr
22
+ from gwaslab.bd.bd_common_data import get_recombination_rate
23
+ from gwaslab.bd.bd_common_data import get_gtf
24
+ from gwaslab.util.util_in_filter_value import _get_flanking
25
+ from gwaslab.hm.hm_harmonize_sumstats import auto_check_vcf_chr_dict
26
26
  # unmatched SNP list 1
27
27
 
28
28
  # for each SNP in unmatched SNP list 1:
@@ -1,17 +1,22 @@
1
- from gwaslab.ldsc_sumstats import estimate_h2
2
- from gwaslab.ldsc_sumstats import estimate_rg
3
- from gwaslab.ldsc_sumstats import cell_type_specific
4
- from gwaslab.g_Log import Log
5
- from gwaslab.qc_fix_sumstats import start_to
6
- from gwaslab.qc_fix_sumstats import finished
7
- from gwaslab.qc_fix_sumstats import skipped
8
- from gwaslab.io_read_ldsc import parse_ldsc_summary
9
- from gwaslab.io_read_ldsc import parse_partitioned_ldsc_summary
10
- from gwaslab.util_in_filter_value import filtervalues
11
- from gwaslab.util_in_filter_value import _filter_palindromic
12
- from gwaslab.util_in_filter_value import _exclude_hla
13
- from gwaslab.util_in_filter_value import _exclude_sexchr
14
1
  import copy
2
+ from gwaslab.g_Log import Log
3
+
4
+ from gwaslab.extension.ldsc.ldsc_sumstats import estimate_h2
5
+ from gwaslab.extension.ldsc.ldsc_sumstats import estimate_rg
6
+ from gwaslab.extension.ldsc.ldsc_sumstats import cell_type_specific
7
+
8
+ from gwaslab.qc.qc_fix_sumstats import start_to
9
+ from gwaslab.qc.qc_fix_sumstats import finished
10
+ from gwaslab.qc.qc_fix_sumstats import skipped
11
+
12
+ from gwaslab.io.io_read_ldsc import parse_ldsc_summary
13
+ from gwaslab.io.io_read_ldsc import parse_partitioned_ldsc_summary
14
+
15
+ from gwaslab.util.util_in_filter_value import filtervalues
16
+ from gwaslab.util.util_in_filter_value import _filter_palindromic
17
+ from gwaslab.util.util_in_filter_value import _exclude_hla
18
+ from gwaslab.util.util_in_filter_value import _exclude_sexchr
19
+
15
20
 
16
21
  class ARGS():
17
22
  def __init__(self, kwargs=None):
@@ -1,7 +1,7 @@
1
1
  import scipy.sparse as sparse
2
2
  import numpy as np
3
3
  import pandas as pd
4
- from gwaslab.hm_casting import _merge_mold_with_sumstats_by_chrpos
4
+
5
5
  import subprocess
6
6
  import os
7
7
  import re
@@ -9,16 +9,17 @@ import gc
9
9
  import pandas as pd
10
10
  import numpy as np
11
11
  from gwaslab.g_Log import Log
12
- from gwaslab.qc_fix_sumstats import start_to
13
- from gwaslab.qc_fix_sumstats import finished
14
- from gwaslab.util_in_get_sig import getsig
15
- from gwaslab.util_ex_process_ref import _process_plink_input_files
16
12
  from gwaslab.g_version import _checking_plink_version
17
- from gwaslab.util_in_filter_value import _exclude_hla
18
- from gwaslab.util_ex_calculate_ldmatrix import _extract_variants_in_locus
19
13
 
14
+ from gwaslab.hm.hm_casting import _merge_mold_with_sumstats_by_chrpos
20
15
 
16
+ from gwaslab.qc.qc_fix_sumstats import start_to
17
+ from gwaslab.qc.qc_fix_sumstats import finished
21
18
 
19
+ from gwaslab.util.util_in_get_sig import getsig
20
+ from gwaslab.util.util_ex_process_ref import _process_plink_input_files
21
+ from gwaslab.util.util_in_filter_value import _exclude_hla
22
+ from gwaslab.util.util_ex_calculate_ldmatrix import _extract_variants_in_locus
22
23
 
23
24
  def tofinemapping_m(sumstats,
24
25
  studies=None,
@@ -0,0 +1,117 @@
1
+ import pandas as pd
2
+ from gwaslab.g_Log import Log
3
+ import pandas as pd
4
+ import numpy as np
5
+
6
def _extract_associations(sumstats, rsid="rsID", log = Log(), verbose=True):
    """Collect GWAS Catalog associations for the variants in ``sumstats``.

    Retrieves associations, traits, studies and variants through
    ``get_associations_from_gwascatalog``, fills missing betas with
    ``_fix_beta``, and left-joins everything on ``associationId``.

    Returns
    -------
    tuple
        ``(full_table, summary_table)``: the merged table with renamed
        columns, and a view of it restricted to the summary columns.
    """
    assoc, traits, studies, variants = get_associations_from_gwascatalog(
        sumstats, rsid=rsid, log=log, verbose=verbose
    )
    assoc = _fix_beta(assoc)

    # One row per association: trait names / short forms joined by commas.
    traits_agg = (
        traits.groupby("associationId")[["trait","shortForm"]]
        .agg(lambda x: ",".join(x))
        .reset_index()
    )

    # Attach traits, then studies, then variants; every join keeps all associations.
    merged = assoc
    for extra in (traits_agg, studies, variants):
        merged = pd.merge(merged, extra, on="associationId", how="left")

    column_aliases = {
        "trait": "GWASCATALOG_TRAIT",
        "riskFrequency": "RAF",
        "betaNum": "Beta",
        "pvalue": "P-value",
    }
    merged = merged.rename(columns=column_aliases)

    summary_columns = [
        'GWASCATALOG_TRAIT', 'associationId', 'rsID', "geneName",
        'RA', 'RAF', 'Beta', 'P-value', 'cohort', 'initialSampleSize',
        'publicationInfo.pubmedId', "functionalClass", "gene.geneName",
    ]
    summary = merged[summary_columns]

    return merged, summary
33
+
34
def get_associations_from_gwascatalog(sumstats, rsid="rsID", log=Log(), verbose=True):
    """Query the GWAS Catalog (via ``pandasgwas``) for every unique rsID in ``sumstats``.

    Parameters
    ----------
    sumstats : pandas.DataFrame
        Table containing the ``rsid`` column; rows with a missing rsID and
        duplicated rsIDs are ignored.
    rsid : str
        Name of the rsID column.
    log : Log
        Logger used for progress messages.
    verbose : bool
        Passed through to ``log.write``.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(association, traits, studies, variants)``. Each frame is empty
        when nothing was retrieved for it.

    Notes
    -----
    Requires the third-party ``pandasgwas`` package; it is imported here and
    called once per variant and three times per association.
    NOTE(review): the attributes read from the pandasgwas results
    (``associations``, ``strongest_risk_alleles``, ``author_reported_genes``,
    ``efo_traits``, ``studies``, ``variants``, ``genomic_contexts``) are
    assumed to be DataFrames -- confirm against the pandasgwas version in use.
    """
    from pandasgwas import get_associations
    from pandasgwas import get_traits
    from pandasgwas import get_studies
    from pandasgwas import get_variants

    def _stack(frames):
        # pd.concat raises on an empty list; return an empty frame instead.
        return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

    association_frames = []
    risk_allele_frames = []
    gene_frames = []
    # Collected across ALL variants so the final report is complete.
    empty = []

    unique_sumstats = sumstats.dropna(subset=[rsid]).drop_duplicates(subset=[rsid])

    for variant_id in unique_sumstats[rsid]:
        log.write(f"Getting associations from GWAS Catalog for {variant_id}...",verbose=verbose)

        df = get_associations(variant_id = variant_id)

        if len(df.associations)>0:
            df.associations[rsid] = variant_id
            association_frames.append(df.associations)

            df.strongest_risk_alleles[rsid] = variant_id
            risk_allele_frames.append(df.strongest_risk_alleles)

            # Author-reported genes are optional: keep going without them,
            # but say so instead of silently swallowing the error.
            try:
                genes = df.author_reported_genes
                if isinstance(genes, pd.DataFrame):
                    gene_frames.append(genes)
            except Exception as e:
                log.write(f" -Skipped author reported genes for {variant_id}: {e}", verbose=verbose)
            log.write("", show_time=False, verbose=verbose)
        else:
            empty.append(variant_id)

    log.write(f"No associations: {empty}", verbose=verbose)

    association = _stack(association_frames)
    strongest_risk_alleles = _stack(risk_allele_frames)
    author_reported_genes = _stack(gene_frames)

    if len(strongest_risk_alleles)>0:
        # The risk allele is the part after the last "-" in riskAlleleName.
        strongest_risk_alleles["RA"] = strongest_risk_alleles["riskAlleleName"].str.split("-").str[-1]

    if len(association)>0:
        if "RA" in strongest_risk_alleles.columns:
            association = pd.merge(association, strongest_risk_alleles[["associationId","RA"]],on="associationId",how="left")

        if {"associationId","geneName"}.issubset(author_reported_genes.columns):
            genes_per_association = author_reported_genes.groupby("associationId")["geneName"].agg(lambda x: ",".join(x))
            association = pd.merge(association, genes_per_association,on="associationId",how="left")
        else:
            # Keep the output schema stable when no gene information came back.
            association["geneName"] = pd.NA

    log.write(f"Retrieved {len(association)} associations from GWAS Catalog...", verbose=verbose)

    trait_frames = []
    study_frames = []
    variant_frames = []

    # An empty result has no "associationId" column; skip the lookups then.
    if "associationId" in association.columns:
        association_ids = association["associationId"].drop_duplicates()
    else:
        association_ids = []

    for association_id in association_ids:
        log.write(f'Getting traits/studies/variants from GWAS Catalog for associationId: {association_id}...',verbose=verbose)

        df = get_traits(association_id = association_id)
        df.efo_traits["associationId"] = association_id
        trait_frames.append(df.efo_traits)

        df = get_studies(association_id = association_id)
        df.studies["associationId"] = association_id
        study_frames.append(df.studies)

        df = get_variants(association_id = association_id)
        df.variants["associationId"] = association_id
        # Keep only the genomic contexts at the minimum distance and join
        # their distinct gene names per rsId.
        contexts = df.genomic_contexts
        min_distance = contexts["distance"].min()
        nearest_genes = (
            contexts.loc[contexts["distance"]==min_distance,:]
            .drop_duplicates("gene.geneName")
            .groupby("rsId")["gene.geneName"]
            .agg(lambda x: ",".join(x))
        )
        variant_info = pd.merge(df.variants[["rsId","functionalClass","associationId"]], nearest_genes, on="rsId")
        variant_frames.append(variant_info[["associationId","functionalClass","gene.geneName"]])

    traits = _stack(trait_frames)
    studies = _stack(study_frames)
    variants = _stack(variant_frames)

    return association, traits, studies, variants
102
+
103
+ def _fix_beta(association):
104
+
105
+ is_or_available = (association["betaNum"].isna()) & (~association["orPerCopyNum"].isna())
106
+ is_range_available = (association["betaNum"].isna()) & (association["orPerCopyNum"].isna()) & (~association["range"].isna())
107
+
108
+ association.loc[is_or_available ,"betaNum"] = np.log(association.loc[is_or_available,"orPerCopyNum"])
109
+ association.loc[is_range_available ,"betaNum"] = association.loc[is_range_available,"range"].apply(lambda x: parse_range(x))
110
+ return association
111
+
112
def parse_range(x):
    """Return the mean of the natural logs of the two bounds in a range string.

    ``x`` is expected to look like ``"[low-high]"``. The first two unsigned
    numbers found in the text are used as ``low`` and ``high``, so trailing
    text after the bracket is tolerated.

    Returns ``numpy.nan`` when fewer than two numbers are present (for
    example ``"[NR]"``) or when a bound is not strictly positive, since the
    logarithm is undefined there.
    """
    import re

    # The bounds must be converted to float: np.log cannot take the string
    # pieces produced by splitting the text.
    bounds = re.findall(r"\d*\.?\d+(?:[eE][-+]?\d+)?", str(x))
    if len(bounds) < 2:
        return np.nan

    low = float(bounds[0])
    high = float(bounds[1])
    if low <= 0 or high <= 0:
        return np.nan

    beta = (np.log(high) + np.log(low)) / 2
    return beta
@@ -2,8 +2,8 @@ import pandas as pd
2
2
  import os
3
3
  import numpy as np
4
4
  from gwaslab.g_Log import Log
5
- from gwaslab.qc_fix_sumstats import start_to
6
- from gwaslab.qc_fix_sumstats import finished
5
+ from gwaslab.qc.qc_fix_sumstats import start_to
6
+ from gwaslab.qc.qc_fix_sumstats import finished
7
7
 
8
8
  def process_vcf_to_hfd5(vcf,
9
9
  directory=None,
@@ -4,8 +4,8 @@ import subprocess
4
4
  from gwaslab.g_Log import Log
5
5
  import os
6
6
  from gwaslab.g_version import _checking_plink_version
7
- from gwaslab.qc_fix_sumstats import start_to
8
- from gwaslab.qc_fix_sumstats import finished
7
+ from gwaslab.qc.qc_fix_sumstats import start_to
8
+ from gwaslab.qc.qc_fix_sumstats import finished
9
9
 
10
10
  def _process_plink_input_files(chrlist,
11
11
  bfile=None,
@@ -7,13 +7,14 @@ import numpy as np
7
7
  from gwaslab.g_Log import Log
8
8
  from gwaslab.g_version import _checking_r_version
9
9
  from gwaslab.g_version import _check_susie_version
10
- from gwaslab.util_in_convert_h2 import _get_per_snp_r2
11
- from gwaslab.qc_fix_sumstats import start_to
12
- from gwaslab.qc_fix_sumstats import finished
10
+ from gwaslab.util.util_in_convert_h2 import _get_per_snp_r2
11
+ from gwaslab.qc.qc_fix_sumstats import start_to
12
+ from gwaslab.qc.qc_fix_sumstats import finished
13
13
 
14
14
 
15
15
  def _run_two_sample_mr(sumstatspair_object,
16
16
  r,
17
+ out="./",
17
18
  clump=False,
18
19
  f_check=10,
19
20
  exposure1="Trait1",
@@ -77,9 +78,15 @@ def _run_two_sample_mr(sumstatspair_object,
77
78
  # Clumping
78
79
 
79
80
  prefix = "{exposure}_{outcome}_{memory_id}".format(exposure = exposure1, outcome= outcome2, memory_id = id(sumstatspair))
80
- temp_sumstats_path = "twosample_mr_{exposure}_{outcome}_{memory_id}.csv.gz".format(exposure = exposure1, outcome= outcome2, memory_id = id(sumstatspair))
81
- sumstatspair.to_csv(temp_sumstats_path ,index=None)
82
-
81
+ prefix = "{}{}".format(out.rstrip('/') + "/",prefix)
82
+ temp_sumstats_path = "{out}twosample_mr_{exposure}_{outcome}_{memory_id}.csv.gz".format(out=out.rstrip('/') + "/",
83
+ exposure = exposure1,
84
+ outcome= outcome2,
85
+ memory_id = id(sumstatspair))
86
+ if len(sumstatspair)>0:
87
+ sumstatspair.to_csv(temp_sumstats_path ,index=None)
88
+ else:
89
+ return 0
83
90
  ###
84
91
  calculate_r_script = ""
85
92
 
@@ -173,7 +180,10 @@ def _run_two_sample_mr(sumstatspair_object,
173
180
  directionality_test = directionality_test_script
174
181
  )
175
182
 
176
- temp_r_script_path = "_{}_{}_{}_gwaslab_2smr_temp.R".format(exposure1,outcome2,id(sumstatspair))
183
+ temp_r_script_path = "{}_{}_{}_{}_gwaslab_2smr_temp.R".format(out.rstrip('/') + "/",
184
+ exposure1,
185
+ outcome2,
186
+ id(sumstatspair))
177
187
  with open(temp_r_script_path,"w") as file:
178
188
  file.write(rscript)
179
189
 
@@ -200,6 +210,7 @@ def _run_two_sample_mr(sumstatspair_object,
200
210
  log.write(rscript)
201
211
  log.write(e.output)
202
212
  os.remove(temp_r_script_path)
213
+ log.write(" Finished running MR using twosampleMR from command line.")
203
214
 
204
215
 
205
216
 
@@ -6,10 +6,10 @@ import numpy as np
6
6
  from gwaslab.g_Log import Log
7
7
  from gwaslab.g_version import _checking_r_version
8
8
  from gwaslab.g_version import _check_susie_version
9
- from gwaslab.qc_fix_sumstats import start_to
10
- from gwaslab.qc_fix_sumstats import finished
11
- from gwaslab.util_ex_calculate_ldmatrix import _extract_variants_in_locus
12
- from gwaslab.util_in_get_sig import getsig
9
+ from gwaslab.qc.qc_fix_sumstats import start_to
10
+ from gwaslab.qc.qc_fix_sumstats import finished
11
+ from gwaslab.util.util_ex_calculate_ldmatrix import _extract_variants_in_locus
12
+ from gwaslab.util.util_in_get_sig import getsig
13
13
 
14
14
  def _run_ccgwas( sumstats_pair,
15
15
  r="Rscript",