gwaslab 3.6.6__py3-none-any.whl → 3.6.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gwaslab might be problematic. Click here for more details.
- gwaslab/__init__.py +57 -47
- gwaslab/{bd_common_data.py → bd/bd_common_data.py} +10 -9
- gwaslab/bd/bd_config.py +28 -0
- gwaslab/{bd_download.py → bd/bd_download.py} +1 -1
- gwaslab/{bd_get_hapmap3.py → bd/bd_get_hapmap3.py} +9 -6
- gwaslab/bd/bd_path_manager.py +110 -0
- gwaslab/data/formatbook.json +805 -9
- gwaslab/{ldsc_irwls.py → extension/ldsc/ldsc_irwls.py} +1 -1
- gwaslab/{ldsc_regressions.py → extension/ldsc/ldsc_regressions.py} +2 -2
- gwaslab/{ldsc_sumstats.py → extension/ldsc/ldsc_sumstats.py} +2 -2
- gwaslab/{prscs_mcmc_gtb.py → extension/prscs/prscs_mcmc_gtb.py} +1 -1
- gwaslab/g_Sumstats.py +130 -96
- gwaslab/g_SumstatsMulti.py +69 -40
- gwaslab/g_SumstatsPair.py +54 -37
- gwaslab/g_SumstatsSet.py +88 -81
- gwaslab/g_SumstatsT.py +6 -6
- gwaslab/g_Sumstats_polars.py +84 -84
- gwaslab/g_meta_update.py +1 -1
- gwaslab/g_vchange_status.py +4 -4
- gwaslab/g_version.py +2 -2
- gwaslab/{hm_casting.py → hm/hm_casting.py} +4 -4
- gwaslab/{hm_casting_polars.py → hm/hm_casting_polars.py} +4 -4
- gwaslab/hm/hm_harmonize_sumstats.py +1635 -0
- gwaslab/hm_harmonize_sumstats.py +3 -8
- gwaslab/{io_load_ld.py → io/io_load_ld.py} +16 -13
- gwaslab/{io_preformat_input.py → io/io_preformat_input.py} +152 -73
- gwaslab/{io_preformat_input_polars.py → io/io_preformat_input_polars.py} +7 -7
- gwaslab/{io_read_pipcs.py → io/io_read_pipcs.py} +2 -2
- gwaslab/{io_read_tabular.py → io/io_read_tabular.py} +2 -2
- gwaslab/{io_to_formats.py → io/io_to_formats.py} +11 -8
- gwaslab/{io_to_pickle.py → io/io_to_pickle.py} +16 -1
- gwaslab/{qc_check_datatype_polars.py → qc/qc_check_datatype_polars.py} +2 -2
- gwaslab/{qc_fix_sumstats.py → qc/qc_fix_sumstats.py} +60 -33
- gwaslab/{qc_fix_sumstats_polars.py → qc/qc_fix_sumstats_polars.py} +15 -11
- gwaslab/{util_abf_finemapping.py → util/util_abf_finemapping.py} +2 -2
- gwaslab/{util_ex_calculate_ldmatrix.py → util/util_ex_calculate_ldmatrix.py} +18 -8
- gwaslab/{util_ex_calculate_prs.py → util/util_ex_calculate_prs.py} +2 -2
- gwaslab/{util_ex_ldproxyfinder.py → util/util_ex_ldproxyfinder.py} +6 -6
- gwaslab/{util_ex_ldsc.py → util/util_ex_ldsc.py} +18 -13
- gwaslab/{util_ex_match_ldmatrix.py → util/util_ex_match_ldmatrix.py} +8 -7
- gwaslab/util/util_ex_phewwas.py +117 -0
- gwaslab/{util_ex_process_h5.py → util/util_ex_process_h5.py} +2 -2
- gwaslab/{util_ex_process_ref.py → util/util_ex_process_ref.py} +2 -2
- gwaslab/{util_ex_run_2samplemr.py → util/util_ex_run_2samplemr.py} +18 -7
- gwaslab/{util_ex_run_ccgwas.py → util/util_ex_run_ccgwas.py} +4 -4
- gwaslab/{util_ex_run_clumping.py → util/util_ex_run_clumping.py} +28 -13
- gwaslab/{util_ex_run_coloc.py → util/util_ex_run_coloc.py} +22 -10
- gwaslab/{util_ex_run_hyprcoloc.py → util/util_ex_run_hyprcoloc.py} +4 -4
- gwaslab/{util_ex_run_magma.py → util/util_ex_run_magma.py} +21 -11
- gwaslab/{util_ex_run_mesusie.py → util/util_ex_run_mesusie.py} +3 -3
- gwaslab/{util_ex_run_mtag.py → util/util_ex_run_mtag.py} +50 -18
- gwaslab/{util_ex_run_prscs.py → util/util_ex_run_prscs.py} +3 -3
- gwaslab/{util_ex_run_scdrs.py → util/util_ex_run_scdrs.py} +10 -4
- gwaslab/{util_ex_run_susie.py → util/util_ex_run_susie.py} +49 -26
- gwaslab/{util_in_fill_data.py → util/util_in_fill_data.py} +1 -1
- gwaslab/{util_in_filter_value.py → util/util_in_filter_value.py} +18 -11
- gwaslab/{util_in_get_sig.py → util/util_in_get_sig.py} +15 -13
- gwaslab/{util_in_meta.py → util/util_in_meta.py} +1 -1
- gwaslab/{util_in_meta_polars.py → util/util_in_meta_polars.py} +1 -1
- gwaslab/{viz_aux_annotate_plot.py → viz/viz_aux_annotate_plot.py} +1 -1
- gwaslab/{viz_aux_quickfix.py → viz/viz_aux_quickfix.py} +2 -2
- gwaslab/{viz_plot_compare_af.py → viz/viz_plot_compare_af.py} +1 -1
- gwaslab/{viz_plot_compare_effect.py → viz/viz_plot_compare_effect.py} +16 -8
- gwaslab/{viz_plot_credible_sets.py → viz/viz_plot_credible_sets.py} +6 -6
- gwaslab/{viz_plot_effect.py → viz/viz_plot_effect.py} +37 -69
- gwaslab/{viz_plot_miamiplot.py → viz/viz_plot_miamiplot.py} +28 -20
- gwaslab/{viz_plot_miamiplot2.py → viz/viz_plot_miamiplot2.py} +27 -22
- gwaslab/{viz_plot_mqqplot.py → viz/viz_plot_mqqplot.py} +48 -38
- gwaslab/{viz_plot_phe_heatmap.py → viz/viz_plot_phe_heatmap.py} +18 -15
- gwaslab/{viz_plot_qqplot.py → viz/viz_plot_qqplot.py} +4 -2
- gwaslab/{viz_plot_regional2.py → viz/viz_plot_regional2.py} +11 -9
- gwaslab/{viz_plot_regionalplot.py → viz/viz_plot_regionalplot.py} +5 -4
- gwaslab/{viz_plot_rg_heatmap.py → viz/viz_plot_rg_heatmap.py} +1 -1
- gwaslab/{viz_plot_scatter_with_reg.py → viz/viz_plot_scatter_with_reg.py} +10 -7
- gwaslab/{viz_plot_stackedregional.py → viz/viz_plot_stackedregional.py} +67 -33
- gwaslab/{viz_plot_trumpetplot.py → viz/viz_plot_trumpetplot.py} +11 -9
- {gwaslab-3.6.6.dist-info → gwaslab-3.6.7.dist-info}/METADATA +1 -1
- gwaslab-3.6.7.dist-info/RECORD +123 -0
- gwaslab/bd_config.py +0 -18
- gwaslab-3.6.6.dist-info/RECORD +0 -120
- /gwaslab/{ldsc_jackknife.py → extension/ldsc/ldsc_jackknife.py} +0 -0
- /gwaslab/{ldsc_ldscore.py → extension/ldsc/ldsc_ldscore.py} +0 -0
- /gwaslab/{ldsc_parse.py → extension/ldsc/ldsc_parse.py} +0 -0
- /gwaslab/{prscs_gigrnd.py → extension/prscs/prscs_gigrnd.py} +0 -0
- /gwaslab/{prscs_parse_genet.py → extension/prscs/prscs_parse_genet.py} +0 -0
- /gwaslab/{hm_rsid_to_chrpos.py → hm/hm_rsid_to_chrpos.py} +0 -0
- /gwaslab/{io_process_args.py → io/io_process_args.py} +0 -0
- /gwaslab/{io_read_ldsc.py → io/io_read_ldsc.py} +0 -0
- /gwaslab/{qc_build.py → qc/qc_build.py} +0 -0
- /gwaslab/{qc_check_datatype.py → qc/qc_check_datatype.py} +0 -0
- /gwaslab/{util_ex_gwascatalog.py → util/util_ex_gwascatalog.py} +0 -0
- /gwaslab/{util_ex_infer_ancestry.py → util/util_ex_infer_ancestry.py} +0 -0
- /gwaslab/{util_ex_plink_filter.py → util/util_ex_plink_filter.py} +0 -0
- /gwaslab/{util_in_calculate_gc.py → util/util_in_calculate_gc.py} +0 -0
- /gwaslab/{util_in_calculate_power.py → util/util_in_calculate_power.py} +0 -0
- /gwaslab/{util_in_convert_h2.py → util/util_in_convert_h2.py} +0 -0
- /gwaslab/{util_in_correct_winnerscurse.py → util/util_in_correct_winnerscurse.py} +0 -0
- /gwaslab/{util_in_estimate_ess.py → util/util_in_estimate_ess.py} +0 -0
- /gwaslab/{util_in_get_density.py → util/util_in_get_density.py} +0 -0
- /gwaslab/{util_in_merge.py → util/util_in_merge.py} +0 -0
- /gwaslab/{util_in_snphwe.py → util/util_in_snphwe.py} +0 -0
- /gwaslab/{viz_aux_chromatin.py → viz/viz_aux_chromatin.py} +0 -0
- /gwaslab/{viz_aux_property.py → viz/viz_aux_property.py} +0 -0
- /gwaslab/{viz_aux_reposition_text.py → viz/viz_aux_reposition_text.py} +0 -0
- /gwaslab/{viz_aux_save_figure.py → viz/viz_aux_save_figure.py} +0 -0
- /gwaslab/{viz_plot_forestplot.py → viz/viz_plot_forestplot.py} +0 -0
- {gwaslab-3.6.6.dist-info → gwaslab-3.6.7.dist-info}/WHEEL +0 -0
- {gwaslab-3.6.6.dist-info → gwaslab-3.6.7.dist-info}/licenses/LICENSE +0 -0
- {gwaslab-3.6.6.dist-info → gwaslab-3.6.7.dist-info}/licenses/LICENSE_before_v3.4.39 +0 -0
- {gwaslab-3.6.6.dist-info → gwaslab-3.6.7.dist-info}/top_level.txt +0 -0
|
@@ -4,9 +4,9 @@ import gc
|
|
|
4
4
|
import pandas as pd
|
|
5
5
|
import numpy as np
|
|
6
6
|
from gwaslab.g_Log import Log
|
|
7
|
-
from gwaslab.util_in_filter_value import _exclude_hla
|
|
7
|
+
from gwaslab.util.util_in_filter_value import _exclude_hla
|
|
8
8
|
|
|
9
|
-
def _run_magma(
|
|
9
|
+
def _run_magma(gls,
|
|
10
10
|
magma="magma",
|
|
11
11
|
study="Study1",
|
|
12
12
|
exclude_hla=True,
|
|
@@ -15,7 +15,7 @@ def _run_magma(sumstats,
|
|
|
15
15
|
ref=None,
|
|
16
16
|
ncbi=None,
|
|
17
17
|
set_annot=None,
|
|
18
|
-
out=
|
|
18
|
+
out=None,
|
|
19
19
|
delete=True,
|
|
20
20
|
ncol="N",
|
|
21
21
|
build="19",
|
|
@@ -24,33 +24,42 @@ def _run_magma(sumstats,
|
|
|
24
24
|
|
|
25
25
|
log.write(" Start to run magma from command line:", verbose=verbose)
|
|
26
26
|
|
|
27
|
+
sumstats = gls.data
|
|
28
|
+
gls.offload()
|
|
27
29
|
if exclude_hla==True:
|
|
28
30
|
sumstats = _exclude_hla(sumstats, build =build)
|
|
31
|
+
|
|
32
|
+
if out is None:
|
|
33
|
+
out = os.path.join("./", study)
|
|
34
|
+
else:
|
|
35
|
+
out = os.path.join(out, study)
|
|
29
36
|
|
|
30
|
-
snploc="{}
|
|
31
|
-
pval="{}
|
|
37
|
+
snploc="{}.rsid.chr.pos.tsv".format(out)
|
|
38
|
+
pval="{}.rsid.p.n.tsv".format(out)
|
|
32
39
|
|
|
33
40
|
log.write(f" -writing temp file for --snp-loc:{snploc}", verbose=verbose)
|
|
34
|
-
sumstats.dropna()[[id_to_use,"CHR","POS"]].rename(columns={id_to_use:"SNP"}).to_csv("{}
|
|
41
|
+
sumstats.dropna()[[id_to_use,"CHR","POS"]].rename(columns={id_to_use:"SNP"}).to_csv("{}.rsid.chr.pos.tsv".format(out),index=None, sep="\t")
|
|
35
42
|
|
|
36
43
|
log.write(f" -writing temp file for --pval:{pval}", verbose=verbose)
|
|
37
|
-
sumstats.dropna()[[id_to_use,"P","N"]].rename(columns={id_to_use:"SNP"}).to_csv("{}
|
|
44
|
+
sumstats.dropna()[[id_to_use,"P","N"]].rename(columns={id_to_use:"SNP"}).to_csv("{}.rsid.p.n.tsv".format(out),index=None, sep="\t")
|
|
38
45
|
|
|
39
46
|
log.write(f" --annotate window: {window}", verbose=verbose)
|
|
40
47
|
log.write(f" --gene-loc: {ncbi}", verbose=verbose)
|
|
41
48
|
log.write(f" --bfile: {ref}", verbose=verbose)
|
|
42
49
|
log.write(f" Output prefix: {out}", verbose=verbose)
|
|
43
|
-
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
|
|
44
53
|
bash_script=f'''#!/bin/bash
|
|
45
54
|
|
|
46
|
-
{magma} --annotate window={window} --snp-loc {snploc} --gene-loc {ncbi} --out {
|
|
55
|
+
{magma} --annotate window={window} --snp-loc {snploc} --gene-loc {ncbi} --out {out}
|
|
47
56
|
|
|
48
|
-
{magma} --bfile {ref} --pval {pval} ncol={ncol} --gene-annot {
|
|
57
|
+
{magma} --bfile {ref} --pval {pval} ncol={ncol} --gene-annot {out}.genes.annot --out {out}
|
|
49
58
|
'''
|
|
50
59
|
|
|
51
60
|
if set_annot is not None:
|
|
52
61
|
bash_script+=f'''
|
|
53
|
-
{magma} --gene-results {
|
|
62
|
+
{magma} --gene-results {out}.genes.raw --set-annot {set_annot} --out {out}
|
|
54
63
|
'''
|
|
55
64
|
log.write(f"Script: {bash_script}")
|
|
56
65
|
|
|
@@ -67,5 +76,6 @@ def _run_magma(sumstats,
|
|
|
67
76
|
log.warning("ERROR!")
|
|
68
77
|
log.write(e.output)
|
|
69
78
|
|
|
79
|
+
gls.reload()
|
|
70
80
|
log.write("Finished running magma.", verbose=verbose)
|
|
71
81
|
|
|
@@ -6,9 +6,9 @@ import numpy as np
|
|
|
6
6
|
from gwaslab.g_Log import Log
|
|
7
7
|
from gwaslab.g_version import _checking_r_version
|
|
8
8
|
from gwaslab.g_version import _check_susie_version
|
|
9
|
-
from gwaslab.qc_fix_sumstats import start_to
|
|
10
|
-
from gwaslab.qc_fix_sumstats import finished
|
|
11
|
-
from gwaslab.viz_plot_stackedregional import _sort_args
|
|
9
|
+
from gwaslab.qc.qc_fix_sumstats import start_to
|
|
10
|
+
from gwaslab.qc.qc_fix_sumstats import finished
|
|
11
|
+
from gwaslab.viz.viz_plot_stackedregional import _sort_args
|
|
12
12
|
|
|
13
13
|
def _run_mesusie(filepath,
|
|
14
14
|
r="Rscript",
|
|
@@ -4,19 +4,22 @@ import gc
|
|
|
4
4
|
import pandas as pd
|
|
5
5
|
import numpy as np
|
|
6
6
|
from gwaslab.g_Log import Log
|
|
7
|
+
from gwaslab.bd.bd_path_manager import _path
|
|
7
8
|
|
|
8
9
|
def _run_mtag( sumstats_multi,
|
|
9
|
-
python="
|
|
10
|
+
python="python",
|
|
10
11
|
mtag="",
|
|
11
12
|
study="Group1",
|
|
13
|
+
special_flags="",
|
|
14
|
+
ld_ref_panel = None,
|
|
12
15
|
traits=None,
|
|
13
16
|
out_prefix=None,
|
|
14
|
-
|
|
17
|
+
perfect_gencov = False,
|
|
18
|
+
equal_h2 = False,
|
|
19
|
+
no_overlap = False,
|
|
20
|
+
fdr=False,
|
|
15
21
|
n_min=0,
|
|
16
|
-
loci=None,
|
|
17
22
|
nstudy=2,
|
|
18
|
-
windowsizekb=1000,
|
|
19
|
-
build="99",
|
|
20
23
|
log=Log(),
|
|
21
24
|
verbose=True):
|
|
22
25
|
|
|
@@ -49,16 +52,37 @@ def _run_mtag( sumstats_multi,
|
|
|
49
52
|
"N_{}".format( i+1) :"n",
|
|
50
53
|
|
|
51
54
|
}
|
|
55
|
+
csv_path = _path(study = study,
|
|
56
|
+
trait = traits_to_form_string[i],
|
|
57
|
+
suffix="tsv.gz")
|
|
58
|
+
|
|
59
|
+
sumstats_multi.data[output_snp_info_cols+ output_stats_cols].rename(columns=rename_dict).to_csv(csv_path, index=None,sep="\t")
|
|
60
|
+
sumstats_paths.append(csv_path)
|
|
52
61
|
|
|
53
|
-
|
|
54
|
-
sumstats_paths.append("{}_{}.tsv.gz".format(study, traits_to_form_string[i]))
|
|
62
|
+
sumstats_multi.offload()
|
|
55
63
|
|
|
56
64
|
python_log=""
|
|
57
65
|
if out_prefix is None:
|
|
58
|
-
out_prefix =
|
|
59
|
-
|
|
66
|
+
out_prefix = _path(study=study,
|
|
67
|
+
nstudy = nstudy)
|
|
68
|
+
|
|
69
|
+
#out_prefix = "./{study}_{nstudy}studies".format(study=study, nstudy=nstudy)
|
|
70
|
+
if ld_ref_panel is not None:
|
|
71
|
+
ld_ref_flag = "--ld_ref_panel {}".format(ld_ref_panel)
|
|
72
|
+
else:
|
|
73
|
+
ld_ref_flag=""
|
|
74
|
+
|
|
75
|
+
if perfect_gencov == True:
|
|
76
|
+
special_flags += "--perfect_gencov "
|
|
77
|
+
if equal_h2 == True:
|
|
78
|
+
special_flags += "--equal_h2 "
|
|
79
|
+
if no_overlap == True:
|
|
80
|
+
special_flags += "--no_overlap "
|
|
81
|
+
if fdr == True:
|
|
82
|
+
special_flags += "--fdr "
|
|
83
|
+
|
|
60
84
|
script='''
|
|
61
|
-
{python} {mtag} \
|
|
85
|
+
{python} {mtag} {special_flags} {ld_ref_flag} \
|
|
62
86
|
--sumstats {sumstats_paths_string} \
|
|
63
87
|
--out {out_prefix} \
|
|
64
88
|
--n_min {n_min} \
|
|
@@ -67,26 +91,34 @@ def _run_mtag( sumstats_multi,
|
|
|
67
91
|
python=python,
|
|
68
92
|
n_min=n_min,
|
|
69
93
|
mtag=mtag,
|
|
94
|
+
special_flags=special_flags,
|
|
70
95
|
out_prefix=out_prefix,
|
|
96
|
+
ld_ref_flag=ld_ref_flag,
|
|
71
97
|
sumstats_paths_string = ",".join(sumstats_paths)
|
|
72
98
|
)
|
|
73
|
-
log.write("
|
|
99
|
+
log.write("MTAG script: {} ".format(script), verbose=verbose)
|
|
74
100
|
|
|
75
|
-
|
|
76
|
-
|
|
101
|
+
temp_script_path = _path(tmp=True,
|
|
102
|
+
study=study,
|
|
103
|
+
analysis="mtag",
|
|
104
|
+
suffix="sh"
|
|
105
|
+
)
|
|
106
|
+
|
|
107
|
+
with open(temp_script_path,"w") as file:
|
|
77
108
|
file.write(script)
|
|
78
109
|
|
|
79
|
-
os.chmod(
|
|
80
|
-
|
|
81
|
-
script_run = "./_{}_gwaslab_mtag_temp.sh".format(study)
|
|
110
|
+
os.chmod(temp_script_path, 0o700)
|
|
82
111
|
|
|
83
112
|
try:
|
|
84
|
-
log.write(" Running MTAG from command line...", verbose=verbose)
|
|
85
|
-
output = subprocess.check_output(
|
|
113
|
+
log.write(" -Running MTAG from command line...", verbose=verbose)
|
|
114
|
+
output = subprocess.check_output(os.path.join(temp_script_path)
|
|
115
|
+
,stderr=subprocess.STDOUT, shell=True,text=True)
|
|
86
116
|
log.write(output)
|
|
87
117
|
python_log+= output + "\n"
|
|
88
118
|
|
|
89
119
|
except subprocess.CalledProcessError as e:
|
|
90
120
|
log.write(e.output)
|
|
91
121
|
|
|
122
|
+
sumstats_multi.reload()
|
|
123
|
+
|
|
92
124
|
log.write("Finished MTAG.", verbose=verbose)
|
|
@@ -20,9 +20,9 @@ import os
|
|
|
20
20
|
import sys
|
|
21
21
|
import getopt
|
|
22
22
|
|
|
23
|
-
import gwaslab.prscs_parse_genet as parse_genet
|
|
24
|
-
import gwaslab.prscs_mcmc_gtb as mcmc_gtb
|
|
25
|
-
import gwaslab.prscs_gigrnd as gigrnd
|
|
23
|
+
import gwaslab.extension.prscs.prscs_parse_genet as parse_genet
|
|
24
|
+
import gwaslab.extension.prscs.prscs_mcmc_gtb as mcmc_gtb
|
|
25
|
+
import gwaslab.extension.prscs.prscs_gigrnd as gigrnd
|
|
26
26
|
|
|
27
27
|
|
|
28
28
|
def _run_prscs(
|
|
@@ -5,7 +5,8 @@ import pandas as pd
|
|
|
5
5
|
import numpy as np
|
|
6
6
|
from gwaslab.g_Log import Log
|
|
7
7
|
|
|
8
|
-
def _run_scdrs(
|
|
8
|
+
def _run_scdrs( gls,
|
|
9
|
+
scdrs="scdrs",
|
|
9
10
|
python="python",
|
|
10
11
|
study="Study1",
|
|
11
12
|
conda_env=None,
|
|
@@ -32,16 +33,21 @@ def _run_scdrs( scdrs="scdrs",
|
|
|
32
33
|
|
|
33
34
|
log.write(" Start to run scDRS from command line:", verbose=verbose)
|
|
34
35
|
|
|
36
|
+
log.write(f" Output prefix: {out}", verbose=verbose)
|
|
37
|
+
gls.offload()
|
|
35
38
|
trait = study
|
|
39
|
+
|
|
36
40
|
if out_file is None:
|
|
37
41
|
out_file = f"./{trait}.gs"
|
|
42
|
+
out_file = os.path.join(out, out_file)
|
|
38
43
|
if out_folder is None:
|
|
39
|
-
out_folder =
|
|
44
|
+
out_folder = out
|
|
45
|
+
|
|
40
46
|
if conda_env is not None:
|
|
41
47
|
conda_env_string = f"conda init bash\n conda activate {conda_env}\n"
|
|
42
48
|
else:
|
|
43
49
|
conda_env_string=""
|
|
44
|
-
|
|
50
|
+
|
|
45
51
|
|
|
46
52
|
if group_analysis is not None:
|
|
47
53
|
analysis_string = f"--group-analysis {group_analysis} "
|
|
@@ -104,5 +110,5 @@ def _run_scdrs( scdrs="scdrs",
|
|
|
104
110
|
except subprocess.CalledProcessError as e:
|
|
105
111
|
log.warning("ERROR!")
|
|
106
112
|
log.write(e.output)
|
|
107
|
-
|
|
113
|
+
gls.reload()
|
|
108
114
|
log.write("Finished running scDRS.", verbose=verbose)
|
|
@@ -6,22 +6,23 @@ import numpy as np
|
|
|
6
6
|
from gwaslab.g_Log import Log
|
|
7
7
|
from gwaslab.g_version import _checking_r_version
|
|
8
8
|
from gwaslab.g_version import _check_susie_version
|
|
9
|
-
from gwaslab.qc_fix_sumstats import start_to
|
|
10
|
-
from gwaslab.qc_fix_sumstats import finished
|
|
9
|
+
from gwaslab.qc.qc_fix_sumstats import start_to
|
|
10
|
+
from gwaslab.qc.qc_fix_sumstats import finished
|
|
11
11
|
|
|
12
|
-
def _run_susie_rss(
|
|
12
|
+
def _run_susie_rss(gls,
|
|
13
|
+
filepath,
|
|
13
14
|
r="Rscript",
|
|
14
15
|
mode="bs",
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
16
|
+
out=None,
|
|
17
|
+
max_iter=100,
|
|
18
|
+
min_abs_corr=0.5,
|
|
19
|
+
refine="FALSE",
|
|
18
20
|
L=10,
|
|
19
21
|
fillldna=True,
|
|
20
22
|
n=None,
|
|
21
23
|
delete=False, #if delete output file
|
|
22
24
|
susie_args="",
|
|
23
25
|
log=Log(),
|
|
24
|
-
main_sumstats=None,
|
|
25
26
|
verbose=True):
|
|
26
27
|
##start function with col checking##########################################################
|
|
27
28
|
_start_line = "run finemapping using SuSieR from command line"
|
|
@@ -44,7 +45,9 @@ def _run_susie_rss(filepath,
|
|
|
44
45
|
log.write(" -File path is None.")
|
|
45
46
|
log.write("Finished finemapping using SuSieR.")
|
|
46
47
|
return pd.DataFrame()
|
|
47
|
-
|
|
48
|
+
|
|
49
|
+
gls.offload()
|
|
50
|
+
|
|
48
51
|
filelist = pd.read_csv(filepath,sep="\t")
|
|
49
52
|
r_log=""
|
|
50
53
|
# write R script
|
|
@@ -52,38 +55,49 @@ def _run_susie_rss(filepath,
|
|
|
52
55
|
|
|
53
56
|
log = _checking_r_version(r, log)
|
|
54
57
|
log = _check_susie_version(r,log)
|
|
55
|
-
|
|
58
|
+
|
|
56
59
|
for index, row in filelist.iterrows():
|
|
57
60
|
gc.collect()
|
|
58
61
|
study = row["STUDY"]
|
|
59
62
|
ld_r_matrix = row["LD_R_MATRIX"] #ld matrix path
|
|
60
63
|
sumstats = row["LOCUS_SUMSTATS"] #sumsttas path
|
|
61
|
-
|
|
64
|
+
|
|
65
|
+
# out: directory for output files
|
|
66
|
+
if out is None:
|
|
67
|
+
output_prefix = sumstats.replace(".sumstats.gz","")
|
|
68
|
+
else:
|
|
69
|
+
output_prefix = os.path.join(out, os.path.basename(sumstats.replace(".sumstats.gz","")))
|
|
70
|
+
|
|
62
71
|
log.write(" -Running for: {} - {}".format(row["SNPID"],row["STUDY"] ))
|
|
63
72
|
log.write(" -Locus sumstats:{}".format(sumstats))
|
|
64
73
|
log.write(" -LD r matrix:{}".format(ld_r_matrix))
|
|
65
74
|
log.write(" -output_prefix:{}".format(output_prefix))
|
|
66
75
|
|
|
67
76
|
rscript='''
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
77
|
+
library(susieR)
|
|
78
|
+
|
|
79
|
+
sumstats <- read.csv("{}",sep="\t")
|
|
80
|
+
|
|
81
|
+
R <- as.matrix(read.csv("{}",sep="\t",header=FALSE))
|
|
82
|
+
{}
|
|
83
|
+
|
|
84
|
+
n <- floor(mean(sumstats$N))
|
|
74
85
|
|
|
75
|
-
|
|
86
|
+
fitted_rss1 <- susie_rss({}, n = {}, R = R, max_iter = {}, min_abs_corr={}, refine = {}, L = {}{})
|
|
76
87
|
|
|
77
|
-
|
|
88
|
+
susie_fitted_summary <- summary(fitted_rss1)
|
|
78
89
|
|
|
79
|
-
|
|
90
|
+
output <- susie_fitted_summary$vars
|
|
91
|
+
output$SNPID <- sumstats$SNPID[susie_fitted_summary$vars$variable]
|
|
92
|
+
output$LOCUS <- "{}"
|
|
93
|
+
output$STUDY <- "{}"
|
|
80
94
|
|
|
81
|
-
|
|
82
|
-
output$SNPID <- sumstats$SNPID[susie_fitted_summary$vars$variable]
|
|
83
|
-
output$LOCUS <- "{}"
|
|
84
|
-
output$STUDY <- "{}"
|
|
95
|
+
write.csv(output, "{}.pipcs", row.names = FALSE)
|
|
85
96
|
|
|
86
|
-
|
|
97
|
+
png(filename="{}_diagnostic.png")
|
|
98
|
+
diagnostic <- kriging_rss({}, R, n=n)
|
|
99
|
+
diagnostic$plot
|
|
100
|
+
dev.off()
|
|
87
101
|
'''.format(sumstats,
|
|
88
102
|
ld_r_matrix,
|
|
89
103
|
"R[is.na(R)] <- 0" if fillldna==True else "",
|
|
@@ -96,7 +110,9 @@ def _run_susie_rss(filepath,
|
|
|
96
110
|
susie_args,
|
|
97
111
|
row["SNPID"],
|
|
98
112
|
row["STUDY"],
|
|
99
|
-
output_prefix
|
|
113
|
+
output_prefix,
|
|
114
|
+
output_prefix,
|
|
115
|
+
"sumstats$Z" if mode=="z" else "sumstats$BETA/sumstats$SE")
|
|
100
116
|
susier_line = "susie_rss({}, n = {}, R = R, max_iter = {}, min_abs_corr={}, refine = {}, L = {}{})".format("z= sumstats$Z," if mode=="z" else "bhat = sumstats$BETA,shat = sumstats$SE,",
|
|
101
117
|
n if n is not None else "n",
|
|
102
118
|
max_iter,
|
|
@@ -106,7 +122,12 @@ def _run_susie_rss(filepath,
|
|
|
106
122
|
susie_args)
|
|
107
123
|
log.write(" -SuSieR script: {}".format(susier_line))
|
|
108
124
|
|
|
125
|
+
# temporary R script path
|
|
109
126
|
temp_r_path = "_{}_{}_{}_gwaslab_susie_temp.R".format(study,row["SNPID"],id(sumstats))
|
|
127
|
+
if out is not None:
|
|
128
|
+
temp_r_path = os.path.join(out, temp_r_path)
|
|
129
|
+
|
|
130
|
+
|
|
110
131
|
log.write(" -Createing temp R script: {}".format(temp_r_path))
|
|
111
132
|
with open(temp_r_path,"w") as file:
|
|
112
133
|
file.write(rscript)
|
|
@@ -140,8 +161,10 @@ def _run_susie_rss(filepath,
|
|
|
140
161
|
os.remove(temp_r_path)
|
|
141
162
|
log.write(" -Removing temp R script: {}".format(temp_r_path))
|
|
142
163
|
|
|
164
|
+
gls.reload()
|
|
165
|
+
|
|
143
166
|
locus_pip_cs = locus_pip_cs.rename(columns={"variable":"N_SNP","variable_prob":"PIP","cs":"CREDIBLE_SET_INDEX"})
|
|
144
|
-
locus_pip_cs = pd.merge(locus_pip_cs,
|
|
167
|
+
locus_pip_cs = pd.merge(locus_pip_cs, gls.data[["SNPID","CHR","POS"]], on="SNPID",how="left")
|
|
145
168
|
|
|
146
169
|
finished(log=log, verbose=verbose, end_line=_end_line)
|
|
147
170
|
return locus_pip_cs
|
|
@@ -7,7 +7,7 @@ from gwaslab.g_Log import Log
|
|
|
7
7
|
import gc
|
|
8
8
|
#from gwaslab.qc_fix_sumstats import sortcolumn
|
|
9
9
|
from gwaslab.g_version import _get_version
|
|
10
|
-
from gwaslab.qc_check_datatype import check_datatype
|
|
10
|
+
from gwaslab.qc.qc_check_datatype import check_datatype
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
def filldata(
|
|
@@ -1,17 +1,21 @@
|
|
|
1
1
|
import re
|
|
2
|
-
#import modin.pandas as pd
|
|
3
2
|
import pandas as pd
|
|
4
3
|
import numpy as np
|
|
5
4
|
from os import path
|
|
6
|
-
from
|
|
7
|
-
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
8
7
|
from gwaslab.g_Log import Log
|
|
9
8
|
from gwaslab.g_vchange_status import vchange_status
|
|
10
|
-
|
|
11
|
-
from gwaslab.qc_fix_sumstats import
|
|
12
|
-
from gwaslab.qc_fix_sumstats import
|
|
13
|
-
from gwaslab.qc_fix_sumstats import
|
|
14
|
-
from gwaslab.
|
|
9
|
+
|
|
10
|
+
from gwaslab.qc.qc_fix_sumstats import sortcoordinate
|
|
11
|
+
from gwaslab.qc.qc_fix_sumstats import start_to
|
|
12
|
+
from gwaslab.qc.qc_fix_sumstats import finished
|
|
13
|
+
from gwaslab.qc.qc_fix_sumstats import _process_build
|
|
14
|
+
|
|
15
|
+
from gwaslab.bd.bd_common_data import get_high_ld
|
|
16
|
+
from gwaslab.bd.bd_common_data import get_chr_to_number
|
|
17
|
+
|
|
18
|
+
from gwaslab.hm.hm_harmonize_sumstats import is_palindromic
|
|
15
19
|
|
|
16
20
|
import gc
|
|
17
21
|
def filtervalues(sumstats,expr,remove=False,verbose=True,log=Log()):
|
|
@@ -221,6 +225,8 @@ def inferbuild(sumstats,status="STATUS",chrom="CHR", pos="POS",
|
|
|
221
225
|
ea="EA", nea="NEA",build="19",
|
|
222
226
|
change_status=True,
|
|
223
227
|
verbose=True,log=Log()):
|
|
228
|
+
|
|
229
|
+
|
|
224
230
|
##start function with col checking##########################################################
|
|
225
231
|
_start_line = "infer genome build version using hapmap3 SNPs"
|
|
226
232
|
_end_line = "inferring genome build version using hapmap3 SNPs"
|
|
@@ -241,8 +247,10 @@ def inferbuild(sumstats,status="STATUS",chrom="CHR", pos="POS",
|
|
|
241
247
|
|
|
242
248
|
inferred_build="Unknown"
|
|
243
249
|
log.write("Start to infer genome build version using hapmap3 SNPs...", verbose=verbose)
|
|
244
|
-
|
|
245
|
-
|
|
250
|
+
|
|
251
|
+
data_path_19 = path.join( Path(__file__).parents[1], "data","hapmap3_SNPs","hapmap3_db150_hg19.snplist.gz")
|
|
252
|
+
data_path_38 = path.join( Path(__file__).parents[1], "data","hapmap3_SNPs","hapmap3_db151_hg38.snplist.gz")
|
|
253
|
+
|
|
246
254
|
log.write(" -Loading Hapmap3 variants data...", verbose=verbose)
|
|
247
255
|
hapmap3_ref_19 = pd.read_csv(data_path_19,sep="\s+",usecols=["#CHROM","POS"],dtype={"#CHROM":"string","POS":"string"})
|
|
248
256
|
hapmap3_ref_38 = pd.read_csv(data_path_38,sep="\s+",usecols=["#CHROM","POS"],dtype={"#CHROM":"string","POS":"string"})
|
|
@@ -266,7 +274,6 @@ def inferbuild(sumstats,status="STATUS",chrom="CHR", pos="POS",
|
|
|
266
274
|
log.write(" -Since num_hg19 >> num_hg38, assigning genome build hg19...", verbose=verbose)
|
|
267
275
|
if change_status==True:
|
|
268
276
|
sumstats[status] = vchange_status(sumstats[status],1,"9","1")
|
|
269
|
-
sumstats[status] = vchange_status(sumstats[status],2,"9","9")
|
|
270
277
|
inferred_build="19"
|
|
271
278
|
elif match_count_for_19 < match_count_for_38:
|
|
272
279
|
log.write(" -Since num_hg19 << num_hg38, assigning genome build hg38...", verbose=verbose)
|
|
@@ -5,20 +5,22 @@ import gc
|
|
|
5
5
|
from pyensembl import EnsemblRelease
|
|
6
6
|
from pyensembl import Genome
|
|
7
7
|
from os import path
|
|
8
|
-
from gwaslab.util_in_fill_data import fill_p
|
|
9
8
|
from gwaslab.g_Log import Log
|
|
10
|
-
|
|
11
|
-
from gwaslab.bd_common_data import
|
|
12
|
-
from gwaslab.bd_common_data import
|
|
13
|
-
from gwaslab.bd_common_data import
|
|
14
|
-
from gwaslab.bd_common_data import
|
|
15
|
-
from gwaslab.
|
|
16
|
-
from gwaslab.
|
|
17
|
-
|
|
18
|
-
from gwaslab.qc_fix_sumstats import
|
|
19
|
-
from gwaslab.qc_fix_sumstats import
|
|
20
|
-
from gwaslab.
|
|
21
|
-
from gwaslab.
|
|
9
|
+
|
|
10
|
+
from gwaslab.bd.bd_common_data import get_chr_to_number
|
|
11
|
+
from gwaslab.bd.bd_common_data import get_number_to_chr
|
|
12
|
+
from gwaslab.bd.bd_common_data import get_chr_to_NC
|
|
13
|
+
from gwaslab.bd.bd_common_data import gtf_to_protein_coding
|
|
14
|
+
from gwaslab.bd.bd_common_data import gtf_to_all_gene
|
|
15
|
+
from gwaslab.bd.bd_download import check_and_download
|
|
16
|
+
|
|
17
|
+
from gwaslab.qc.qc_fix_sumstats import check_dataframe_shape
|
|
18
|
+
from gwaslab.qc.qc_fix_sumstats import start_to
|
|
19
|
+
from gwaslab.qc.qc_fix_sumstats import finished
|
|
20
|
+
from gwaslab.qc.qc_build import _check_build
|
|
21
|
+
from gwaslab.util.util_in_correct_winnerscurse import wc_correct
|
|
22
|
+
from gwaslab.util.util_ex_gwascatalog import gwascatalog_trait
|
|
23
|
+
from gwaslab.util.util_in_fill_data import fill_p
|
|
22
24
|
# getsig
|
|
23
25
|
# closest_gene
|
|
24
26
|
# annogene
|
|
@@ -4,7 +4,7 @@ import numpy as np
|
|
|
4
4
|
from scipy.stats.distributions import chi2
|
|
5
5
|
from scipy.stats import norm
|
|
6
6
|
from gwaslab.g_Log import Log
|
|
7
|
-
from gwaslab.io_to_pickle import load_data_from_pickle
|
|
7
|
+
from gwaslab.io.io_to_pickle import load_data_from_pickle
|
|
8
8
|
from gwaslab.g_Sumstats import Sumstats
|
|
9
9
|
import gc
|
|
10
10
|
|
|
@@ -4,7 +4,7 @@ import numpy as np
|
|
|
4
4
|
from scipy.stats.distributions import chi2
|
|
5
5
|
from scipy.stats import norm
|
|
6
6
|
from gwaslab.g_Log import Log
|
|
7
|
-
from gwaslab.io_to_pickle import load_data_from_pickle
|
|
7
|
+
from gwaslab.io.io_to_pickle import load_data_from_pickle
|
|
8
8
|
from gwaslab.g_Sumstats import Sumstats
|
|
9
9
|
import polars as pl
|
|
10
10
|
########################################################################################################################################################################################################################################################################################################################################################
|
|
@@ -9,7 +9,7 @@ from scipy import stats
|
|
|
9
9
|
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
|
|
10
10
|
from adjustText import adjust_text
|
|
11
11
|
from gwaslab.g_Log import Log
|
|
12
|
-
from gwaslab.viz_aux_reposition_text import adjust_text_position
|
|
12
|
+
from gwaslab.viz.viz_aux_reposition_text import adjust_text_position
|
|
13
13
|
from pandas.api.types import is_string_dtype
|
|
14
14
|
|
|
15
15
|
# single mqqplot
|
|
@@ -3,8 +3,8 @@ import numpy as np
|
|
|
3
3
|
from gwaslab.g_Log import Log
|
|
4
4
|
from matplotlib import ticker
|
|
5
5
|
import matplotlib.pyplot as plt
|
|
6
|
-
from gwaslab.bd_common_data import get_chr_to_number
|
|
7
|
-
from gwaslab.bd_common_data import get_number_to_chr
|
|
6
|
+
from gwaslab.bd.bd_common_data import get_chr_to_number
|
|
7
|
+
from gwaslab.bd.bd_common_data import get_number_to_chr
|
|
8
8
|
from math import ceil
|
|
9
9
|
|
|
10
10
|
def _quick_fix(sumstats, chr_dict=get_chr_to_number(), scaled=False, chrom="CHR", pos="POS", p="P", mlog10p="MLOG10P",log=Log(), verbose=True):
|
|
@@ -4,7 +4,7 @@ import matplotlib.pyplot as plt
|
|
|
4
4
|
import scipy.stats as ss
|
|
5
5
|
import seaborn as sns
|
|
6
6
|
from gwaslab.g_Log import Log
|
|
7
|
-
from gwaslab.viz_aux_save_figure import save_figure
|
|
7
|
+
from gwaslab.viz.viz_aux_save_figure import save_figure
|
|
8
8
|
|
|
9
9
|
################################################################################################################################
|
|
10
10
|
def plotdaf(sumstats,
|
|
@@ -8,15 +8,18 @@ import math
|
|
|
8
8
|
import scipy.stats as ss
|
|
9
9
|
from matplotlib.patches import Rectangle
|
|
10
10
|
from adjustText import adjust_text
|
|
11
|
-
from gwaslab.viz_aux_save_figure import save_figure
|
|
12
|
-
from gwaslab.util_in_get_sig import getsig
|
|
13
|
-
from gwaslab.util_in_get_sig import annogene
|
|
14
11
|
from gwaslab.g_Log import Log
|
|
15
|
-
from gwaslab.util_in_correct_winnerscurse import wc_correct
|
|
16
|
-
from gwaslab.util_in_correct_winnerscurse import wc_correct_test
|
|
17
12
|
from gwaslab.g_Sumstats import Sumstats
|
|
18
|
-
|
|
19
|
-
from gwaslab.
|
|
13
|
+
|
|
14
|
+
from gwaslab.viz.viz_aux_save_figure import save_figure
|
|
15
|
+
|
|
16
|
+
from gwaslab.util.util_in_get_sig import getsig
|
|
17
|
+
from gwaslab.util.util_in_get_sig import annogene
|
|
18
|
+
from gwaslab.util.util_in_correct_winnerscurse import wc_correct
|
|
19
|
+
from gwaslab.util.util_in_correct_winnerscurse import wc_correct_test
|
|
20
|
+
|
|
21
|
+
from gwaslab.io.io_process_args import _merge_and_sync_dic
|
|
22
|
+
from gwaslab.io.io_process_args import _extract_kwargs
|
|
20
23
|
#20220422
|
|
21
24
|
def compare_effect(path1,
|
|
22
25
|
path2,
|
|
@@ -91,13 +94,17 @@ def compare_effect(path1,
|
|
|
91
94
|
exponent = math.floor(math.log10(sig_level))
|
|
92
95
|
mantissa = sig_level / 10**exponent
|
|
93
96
|
|
|
94
|
-
legend_title = '$\mathregular{ P < {} x 10^{{{}}}}$ in:'.format(mantissa, exponent)
|
|
97
|
+
legend_title = '$\mathregular{{ P < {} x 10^{{{}}} }}$ in:'.format(mantissa, exponent)
|
|
95
98
|
|
|
99
|
+
# what method to use for correction
|
|
96
100
|
if is_q_mc=="fdr" or is_q_mc=="bon":
|
|
97
101
|
is_q = True
|
|
102
|
+
|
|
103
|
+
# if heterogeneity test
|
|
98
104
|
if is_q == True:
|
|
99
105
|
if is_q_mc not in [False,"fdr","bon","non"]:
|
|
100
106
|
raise ValueError('Please select either "fdr" or "bon" or "non"/False for is_q_mc.')
|
|
107
|
+
|
|
101
108
|
if save_args is None:
|
|
102
109
|
save_args = {"dpi":300,"facecolor":"white"}
|
|
103
110
|
if reg_box is None:
|
|
@@ -985,6 +992,7 @@ def test_q(df,beta1,se1,beta2,se2,q_level=0.05,is_q_mc=False, log=Log(), verbose
|
|
|
985
992
|
log.write(" -Bonferroni correction applied...", verbose=verbose)
|
|
986
993
|
df[rawpq] = df[pq]
|
|
987
994
|
df[pq] = df[pq] * len(df[pq])
|
|
995
|
+
# P value upper bound -> 1
|
|
988
996
|
df.loc[df[pq]>1,pq] = 1
|
|
989
997
|
|
|
990
998
|
df.loc[df[pq]<q_level,"Edge_color"]="black"
|
|
@@ -3,12 +3,12 @@ import matplotlib.pyplot as plt
|
|
|
3
3
|
import pandas as pd
|
|
4
4
|
import seaborn as sns
|
|
5
5
|
from gwaslab.g_Log import Log
|
|
6
|
-
from gwaslab.viz_aux_quickfix import _quick_assign_i_with_rank
|
|
7
|
-
from gwaslab.viz_plot_mqqplot import _process_xtick
|
|
8
|
-
from gwaslab.viz_plot_mqqplot import _process_xlabel
|
|
9
|
-
from gwaslab.bd_common_data import get_number_to_chr
|
|
10
|
-
from gwaslab.util_in_filter_value import _filter_region
|
|
11
|
-
from gwaslab.io_process_args import _extract_kwargs
|
|
6
|
+
from gwaslab.viz.viz_aux_quickfix import _quick_assign_i_with_rank
|
|
7
|
+
from gwaslab.viz.viz_plot_mqqplot import _process_xtick
|
|
8
|
+
from gwaslab.viz.viz_plot_mqqplot import _process_xlabel
|
|
9
|
+
from gwaslab.bd.bd_common_data import get_number_to_chr
|
|
10
|
+
from gwaslab.util.util_in_filter_value import _filter_region
|
|
11
|
+
from gwaslab.io.io_process_args import _extract_kwargs
|
|
12
12
|
import copy
|
|
13
13
|
|
|
14
14
|
def _plot_cs(pipcs_raw,
|