gwaslab 3.5.7__py3-none-any.whl → 3.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gwaslab might be problematic. Click here for more details.
- gwaslab/__init__.py +2 -0
- gwaslab/bd_common_data.py +1 -0
- gwaslab/bd_get_hapmap3.py +0 -1
- gwaslab/data/formatbook.json +78 -0
- gwaslab/data/reference.json +3 -1
- gwaslab/g_Sumstats.py +110 -25
- gwaslab/g_SumstatsMulti.py +287 -0
- gwaslab/g_SumstatsPair.py +101 -16
- gwaslab/g_Sumstats_polars.py +245 -0
- gwaslab/g_headers.py +12 -3
- gwaslab/g_meta.py +124 -47
- gwaslab/g_meta_update.py +48 -0
- gwaslab/g_vchange_status_polars.py +44 -0
- gwaslab/g_version.py +2 -2
- gwaslab/hm_casting.py +169 -110
- gwaslab/hm_casting_polars.py +202 -0
- gwaslab/hm_harmonize_sumstats.py +19 -8
- gwaslab/io_load_ld.py +529 -0
- gwaslab/io_preformat_input.py +11 -0
- gwaslab/io_preformat_input_polars.py +632 -0
- gwaslab/io_process_args.py +25 -1
- gwaslab/io_read_ldsc.py +34 -3
- gwaslab/io_read_pipcs.py +62 -6
- gwaslab/prscs_gigrnd.py +122 -0
- gwaslab/prscs_mcmc_gtb.py +136 -0
- gwaslab/prscs_parse_genet.py +98 -0
- gwaslab/qc_build.py +53 -0
- gwaslab/qc_check_datatype.py +10 -8
- gwaslab/qc_check_datatype_polars.py +128 -0
- gwaslab/qc_fix_sumstats.py +25 -23
- gwaslab/qc_fix_sumstats_polars.py +193 -0
- gwaslab/util_ex_calculate_ldmatrix.py +49 -19
- gwaslab/util_ex_gwascatalog.py +71 -28
- gwaslab/util_ex_infer_ancestry.py +65 -0
- gwaslab/util_ex_ldsc.py +67 -21
- gwaslab/util_ex_match_ldmatrix.py +396 -0
- gwaslab/util_ex_run_2samplemr.py +0 -2
- gwaslab/util_ex_run_ccgwas.py +155 -0
- gwaslab/util_ex_run_coloc.py +1 -1
- gwaslab/util_ex_run_hyprcoloc.py +117 -0
- gwaslab/util_ex_run_magma.py +74 -0
- gwaslab/util_ex_run_mesusie.py +155 -0
- gwaslab/util_ex_run_mtag.py +92 -0
- gwaslab/util_ex_run_prscs.py +85 -0
- gwaslab/util_ex_run_susie.py +40 -9
- gwaslab/util_in_estimate_ess.py +18 -0
- gwaslab/util_in_fill_data.py +20 -1
- gwaslab/util_in_filter_value.py +10 -5
- gwaslab/util_in_get_sig.py +71 -13
- gwaslab/util_in_meta.py +168 -4
- gwaslab/util_in_meta_polars.py +174 -0
- gwaslab/viz_aux_annotate_plot.py +13 -2
- gwaslab/viz_plot_compare_effect.py +87 -23
- gwaslab/viz_plot_credible_sets.py +55 -11
- gwaslab/viz_plot_effect.py +22 -12
- gwaslab/viz_plot_miamiplot2.py +3 -2
- gwaslab/viz_plot_mqqplot.py +94 -84
- gwaslab/viz_plot_qqplot.py +9 -7
- gwaslab/viz_plot_regional2.py +2 -1
- gwaslab/viz_plot_stackedregional.py +4 -1
- {gwaslab-3.5.7.dist-info → gwaslab-3.6.0.dist-info}/METADATA +46 -68
- gwaslab-3.6.0.dist-info/RECORD +119 -0
- {gwaslab-3.5.7.dist-info → gwaslab-3.6.0.dist-info}/WHEEL +1 -1
- gwaslab-3.5.7.dist-info/RECORD +0 -96
- {gwaslab-3.5.7.dist-info → gwaslab-3.6.0.dist-info/licenses}/LICENSE +0 -0
- {gwaslab-3.5.7.dist-info → gwaslab-3.6.0.dist-info/licenses}/LICENSE_before_v3.4.39 +0 -0
- {gwaslab-3.5.7.dist-info → gwaslab-3.6.0.dist-info}/top_level.txt +0 -0
gwaslab/__init__.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
from gwaslab.g_Sumstats import Sumstats
|
|
2
|
+
from gwaslab.g_Sumstats_polars import Sumstatsp
|
|
2
3
|
from gwaslab.g_SumstatsT import SumstatsT
|
|
3
4
|
from gwaslab.g_SumstatsPair import SumstatsPair
|
|
5
|
+
from gwaslab.g_SumstatsMulti import SumstatsMulti
|
|
4
6
|
from gwaslab.util_in_convert_h2 import h2_obs_to_liab
|
|
5
7
|
from gwaslab.util_in_convert_h2 import _get_per_snp_r2
|
|
6
8
|
from gwaslab.util_in_convert_h2 import h2_se_to_p
|
gwaslab/bd_common_data.py
CHANGED
|
@@ -337,6 +337,7 @@ def _maketrans(complement_mapping):
|
|
|
337
337
|
"""
|
|
338
338
|
keys = "".join(complement_mapping.keys()).encode("ASCII")
|
|
339
339
|
values = "".join(complement_mapping.values()).encode("ASCII")
|
|
340
|
+
|
|
340
341
|
return bytes.maketrans(keys + keys.lower(), values + values.lower())
|
|
341
342
|
|
|
342
343
|
####################################################################################################################
|
gwaslab/bd_get_hapmap3.py
CHANGED
|
@@ -43,7 +43,6 @@ def gethapmap3(sumstats,rsid="rsID",chrom="CHR", pos="POS", ea="EA", nea="NEA",b
|
|
|
43
43
|
else:
|
|
44
44
|
additional_cols=[]
|
|
45
45
|
hapmap3_ref = pd.read_csv(data_path,sep="\s+",usecols=["#CHROM","POS","rsid"]+additional_cols, dtype={"#CHROM":"string","POS":"string"})
|
|
46
|
-
|
|
47
46
|
#rsid A1 A2 #CHROM POS
|
|
48
47
|
#rs3094315 G A 1 752566
|
|
49
48
|
|
gwaslab/data/formatbook.json
CHANGED
|
@@ -237,6 +237,16 @@
|
|
|
237
237
|
"TotalSampleSize": "N",
|
|
238
238
|
"Nsample": "N",
|
|
239
239
|
"num_samples": "N",
|
|
240
|
+
"Neff": "N_EFF",
|
|
241
|
+
"N_EFF": "N_EFF",
|
|
242
|
+
"N_CASE": "N_CASE",
|
|
243
|
+
"Ncase": "N_CASE",
|
|
244
|
+
"ncase": "N_CASE",
|
|
245
|
+
"n_case": "N_CASE",
|
|
246
|
+
"Ncontrol": "N_CONTROL",
|
|
247
|
+
"N_control": "N_CONTROL",
|
|
248
|
+
"N_Control": "N_CONTROL",
|
|
249
|
+
"NCONTROL": "N_CONTROL",
|
|
240
250
|
"beta": "BETA",
|
|
241
251
|
"BETA": "BETA",
|
|
242
252
|
"Beta": "BETA",
|
|
@@ -461,6 +471,31 @@
|
|
|
461
471
|
"Direction": "DIRECTION"
|
|
462
472
|
}
|
|
463
473
|
},
|
|
474
|
+
"genomicsem": {
|
|
475
|
+
"meta_data": {
|
|
476
|
+
"format_name": "genomicSEM",
|
|
477
|
+
"format_source": "https://github.com/GenomicSEM/GenomicSEM/wiki/4.-Common-Factor-GWAS",
|
|
478
|
+
"format_source2": "https://github.com/GenomicSEM/GenomicSEM/wiki/5.-Multivariate-GWAS",
|
|
479
|
+
"format_version": 20241210
|
|
480
|
+
},
|
|
481
|
+
"format_dict": {
|
|
482
|
+
"SNP": "rsID",
|
|
483
|
+
"A2": "NEA",
|
|
484
|
+
"A1": "EA",
|
|
485
|
+
"Frq": "EAF",
|
|
486
|
+
"MAF": "MAF",
|
|
487
|
+
"N": "N",
|
|
488
|
+
"est": "BETA",
|
|
489
|
+
"se_c": "SE",
|
|
490
|
+
"Pval_Estimate": "P",
|
|
491
|
+
"Z_Estimate": "Z",
|
|
492
|
+
"Q": "Q",
|
|
493
|
+
"Q_df": "DOF",
|
|
494
|
+
"Q_pval": "P_HET",
|
|
495
|
+
"CHR": "CHR",
|
|
496
|
+
"BP": "POS"
|
|
497
|
+
}
|
|
498
|
+
},
|
|
464
499
|
"plink_fam": {
|
|
465
500
|
"meta_data": {
|
|
466
501
|
"format_name": "plink_fam",
|
|
@@ -880,6 +915,21 @@
|
|
|
880
915
|
"POS": "POS"
|
|
881
916
|
}
|
|
882
917
|
},
|
|
918
|
+
"mesusie": {
|
|
919
|
+
"meta_data": {
|
|
920
|
+
"format_name": "MESuSiE",
|
|
921
|
+
"format_source": "https://borangao.github.io/meSuSie_Analysis/installation.html",
|
|
922
|
+
"format_version": 20221109
|
|
923
|
+
},
|
|
924
|
+
"format_dict": {
|
|
925
|
+
"SNP": "SNPID",
|
|
926
|
+
"N": "N",
|
|
927
|
+
"Beta": "BETA",
|
|
928
|
+
"Se": "SE",
|
|
929
|
+
"Z": "Z",
|
|
930
|
+
"POS": "POS"
|
|
931
|
+
}
|
|
932
|
+
},
|
|
883
933
|
"plink2_linear": {
|
|
884
934
|
"meta_data": {
|
|
885
935
|
"format_name": "PLINK2 .glm.linear",
|
|
@@ -1032,6 +1082,34 @@
|
|
|
1032
1082
|
"ci_upper": "OR_95U"
|
|
1033
1083
|
}
|
|
1034
1084
|
},
|
|
1085
|
+
"ccgwas": {
|
|
1086
|
+
"meta_data": {
|
|
1087
|
+
"format_name": "CCGWAS",
|
|
1088
|
+
"format_source": "https://github.com/wouterpeyrot/CCGWAS",
|
|
1089
|
+
"format_version": "20220901",
|
|
1090
|
+
"last_check_date": "20250416",
|
|
1091
|
+
"format_separator": "\t",
|
|
1092
|
+
"format_citation": "Peyrot, W. J., & Price, A. L. (2021). Identifying loci with different allele frequencies among cases of eight psychiatric disorders using CC-GWAS. Nature genetics, 53(4), 445-454.",
|
|
1093
|
+
"format_comment": null,
|
|
1094
|
+
"format_na": null,
|
|
1095
|
+
"format_other_cols": [
|
|
1096
|
+
"Exact_beta",
|
|
1097
|
+
"Exact_se",
|
|
1098
|
+
"Exact_pval",
|
|
1099
|
+
"CCGWAS_signif"
|
|
1100
|
+
]
|
|
1101
|
+
},
|
|
1102
|
+
"format_dict": {
|
|
1103
|
+
"SNP": "SNPID",
|
|
1104
|
+
"CHR": "CHR",
|
|
1105
|
+
"BP": "POS",
|
|
1106
|
+
"A2": "NEA",
|
|
1107
|
+
"A1": "EA",
|
|
1108
|
+
"OLS_beta": "BETA",
|
|
1109
|
+
"OLS_se": "SE",
|
|
1110
|
+
"OLS_pval": "P"
|
|
1111
|
+
}
|
|
1112
|
+
},
|
|
1035
1113
|
"fastgwa": {
|
|
1036
1114
|
"meta_data": {
|
|
1037
1115
|
"format_name": "fastgwa",
|
gwaslab/data/reference.json
CHANGED
|
@@ -103,7 +103,9 @@
|
|
|
103
103
|
"13to19":"https://s3-us-west-2.amazonaws.com/human-pangenomics/T2T/CHM13/assemblies/chain/v1_nflo/chm13v2-hg19.chain",
|
|
104
104
|
"13to38":"https://s3-us-west-2.amazonaws.com/human-pangenomics/T2T/CHM13/assemblies/chain/v1_nflo/chm13v2-grch38.chain",
|
|
105
105
|
"18to19":"https://hgdownload.soe.ucsc.edu/goldenPath/hg18/liftOver/hg18ToHg19.over.chain.gz",
|
|
106
|
-
"18to38":"https://hgdownload.soe.ucsc.edu/goldenPath/hg18/liftOver/hg18ToHg38.over.chain.gz"
|
|
106
|
+
"18to38":"https://hgdownload.soe.ucsc.edu/goldenPath/hg18/liftOver/hg18ToHg38.over.chain.gz",
|
|
107
|
+
"1kg_hm3_hg38_eaf":"https://www.dropbox.com/scl/fi/ymkqfsaec6mwjzlvxsm45/PAN.hapmap3.hg38.EAF.tsv.gz?rlkey=p1auef5y1kk7ui41k6j3s8b0z&dl=1",
|
|
108
|
+
"1kg_hm3_hg19_eaf":"https://www.dropbox.com/scl/fi/dmv9wtfchv6ahim86d49r/PAN.hapmap3.hg19.EAF.tsv.gz?rlkey=ywne2gj1rlm2nj42q9lt2d99n&dl=1"
|
|
107
109
|
}
|
|
108
110
|
|
|
109
111
|
|
gwaslab/g_Sumstats.py
CHANGED
|
@@ -39,6 +39,7 @@ from gwaslab.util_in_filter_value import filterregionout
|
|
|
39
39
|
from gwaslab.util_in_filter_value import _filter_indel
|
|
40
40
|
from gwaslab.util_in_filter_value import _filter_palindromic
|
|
41
41
|
from gwaslab.util_in_filter_value import _filter_snp
|
|
42
|
+
from gwaslab.util_in_filter_value import _filter_region
|
|
42
43
|
from gwaslab.util_in_filter_value import _exclude_hla
|
|
43
44
|
from gwaslab.util_in_filter_value import _search_variants
|
|
44
45
|
from gwaslab.util_in_filter_value import inferbuild
|
|
@@ -67,26 +68,35 @@ from gwaslab.g_version import _show_version
|
|
|
67
68
|
from gwaslab.g_version import gwaslab_info
|
|
68
69
|
from gwaslab.g_meta import _init_meta
|
|
69
70
|
from gwaslab.g_meta import _append_meta_record
|
|
71
|
+
from gwaslab.g_meta_update import _update_meta
|
|
70
72
|
from gwaslab.util_ex_run_clumping import _clump
|
|
71
73
|
from gwaslab.util_ex_calculate_ldmatrix import tofinemapping
|
|
74
|
+
from gwaslab.io_load_ld import tofinemapping_using_ld
|
|
72
75
|
from gwaslab.util_ex_calculate_prs import _calculate_prs
|
|
73
76
|
from gwaslab.viz_plot_mqqplot import mqqplot
|
|
74
77
|
from gwaslab.viz_plot_trumpetplot import plottrumpet
|
|
75
78
|
from gwaslab.viz_plot_compare_af import plotdaf
|
|
76
79
|
from gwaslab.util_ex_run_susie import _run_susie_rss
|
|
80
|
+
from gwaslab.util_ex_run_susie import _get_cs_lead
|
|
77
81
|
from gwaslab.qc_fix_sumstats import _check_data_consistency
|
|
78
82
|
from gwaslab.util_ex_ldsc import _estimate_h2_by_ldsc
|
|
79
83
|
from gwaslab.util_ex_ldsc import _estimate_rg_by_ldsc
|
|
80
84
|
from gwaslab.util_ex_ldsc import _estimate_h2_cts_by_ldsc
|
|
81
85
|
from gwaslab.util_ex_ldsc import _estimate_partitioned_h2_by_ldsc
|
|
82
86
|
from gwaslab.util_ex_ldproxyfinder import _extract_ld_proxy
|
|
87
|
+
from gwaslab.util_ex_run_magma import _run_magma
|
|
88
|
+
from gwaslab.util_ex_infer_ancestry import _infer_ancestry
|
|
83
89
|
from gwaslab.bd_get_hapmap3 import gethapmap3
|
|
84
90
|
from gwaslab.util_abf_finemapping import abf_finemapping
|
|
85
91
|
from gwaslab.util_abf_finemapping import make_cs
|
|
86
92
|
from gwaslab.io_read_pipcs import _read_pipcs
|
|
93
|
+
from gwaslab.util_in_estimate_ess import _get_ess
|
|
87
94
|
from gwaslab.viz_plot_credible_sets import _plot_cs
|
|
95
|
+
from gwaslab.hm_casting import _align_with_mold
|
|
96
|
+
from gwaslab.hm_casting import _merge_mold_with_sumstats_by_chrpos
|
|
88
97
|
import gc
|
|
89
98
|
from gwaslab.viz_plot_phe_heatmap import _gwheatmap
|
|
99
|
+
from gwaslab.util_ex_run_prscs import _run_prscs
|
|
90
100
|
|
|
91
101
|
#20220309
|
|
92
102
|
class Sumstats():
|
|
@@ -113,6 +123,7 @@ class Sumstats():
|
|
|
113
123
|
f=None,
|
|
114
124
|
t=None,
|
|
115
125
|
p=None,
|
|
126
|
+
q=None,
|
|
116
127
|
mlog10p=None,
|
|
117
128
|
test=None,
|
|
118
129
|
info=None,
|
|
@@ -126,6 +137,7 @@ class Sumstats():
|
|
|
126
137
|
HR_95U=None,
|
|
127
138
|
ncase=None,
|
|
128
139
|
ncontrol=None,
|
|
140
|
+
neff=None,
|
|
129
141
|
i2=None,
|
|
130
142
|
phet=None,
|
|
131
143
|
dof=None,
|
|
@@ -149,7 +161,7 @@ class Sumstats():
|
|
|
149
161
|
self.log = Log()
|
|
150
162
|
self.ldsc_h2 = None
|
|
151
163
|
self.ldsc_h2_results = None
|
|
152
|
-
self.ldsc_rg =
|
|
164
|
+
self.ldsc_rg = pd.DataFrame()
|
|
153
165
|
self.ldsc_h2_cts = None
|
|
154
166
|
self.ldsc_partitioned_h2_summary = None
|
|
155
167
|
self.ldsc_partitioned_h2_results = None
|
|
@@ -200,6 +212,7 @@ class Sumstats():
|
|
|
200
212
|
f=f,
|
|
201
213
|
t=t,
|
|
202
214
|
p=p,
|
|
215
|
+
q=q,
|
|
203
216
|
mlog10p=mlog10p,
|
|
204
217
|
test=test,
|
|
205
218
|
info=info,
|
|
@@ -217,6 +230,7 @@ class Sumstats():
|
|
|
217
230
|
snpr2=snpr2,
|
|
218
231
|
ncase=ncase,
|
|
219
232
|
ncontrol=ncontrol,
|
|
233
|
+
neff=neff,
|
|
220
234
|
direction=direction,
|
|
221
235
|
study=study,
|
|
222
236
|
build=build,
|
|
@@ -243,18 +257,8 @@ class Sumstats():
|
|
|
243
257
|
gc.collect()
|
|
244
258
|
|
|
245
259
|
#### healper #################################################################################
|
|
246
|
-
def update_meta(self):
|
|
247
|
-
self.meta
|
|
248
|
-
if "CHR" in self.data.columns:
|
|
249
|
-
self.meta["gwaslab"]["variants"]["number_of_chromosomes"]=len(self.data["CHR"].unique())
|
|
250
|
-
if "P" in self.data.columns:
|
|
251
|
-
self.meta["gwaslab"]["variants"]["min_P"]=np.min(self.data["P"])
|
|
252
|
-
if "EAF" in self.data.columns:
|
|
253
|
-
self.meta["gwaslab"]["variants"]["min_minor_allele_freq"]=min (np.min(self.data["EAF"]) , 1- np.max(self.data["EAF"]))
|
|
254
|
-
if "N" in self.data.columns:
|
|
255
|
-
self.meta["gwaslab"]["samples"]["sample_size"] = int(self.data["N"].max())
|
|
256
|
-
self.meta["gwaslab"]["samples"]["sample_size_median"] = self.data["N"].median()
|
|
257
|
-
self.meta["gwaslab"]["samples"]["sample_size_min"] = int(self.data["N"].min())
|
|
260
|
+
def update_meta(self, **kwargs):
|
|
261
|
+
self.meta = _update_meta(self.meta, self.data,log = self.log, **kwargs)
|
|
258
262
|
|
|
259
263
|
def summary(self):
|
|
260
264
|
return summarize(self.data)
|
|
@@ -363,7 +367,8 @@ class Sumstats():
|
|
|
363
367
|
self.data = parallelnormalizeallele(self.data,log=self.log,n_cores=n_cores,**normalizeallele_args)
|
|
364
368
|
|
|
365
369
|
self.data = sortcolumn(self.data,log=self.log)
|
|
366
|
-
|
|
370
|
+
|
|
371
|
+
self.data = sortcoordinate(self.data,log=self.log)
|
|
367
372
|
gc.collect()
|
|
368
373
|
|
|
369
374
|
#####################################################
|
|
@@ -380,6 +385,7 @@ class Sumstats():
|
|
|
380
385
|
# 3.2 infer strand for palindromic SNP (target build)
|
|
381
386
|
#####################################################
|
|
382
387
|
if ref_seq is not None:
|
|
388
|
+
|
|
383
389
|
if ref_seq_mode=="v":
|
|
384
390
|
self.data = checkref(self.data,ref_seq,log=self.log,**checkref_args)
|
|
385
391
|
elif ref_seq_mode=="s":
|
|
@@ -441,6 +447,20 @@ class Sumstats():
|
|
|
441
447
|
self.meta["is_sorted"] = True
|
|
442
448
|
self.meta["is_harmonised"] = True
|
|
443
449
|
return self
|
|
450
|
+
|
|
451
|
+
def align_with_template(self, template, **kwargs):
|
|
452
|
+
## merge
|
|
453
|
+
molded_sumstats, sumstats1 = _merge_mold_with_sumstats_by_chrpos(mold=template,
|
|
454
|
+
sumstats=self.data,
|
|
455
|
+
log=self.log,
|
|
456
|
+
suffixes=("_MOLD",""),
|
|
457
|
+
return_not_matched_mold = True)
|
|
458
|
+
## align
|
|
459
|
+
aligned_data = _align_with_mold(molded_sumstats)
|
|
460
|
+
|
|
461
|
+
## flip
|
|
462
|
+
self.data =flipallelestats(aligned_data, log=self.log)
|
|
463
|
+
|
|
444
464
|
############################################################################################################
|
|
445
465
|
#customizable API to build your own QC pipeline
|
|
446
466
|
def fix_id(self,**kwargs):
|
|
@@ -507,6 +527,15 @@ class Sumstats():
|
|
|
507
527
|
|
|
508
528
|
# utilities ############################################################################################################
|
|
509
529
|
# filter series ######################################################################
|
|
530
|
+
|
|
531
|
+
def filter_region(self, inplace=False,**kwargs):
|
|
532
|
+
if inplace is False:
|
|
533
|
+
new_Sumstats_object = copy.deepcopy(self)
|
|
534
|
+
new_Sumstats_object.data = _filter_region(new_Sumstats_object.data, **kwargs)
|
|
535
|
+
return new_Sumstats_object
|
|
536
|
+
else:
|
|
537
|
+
self.data = _filter_region(self.data, **kwargs)
|
|
538
|
+
|
|
510
539
|
def filter_flanking(self, inplace=False,**kwargs):
|
|
511
540
|
if inplace is False:
|
|
512
541
|
new_Sumstats_object = copy.deepcopy(self)
|
|
@@ -647,6 +676,9 @@ class Sumstats():
|
|
|
647
676
|
fig,outliers = plotdaf(self.data, **kwargs)
|
|
648
677
|
return fig, outliers
|
|
649
678
|
|
|
679
|
+
def infer_ancestry(self, **kwargs):
|
|
680
|
+
self.meta["gwaslab"]["inferred_ancestry"] = _infer_ancestry(self.data, **kwargs)
|
|
681
|
+
|
|
650
682
|
def plot_gwheatmap(self, **kwargs):
|
|
651
683
|
fig = _gwheatmap(self.data, **kwargs)
|
|
652
684
|
return fig
|
|
@@ -749,6 +781,7 @@ class Sumstats():
|
|
|
749
781
|
chrom="CHR",
|
|
750
782
|
pos="POS",
|
|
751
783
|
p="P",
|
|
784
|
+
build=self.meta["gwaslab"]["genome_build"],
|
|
752
785
|
log=self.log,
|
|
753
786
|
**kwargs)
|
|
754
787
|
# return sumstats object
|
|
@@ -789,7 +822,22 @@ class Sumstats():
|
|
|
789
822
|
**kwargs)
|
|
790
823
|
# return sumstats object
|
|
791
824
|
return output
|
|
792
|
-
|
|
825
|
+
|
|
826
|
+
def check_cs_overlap(self, **kwargs):
|
|
827
|
+
if "SNPID" in self.pipcs.columns:
|
|
828
|
+
id_to_use = "SNPID"
|
|
829
|
+
else:
|
|
830
|
+
id_to_use = "rsID"
|
|
831
|
+
output = _check_novel_set(self.pipcs,
|
|
832
|
+
id=id_to_use,
|
|
833
|
+
chrom="CHR",
|
|
834
|
+
pos="POS",
|
|
835
|
+
p="P",
|
|
836
|
+
log=self.log,
|
|
837
|
+
**kwargs)
|
|
838
|
+
# return sumstats object
|
|
839
|
+
return output
|
|
840
|
+
|
|
793
841
|
def anno_gene(self, **kwargs):
|
|
794
842
|
if "SNPID" in self.data.columns:
|
|
795
843
|
id_to_use = "SNPID"
|
|
@@ -806,6 +854,9 @@ class Sumstats():
|
|
|
806
854
|
def get_per_snp_r2(self,**kwargs):
|
|
807
855
|
self.data = _get_per_snp_r2(self.data, beta="BETA", af="EAF", n="N", log=self.log, **kwargs)
|
|
808
856
|
#add data inplace
|
|
857
|
+
|
|
858
|
+
def get_ess(self, **kwargs):
|
|
859
|
+
self.data = _get_ess(self.data, log=self.log, **kwargs)
|
|
809
860
|
|
|
810
861
|
def get_gc(self, mode=None, **kwargs):
|
|
811
862
|
if mode is None:
|
|
@@ -831,40 +882,73 @@ class Sumstats():
|
|
|
831
882
|
credible_sets = make_cs(region_data,threshold=0.95,log=self.log)
|
|
832
883
|
return region_data, credible_sets
|
|
833
884
|
|
|
834
|
-
|
|
885
|
+
######################################################################################################
|
|
886
|
+
def run_prscs(self, build=None, verbose=True, match_allele=True, how="inner", **kwargs):
|
|
887
|
+
if build is None:
|
|
888
|
+
build = self.meta["gwaslab"]["genome_build"]
|
|
889
|
+
insumstats = gethapmap3(self.data.copy(), build=build, verbose=verbose , match_allele=match_allele, how=how )
|
|
890
|
+
_run_prscs(sst_file = insumstats[["rsID","CHR","POS","EA","NEA","BETA","SE"]],
|
|
891
|
+
log=self.log,
|
|
892
|
+
**kwargs)
|
|
893
|
+
|
|
894
|
+
def run_magma(self, build=None, verbose=True, **kwargs):
|
|
895
|
+
_run_magma(self.data,
|
|
896
|
+
study=self.meta["gwaslab"]["study_name"],
|
|
897
|
+
build=build, verbose=verbose, log=self.log, **kwargs)
|
|
835
898
|
## LDSC ##############################################################################################
|
|
836
899
|
def estimate_h2_by_ldsc(self, build=None, verbose=True, match_allele=True, how="right", **kwargs):
|
|
837
900
|
if build is None:
|
|
838
901
|
build = self.meta["gwaslab"]["genome_build"]
|
|
839
902
|
insumstats = gethapmap3(self.data.copy(), build=build, verbose=verbose , match_allele=match_allele, how=how )
|
|
840
|
-
self.ldsc_h2, self.ldsc_h2_results = _estimate_h2_by_ldsc(insumstats=insumstats,
|
|
903
|
+
self.ldsc_h2, self.ldsc_h2_results = _estimate_h2_by_ldsc(insumstats=insumstats,
|
|
904
|
+
meta=self.meta,
|
|
905
|
+
log=self.log,
|
|
906
|
+
verbose=verbose,
|
|
907
|
+
**kwargs)
|
|
841
908
|
|
|
842
909
|
def estimate_rg_by_ldsc(self, build=None, verbose=True, match_allele=True, how="right",**kwargs):
|
|
843
910
|
if build is None:
|
|
844
911
|
build = self.meta["gwaslab"]["genome_build"]
|
|
845
912
|
insumstats = gethapmap3(self.data.copy(), build=build, verbose=verbose , match_allele=match_allele, how=how )
|
|
846
|
-
|
|
913
|
+
ldsc_rg = _estimate_rg_by_ldsc(insumstats=insumstats,
|
|
914
|
+
meta=self.meta,
|
|
915
|
+
log=self.log,
|
|
916
|
+
verbose=verbose,
|
|
917
|
+
**kwargs)
|
|
918
|
+
self.ldsc_rg = pd.concat([self.ldsc_rg, ldsc_rg],ignore_index=True)
|
|
847
919
|
|
|
848
920
|
def estimate_h2_cts_by_ldsc(self, build=None, verbose=True, match_allele=True, how="right",**kwargs):
|
|
849
921
|
if build is None:
|
|
850
922
|
build = self.meta["gwaslab"]["genome_build"]
|
|
851
923
|
insumstats = gethapmap3(self.data.copy(), build=build, verbose=verbose , match_allele=match_allele, how=how )
|
|
852
|
-
self.ldsc_h2_cts = _estimate_h2_cts_by_ldsc(insumstats=insumstats,
|
|
924
|
+
self.ldsc_h2_cts = _estimate_h2_cts_by_ldsc(insumstats=insumstats,
|
|
925
|
+
log=self.log,
|
|
926
|
+
verbose=verbose,
|
|
927
|
+
**kwargs)
|
|
853
928
|
|
|
854
929
|
def estimate_partitioned_h2_by_ldsc(self, build=None, verbose=True, match_allele=True, how="right",**kwargs):
|
|
855
930
|
if build is None:
|
|
856
931
|
build = self.meta["gwaslab"]["genome_build"]
|
|
857
932
|
insumstats = gethapmap3(self.data.copy(), build=build, verbose=verbose , match_allele=match_allele, how=how )
|
|
858
|
-
self.ldsc_partitioned_h2_summary, self.ldsc_partitioned_h2_results = _estimate_partitioned_h2_by_ldsc(insumstats=insumstats,
|
|
933
|
+
self.ldsc_partitioned_h2_summary, self.ldsc_partitioned_h2_results = _estimate_partitioned_h2_by_ldsc(insumstats=insumstats,
|
|
934
|
+
meta=self.meta,
|
|
935
|
+
log=self.log,
|
|
936
|
+
verbose=verbose,
|
|
937
|
+
**kwargs)
|
|
859
938
|
# external ################################################################################################
|
|
860
939
|
|
|
861
940
|
def calculate_ld_matrix(self,**kwargs):
|
|
862
941
|
self.finemapping["path"],self.finemapping["file"],self.finemapping["plink_log"]= tofinemapping(self.data,study = self.meta["gwaslab"]["study_name"],**kwargs)
|
|
863
942
|
#self.to_finemapping_file_path, self.to_finemapping_file, self.plink_log = tofinemapping(self.data,study = self.meta["gwaslab"]["study_name"],**kwargs)
|
|
864
|
-
|
|
943
|
+
def extract_ld_matrix(self,**kwargs):
|
|
944
|
+
self.finemapping["path"],self.finemapping["file"],self.finemapping["plink_log"]= tofinemapping_using_ld(self.data,study = self.meta["gwaslab"]["study_name"],**kwargs)
|
|
945
|
+
|
|
865
946
|
def run_susie_rss(self,**kwargs):
|
|
866
|
-
self.pipcs=_run_susie_rss(self.finemapping["path"]
|
|
947
|
+
self.pipcs=_run_susie_rss(self.finemapping["path"], main_sumstats = self.data[["SNPID","CHR","POS"]], **kwargs)
|
|
948
|
+
self.finemapping["pipcs"] = self.pipcs
|
|
867
949
|
#self.pipcs=_run_susie_rss(self.to_finemapping_file_path,**kwargs)
|
|
950
|
+
def get_cs_lead(self,**kwargs):
|
|
951
|
+
return _get_cs_lead(self.pipcs,**kwargs)
|
|
868
952
|
|
|
869
953
|
def clump(self,**kwargs):
|
|
870
954
|
self.clumps["clumps"], self.clumps["clumps_raw"], self.clumps["plink_log"] = _clump(self.data, log=self.log, study = self.meta["gwaslab"]["study_name"], **kwargs)
|
|
@@ -875,10 +959,11 @@ class Sumstats():
|
|
|
875
959
|
|
|
876
960
|
# loading aux data
|
|
877
961
|
def read_pipcs(self,prefix,**kwargs):
|
|
878
|
-
self.pipcs = _read_pipcs(self.data[["SNPID","CHR","POS"]],prefix, **kwargs)
|
|
962
|
+
self.pipcs = _read_pipcs(self.data[["SNPID","CHR","POS"]],prefix, study= self.meta["gwaslab"]["study_name"], **kwargs)
|
|
963
|
+
self.finemapping["pipcs"] = self.pipcs
|
|
879
964
|
|
|
880
|
-
def plot_pipcs(self, region
|
|
881
|
-
_plot_cs(self.pipcs, region, **kwargs)
|
|
965
|
+
def plot_pipcs(self, region=None, locus=None, **kwargs):
|
|
966
|
+
_plot_cs(self.pipcs, region=region,locus=locus, **kwargs)
|
|
882
967
|
# to_format ###############################################################################################
|
|
883
968
|
|
|
884
969
|
def to_format(self, path, build=None, verbose=True, **kwargs):
|