gwaslab 3.5.6__py3-none-any.whl → 3.5.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gwaslab might be problematic. Click here for more details.
- gwaslab/__init__.py +2 -0
- gwaslab/bd_common_data.py +1 -0
- gwaslab/bd_get_hapmap3.py +0 -1
- gwaslab/data/formatbook.json +78 -0
- gwaslab/g_Sumstats.py +98 -24
- gwaslab/g_SumstatsMulti.py +287 -0
- gwaslab/g_SumstatsPair.py +101 -16
- gwaslab/g_Sumstats_polars.py +245 -0
- gwaslab/g_headers.py +12 -3
- gwaslab/g_meta.py +123 -47
- gwaslab/g_meta_update.py +48 -0
- gwaslab/g_vchange_status_polars.py +44 -0
- gwaslab/g_version.py +2 -2
- gwaslab/hm_casting.py +169 -110
- gwaslab/hm_casting_polars.py +202 -0
- gwaslab/hm_harmonize_sumstats.py +19 -8
- gwaslab/io_load_ld.py +529 -0
- gwaslab/io_preformat_input.py +11 -0
- gwaslab/io_preformat_input_polars.py +632 -0
- gwaslab/io_process_args.py +25 -1
- gwaslab/io_read_ldsc.py +34 -3
- gwaslab/io_read_pipcs.py +62 -6
- gwaslab/prscs_gigrnd.py +122 -0
- gwaslab/prscs_mcmc_gtb.py +136 -0
- gwaslab/prscs_parse_genet.py +98 -0
- gwaslab/qc_build.py +53 -0
- gwaslab/qc_check_datatype.py +10 -8
- gwaslab/qc_check_datatype_polars.py +128 -0
- gwaslab/qc_fix_sumstats.py +25 -23
- gwaslab/qc_fix_sumstats_polars.py +193 -0
- gwaslab/util_ex_calculate_ldmatrix.py +49 -19
- gwaslab/util_ex_gwascatalog.py +71 -28
- gwaslab/util_ex_ldsc.py +67 -21
- gwaslab/util_ex_match_ldmatrix.py +396 -0
- gwaslab/util_ex_run_2samplemr.py +0 -2
- gwaslab/util_ex_run_ccgwas.py +155 -0
- gwaslab/util_ex_run_coloc.py +1 -1
- gwaslab/util_ex_run_hyprcoloc.py +117 -0
- gwaslab/util_ex_run_mesusie.py +155 -0
- gwaslab/util_ex_run_mtag.py +92 -0
- gwaslab/util_ex_run_prscs.py +85 -0
- gwaslab/util_ex_run_susie.py +40 -9
- gwaslab/util_in_estimate_ess.py +18 -0
- gwaslab/util_in_fill_data.py +20 -1
- gwaslab/util_in_filter_value.py +10 -5
- gwaslab/util_in_get_sig.py +71 -13
- gwaslab/util_in_meta.py +168 -4
- gwaslab/util_in_meta_polars.py +174 -0
- gwaslab/viz_plot_compare_effect.py +87 -23
- gwaslab/viz_plot_credible_sets.py +55 -11
- gwaslab/viz_plot_effect.py +22 -12
- gwaslab/viz_plot_miamiplot2.py +3 -2
- gwaslab/viz_plot_mqqplot.py +165 -141
- gwaslab/viz_plot_qqplot.py +6 -6
- gwaslab/viz_plot_regional2.py +5 -13
- gwaslab/viz_plot_rg_heatmap.py +6 -1
- gwaslab/viz_plot_stackedregional.py +21 -6
- {gwaslab-3.5.6.dist-info → gwaslab-3.5.8.dist-info}/METADATA +9 -7
- gwaslab-3.5.8.dist-info/RECORD +117 -0
- {gwaslab-3.5.6.dist-info → gwaslab-3.5.8.dist-info}/WHEEL +1 -1
- gwaslab-3.5.6.dist-info/RECORD +0 -96
- {gwaslab-3.5.6.dist-info → gwaslab-3.5.8.dist-info/licenses}/LICENSE +0 -0
- {gwaslab-3.5.6.dist-info → gwaslab-3.5.8.dist-info/licenses}/LICENSE_before_v3.4.39 +0 -0
- {gwaslab-3.5.6.dist-info → gwaslab-3.5.8.dist-info}/top_level.txt +0 -0
gwaslab/__init__.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
from gwaslab.g_Sumstats import Sumstats
|
|
2
|
+
from gwaslab.g_Sumstats_polars import Sumstatsp
|
|
2
3
|
from gwaslab.g_SumstatsT import SumstatsT
|
|
3
4
|
from gwaslab.g_SumstatsPair import SumstatsPair
|
|
5
|
+
from gwaslab.g_SumstatsMulti import SumstatsMulti
|
|
4
6
|
from gwaslab.util_in_convert_h2 import h2_obs_to_liab
|
|
5
7
|
from gwaslab.util_in_convert_h2 import _get_per_snp_r2
|
|
6
8
|
from gwaslab.util_in_convert_h2 import h2_se_to_p
|
gwaslab/bd_common_data.py
CHANGED
|
@@ -337,6 +337,7 @@ def _maketrans(complement_mapping):
|
|
|
337
337
|
"""
|
|
338
338
|
keys = "".join(complement_mapping.keys()).encode("ASCII")
|
|
339
339
|
values = "".join(complement_mapping.values()).encode("ASCII")
|
|
340
|
+
|
|
340
341
|
return bytes.maketrans(keys + keys.lower(), values + values.lower())
|
|
341
342
|
|
|
342
343
|
####################################################################################################################
|
gwaslab/bd_get_hapmap3.py
CHANGED
|
@@ -43,7 +43,6 @@ def gethapmap3(sumstats,rsid="rsID",chrom="CHR", pos="POS", ea="EA", nea="NEA",b
|
|
|
43
43
|
else:
|
|
44
44
|
additional_cols=[]
|
|
45
45
|
hapmap3_ref = pd.read_csv(data_path,sep="\s+",usecols=["#CHROM","POS","rsid"]+additional_cols, dtype={"#CHROM":"string","POS":"string"})
|
|
46
|
-
|
|
47
46
|
#rsid A1 A2 #CHROM POS
|
|
48
47
|
#rs3094315 G A 1 752566
|
|
49
48
|
|
gwaslab/data/formatbook.json
CHANGED
|
@@ -237,6 +237,16 @@
|
|
|
237
237
|
"TotalSampleSize": "N",
|
|
238
238
|
"Nsample": "N",
|
|
239
239
|
"num_samples": "N",
|
|
240
|
+
"Neff": "N_EFF",
|
|
241
|
+
"N_EFF": "N_EFF",
|
|
242
|
+
"N_CASE": "N_CASE",
|
|
243
|
+
"Ncase": "N_CASE",
|
|
244
|
+
"ncase": "N_CASE",
|
|
245
|
+
"n_case": "N_CASE",
|
|
246
|
+
"Ncontrol": "N_CONTROL",
|
|
247
|
+
"N_control": "N_CONTROL",
|
|
248
|
+
"N_Control": "N_CONTROL",
|
|
249
|
+
"NCONTROL": "N_CONTROL",
|
|
240
250
|
"beta": "BETA",
|
|
241
251
|
"BETA": "BETA",
|
|
242
252
|
"Beta": "BETA",
|
|
@@ -461,6 +471,31 @@
|
|
|
461
471
|
"Direction": "DIRECTION"
|
|
462
472
|
}
|
|
463
473
|
},
|
|
474
|
+
"genomicsem": {
|
|
475
|
+
"meta_data": {
|
|
476
|
+
"format_name": "genomicSEM",
|
|
477
|
+
"format_source": "https://github.com/GenomicSEM/GenomicSEM/wiki/4.-Common-Factor-GWAS",
|
|
478
|
+
"format_source2": "https://github.com/GenomicSEM/GenomicSEM/wiki/5.-Multivariate-GWAS",
|
|
479
|
+
"format_version": 20241210
|
|
480
|
+
},
|
|
481
|
+
"format_dict": {
|
|
482
|
+
"SNP": "rsID",
|
|
483
|
+
"A2": "NEA",
|
|
484
|
+
"A1": "EA",
|
|
485
|
+
"Frq": "EAF",
|
|
486
|
+
"MAF": "MAF",
|
|
487
|
+
"N": "N",
|
|
488
|
+
"est": "BETA",
|
|
489
|
+
"se_c": "SE",
|
|
490
|
+
"Pval_Estimate": "P",
|
|
491
|
+
"Z_Estimate": "Z",
|
|
492
|
+
"Q": "Q",
|
|
493
|
+
"Q_df": "DOF",
|
|
494
|
+
"Q_pval": "P_HET",
|
|
495
|
+
"CHR": "CHR",
|
|
496
|
+
"BP": "POS"
|
|
497
|
+
}
|
|
498
|
+
},
|
|
464
499
|
"plink_fam": {
|
|
465
500
|
"meta_data": {
|
|
466
501
|
"format_name": "plink_fam",
|
|
@@ -880,6 +915,21 @@
|
|
|
880
915
|
"POS": "POS"
|
|
881
916
|
}
|
|
882
917
|
},
|
|
918
|
+
"mesusie": {
|
|
919
|
+
"meta_data": {
|
|
920
|
+
"format_name": "MESuSiE",
|
|
921
|
+
"format_source": "https://borangao.github.io/meSuSie_Analysis/installation.html",
|
|
922
|
+
"format_version": 20221109
|
|
923
|
+
},
|
|
924
|
+
"format_dict": {
|
|
925
|
+
"SNP": "SNPID",
|
|
926
|
+
"N": "N",
|
|
927
|
+
"Beta": "BETA",
|
|
928
|
+
"Se": "SE",
|
|
929
|
+
"Z": "Z",
|
|
930
|
+
"POS": "POS"
|
|
931
|
+
}
|
|
932
|
+
},
|
|
883
933
|
"plink2_linear": {
|
|
884
934
|
"meta_data": {
|
|
885
935
|
"format_name": "PLINK2 .glm.linear",
|
|
@@ -1032,6 +1082,34 @@
|
|
|
1032
1082
|
"ci_upper": "OR_95U"
|
|
1033
1083
|
}
|
|
1034
1084
|
},
|
|
1085
|
+
"ccgwas": {
|
|
1086
|
+
"meta_data": {
|
|
1087
|
+
"format_name": "CCGWAS",
|
|
1088
|
+
"format_source": "https://github.com/wouterpeyrot/CCGWAS",
|
|
1089
|
+
"format_version": "20220901",
|
|
1090
|
+
"last_check_date": "20250416",
|
|
1091
|
+
"format_separator": "\t",
|
|
1092
|
+
"format_citation": "Peyrot, W. J., & Price, A. L. (2021). Identifying loci with different allele frequencies among cases of eight psychiatric disorders using CC-GWAS. Nature genetics, 53(4), 445-454.",
|
|
1093
|
+
"format_comment": null,
|
|
1094
|
+
"format_na": null,
|
|
1095
|
+
"format_other_cols": [
|
|
1096
|
+
"Exact_beta",
|
|
1097
|
+
"Exact_se",
|
|
1098
|
+
"Exact_pval",
|
|
1099
|
+
"CCGWAS_signif"
|
|
1100
|
+
]
|
|
1101
|
+
},
|
|
1102
|
+
"format_dict": {
|
|
1103
|
+
"SNP": "SNPID",
|
|
1104
|
+
"CHR": "CHR",
|
|
1105
|
+
"BP": "POS",
|
|
1106
|
+
"A2": "NEA",
|
|
1107
|
+
"A1": "EA",
|
|
1108
|
+
"OLS_beta": "BETA",
|
|
1109
|
+
"OLS_se": "SE",
|
|
1110
|
+
"OLS_pval": "P"
|
|
1111
|
+
}
|
|
1112
|
+
},
|
|
1035
1113
|
"fastgwa": {
|
|
1036
1114
|
"meta_data": {
|
|
1037
1115
|
"format_name": "fastgwa",
|
gwaslab/g_Sumstats.py
CHANGED
|
@@ -39,6 +39,7 @@ from gwaslab.util_in_filter_value import filterregionout
|
|
|
39
39
|
from gwaslab.util_in_filter_value import _filter_indel
|
|
40
40
|
from gwaslab.util_in_filter_value import _filter_palindromic
|
|
41
41
|
from gwaslab.util_in_filter_value import _filter_snp
|
|
42
|
+
from gwaslab.util_in_filter_value import _filter_region
|
|
42
43
|
from gwaslab.util_in_filter_value import _exclude_hla
|
|
43
44
|
from gwaslab.util_in_filter_value import _search_variants
|
|
44
45
|
from gwaslab.util_in_filter_value import inferbuild
|
|
@@ -67,13 +68,16 @@ from gwaslab.g_version import _show_version
|
|
|
67
68
|
from gwaslab.g_version import gwaslab_info
|
|
68
69
|
from gwaslab.g_meta import _init_meta
|
|
69
70
|
from gwaslab.g_meta import _append_meta_record
|
|
71
|
+
from gwaslab.g_meta_update import _update_meta
|
|
70
72
|
from gwaslab.util_ex_run_clumping import _clump
|
|
71
73
|
from gwaslab.util_ex_calculate_ldmatrix import tofinemapping
|
|
74
|
+
from gwaslab.io_load_ld import tofinemapping_using_ld
|
|
72
75
|
from gwaslab.util_ex_calculate_prs import _calculate_prs
|
|
73
76
|
from gwaslab.viz_plot_mqqplot import mqqplot
|
|
74
77
|
from gwaslab.viz_plot_trumpetplot import plottrumpet
|
|
75
78
|
from gwaslab.viz_plot_compare_af import plotdaf
|
|
76
79
|
from gwaslab.util_ex_run_susie import _run_susie_rss
|
|
80
|
+
from gwaslab.util_ex_run_susie import _get_cs_lead
|
|
77
81
|
from gwaslab.qc_fix_sumstats import _check_data_consistency
|
|
78
82
|
from gwaslab.util_ex_ldsc import _estimate_h2_by_ldsc
|
|
79
83
|
from gwaslab.util_ex_ldsc import _estimate_rg_by_ldsc
|
|
@@ -84,9 +88,13 @@ from gwaslab.bd_get_hapmap3 import gethapmap3
|
|
|
84
88
|
from gwaslab.util_abf_finemapping import abf_finemapping
|
|
85
89
|
from gwaslab.util_abf_finemapping import make_cs
|
|
86
90
|
from gwaslab.io_read_pipcs import _read_pipcs
|
|
91
|
+
from gwaslab.util_in_estimate_ess import _get_ess
|
|
87
92
|
from gwaslab.viz_plot_credible_sets import _plot_cs
|
|
93
|
+
from gwaslab.hm_casting import _align_with_mold
|
|
94
|
+
from gwaslab.hm_casting import _merge_mold_with_sumstats_by_chrpos
|
|
88
95
|
import gc
|
|
89
96
|
from gwaslab.viz_plot_phe_heatmap import _gwheatmap
|
|
97
|
+
from gwaslab.util_ex_run_prscs import _run_prscs
|
|
90
98
|
|
|
91
99
|
#20220309
|
|
92
100
|
class Sumstats():
|
|
@@ -113,6 +121,7 @@ class Sumstats():
|
|
|
113
121
|
f=None,
|
|
114
122
|
t=None,
|
|
115
123
|
p=None,
|
|
124
|
+
q=None,
|
|
116
125
|
mlog10p=None,
|
|
117
126
|
test=None,
|
|
118
127
|
info=None,
|
|
@@ -126,6 +135,7 @@ class Sumstats():
|
|
|
126
135
|
HR_95U=None,
|
|
127
136
|
ncase=None,
|
|
128
137
|
ncontrol=None,
|
|
138
|
+
neff=None,
|
|
129
139
|
i2=None,
|
|
130
140
|
phet=None,
|
|
131
141
|
dof=None,
|
|
@@ -149,7 +159,7 @@ class Sumstats():
|
|
|
149
159
|
self.log = Log()
|
|
150
160
|
self.ldsc_h2 = None
|
|
151
161
|
self.ldsc_h2_results = None
|
|
152
|
-
self.ldsc_rg =
|
|
162
|
+
self.ldsc_rg = pd.DataFrame()
|
|
153
163
|
self.ldsc_h2_cts = None
|
|
154
164
|
self.ldsc_partitioned_h2_summary = None
|
|
155
165
|
self.ldsc_partitioned_h2_results = None
|
|
@@ -200,6 +210,7 @@ class Sumstats():
|
|
|
200
210
|
f=f,
|
|
201
211
|
t=t,
|
|
202
212
|
p=p,
|
|
213
|
+
q=q,
|
|
203
214
|
mlog10p=mlog10p,
|
|
204
215
|
test=test,
|
|
205
216
|
info=info,
|
|
@@ -217,6 +228,7 @@ class Sumstats():
|
|
|
217
228
|
snpr2=snpr2,
|
|
218
229
|
ncase=ncase,
|
|
219
230
|
ncontrol=ncontrol,
|
|
231
|
+
neff=neff,
|
|
220
232
|
direction=direction,
|
|
221
233
|
study=study,
|
|
222
234
|
build=build,
|
|
@@ -243,18 +255,8 @@ class Sumstats():
|
|
|
243
255
|
gc.collect()
|
|
244
256
|
|
|
245
257
|
#### healper #################################################################################
|
|
246
|
-
def update_meta(self):
|
|
247
|
-
self.meta
|
|
248
|
-
if "CHR" in self.data.columns:
|
|
249
|
-
self.meta["gwaslab"]["variants"]["number_of_chromosomes"]=len(self.data["CHR"].unique())
|
|
250
|
-
if "P" in self.data.columns:
|
|
251
|
-
self.meta["gwaslab"]["variants"]["min_P"]=np.min(self.data["P"])
|
|
252
|
-
if "EAF" in self.data.columns:
|
|
253
|
-
self.meta["gwaslab"]["variants"]["min_minor_allele_freq"]=min (np.min(self.data["EAF"]) , 1- np.max(self.data["EAF"]))
|
|
254
|
-
if "N" in self.data.columns:
|
|
255
|
-
self.meta["gwaslab"]["samples"]["sample_size"] = int(self.data["N"].max())
|
|
256
|
-
self.meta["gwaslab"]["samples"]["sample_size_median"] = self.data["N"].median()
|
|
257
|
-
self.meta["gwaslab"]["samples"]["sample_size_min"] = int(self.data["N"].min())
|
|
258
|
+
def update_meta(self, **kwargs):
|
|
259
|
+
self.meta = _update_meta(self.meta, self.data,log = self.log, **kwargs)
|
|
258
260
|
|
|
259
261
|
def summary(self):
|
|
260
262
|
return summarize(self.data)
|
|
@@ -363,7 +365,8 @@ class Sumstats():
|
|
|
363
365
|
self.data = parallelnormalizeallele(self.data,log=self.log,n_cores=n_cores,**normalizeallele_args)
|
|
364
366
|
|
|
365
367
|
self.data = sortcolumn(self.data,log=self.log)
|
|
366
|
-
|
|
368
|
+
|
|
369
|
+
self.data = sortcoordinate(self.data,log=self.log)
|
|
367
370
|
gc.collect()
|
|
368
371
|
|
|
369
372
|
#####################################################
|
|
@@ -380,6 +383,7 @@ class Sumstats():
|
|
|
380
383
|
# 3.2 infer strand for palindromic SNP (target build)
|
|
381
384
|
#####################################################
|
|
382
385
|
if ref_seq is not None:
|
|
386
|
+
|
|
383
387
|
if ref_seq_mode=="v":
|
|
384
388
|
self.data = checkref(self.data,ref_seq,log=self.log,**checkref_args)
|
|
385
389
|
elif ref_seq_mode=="s":
|
|
@@ -441,6 +445,20 @@ class Sumstats():
|
|
|
441
445
|
self.meta["is_sorted"] = True
|
|
442
446
|
self.meta["is_harmonised"] = True
|
|
443
447
|
return self
|
|
448
|
+
|
|
449
|
+
def align_with_template(self, template, **kwargs):
|
|
450
|
+
## merge
|
|
451
|
+
molded_sumstats, sumstats1 = _merge_mold_with_sumstats_by_chrpos(mold=template,
|
|
452
|
+
sumstats=self.data,
|
|
453
|
+
log=self.log,
|
|
454
|
+
suffixes=("_MOLD",""),
|
|
455
|
+
return_not_matched_mold = True)
|
|
456
|
+
## align
|
|
457
|
+
aligned_data = _align_with_mold(molded_sumstats)
|
|
458
|
+
|
|
459
|
+
## flip
|
|
460
|
+
self.data =flipallelestats(aligned_data, log=self.log)
|
|
461
|
+
|
|
444
462
|
############################################################################################################
|
|
445
463
|
#customizable API to build your own QC pipeline
|
|
446
464
|
def fix_id(self,**kwargs):
|
|
@@ -507,6 +525,15 @@ class Sumstats():
|
|
|
507
525
|
|
|
508
526
|
# utilities ############################################################################################################
|
|
509
527
|
# filter series ######################################################################
|
|
528
|
+
|
|
529
|
+
def filter_region(self, inplace=False,**kwargs):
|
|
530
|
+
if inplace is False:
|
|
531
|
+
new_Sumstats_object = copy.deepcopy(self)
|
|
532
|
+
new_Sumstats_object.data = _filter_region(new_Sumstats_object.data, **kwargs)
|
|
533
|
+
return new_Sumstats_object
|
|
534
|
+
else:
|
|
535
|
+
self.data = _filter_region(self.data, **kwargs)
|
|
536
|
+
|
|
510
537
|
def filter_flanking(self, inplace=False,**kwargs):
|
|
511
538
|
if inplace is False:
|
|
512
539
|
new_Sumstats_object = copy.deepcopy(self)
|
|
@@ -749,6 +776,7 @@ class Sumstats():
|
|
|
749
776
|
chrom="CHR",
|
|
750
777
|
pos="POS",
|
|
751
778
|
p="P",
|
|
779
|
+
build=self.meta["gwaslab"]["genome_build"],
|
|
752
780
|
log=self.log,
|
|
753
781
|
**kwargs)
|
|
754
782
|
# return sumstats object
|
|
@@ -789,7 +817,22 @@ class Sumstats():
|
|
|
789
817
|
**kwargs)
|
|
790
818
|
# return sumstats object
|
|
791
819
|
return output
|
|
792
|
-
|
|
820
|
+
|
|
821
|
+
def check_cs_overlap(self, **kwargs):
|
|
822
|
+
if "SNPID" in self.pipcs.columns:
|
|
823
|
+
id_to_use = "SNPID"
|
|
824
|
+
else:
|
|
825
|
+
id_to_use = "rsID"
|
|
826
|
+
output = _check_novel_set(self.pipcs,
|
|
827
|
+
id=id_to_use,
|
|
828
|
+
chrom="CHR",
|
|
829
|
+
pos="POS",
|
|
830
|
+
p="P",
|
|
831
|
+
log=self.log,
|
|
832
|
+
**kwargs)
|
|
833
|
+
# return sumstats object
|
|
834
|
+
return output
|
|
835
|
+
|
|
793
836
|
def anno_gene(self, **kwargs):
|
|
794
837
|
if "SNPID" in self.data.columns:
|
|
795
838
|
id_to_use = "SNPID"
|
|
@@ -806,6 +849,9 @@ class Sumstats():
|
|
|
806
849
|
def get_per_snp_r2(self,**kwargs):
|
|
807
850
|
self.data = _get_per_snp_r2(self.data, beta="BETA", af="EAF", n="N", log=self.log, **kwargs)
|
|
808
851
|
#add data inplace
|
|
852
|
+
|
|
853
|
+
def get_ess(self, **kwargs):
|
|
854
|
+
self.data = _get_ess(self.data, log=self.log, **kwargs)
|
|
809
855
|
|
|
810
856
|
def get_gc(self, mode=None, **kwargs):
|
|
811
857
|
if mode is None:
|
|
@@ -831,40 +877,67 @@ class Sumstats():
|
|
|
831
877
|
credible_sets = make_cs(region_data,threshold=0.95,log=self.log)
|
|
832
878
|
return region_data, credible_sets
|
|
833
879
|
|
|
880
|
+
######################################################################################################
|
|
881
|
+
def run_prscs(self, build=None, verbose=True, match_allele=True, how="inner", **kwargs):
|
|
882
|
+
if build is None:
|
|
883
|
+
build = self.meta["gwaslab"]["genome_build"]
|
|
884
|
+
insumstats = gethapmap3(self.data.copy(), build=build, verbose=verbose , match_allele=match_allele, how=how )
|
|
885
|
+
_run_prscs(sst_file = insumstats[["rsID","CHR","POS","EA","NEA","BETA","SE"]], log=self.log, **kwargs)
|
|
834
886
|
|
|
835
887
|
## LDSC ##############################################################################################
|
|
836
888
|
def estimate_h2_by_ldsc(self, build=None, verbose=True, match_allele=True, how="right", **kwargs):
|
|
837
889
|
if build is None:
|
|
838
890
|
build = self.meta["gwaslab"]["genome_build"]
|
|
839
891
|
insumstats = gethapmap3(self.data.copy(), build=build, verbose=verbose , match_allele=match_allele, how=how )
|
|
840
|
-
self.ldsc_h2, self.ldsc_h2_results = _estimate_h2_by_ldsc(insumstats=insumstats,
|
|
892
|
+
self.ldsc_h2, self.ldsc_h2_results = _estimate_h2_by_ldsc(insumstats=insumstats,
|
|
893
|
+
meta=self.meta,
|
|
894
|
+
log=self.log,
|
|
895
|
+
verbose=verbose,
|
|
896
|
+
**kwargs)
|
|
841
897
|
|
|
842
898
|
def estimate_rg_by_ldsc(self, build=None, verbose=True, match_allele=True, how="right",**kwargs):
|
|
843
899
|
if build is None:
|
|
844
900
|
build = self.meta["gwaslab"]["genome_build"]
|
|
845
901
|
insumstats = gethapmap3(self.data.copy(), build=build, verbose=verbose , match_allele=match_allele, how=how )
|
|
846
|
-
|
|
902
|
+
ldsc_rg = _estimate_rg_by_ldsc(insumstats=insumstats,
|
|
903
|
+
meta=self.meta,
|
|
904
|
+
log=self.log,
|
|
905
|
+
verbose=verbose,
|
|
906
|
+
**kwargs)
|
|
907
|
+
self.ldsc_rg = pd.concat([self.ldsc_rg, ldsc_rg],ignore_index=True)
|
|
847
908
|
|
|
848
909
|
def estimate_h2_cts_by_ldsc(self, build=None, verbose=True, match_allele=True, how="right",**kwargs):
|
|
849
910
|
if build is None:
|
|
850
911
|
build = self.meta["gwaslab"]["genome_build"]
|
|
851
912
|
insumstats = gethapmap3(self.data.copy(), build=build, verbose=verbose , match_allele=match_allele, how=how )
|
|
852
|
-
self.ldsc_h2_cts = _estimate_h2_cts_by_ldsc(insumstats=insumstats,
|
|
913
|
+
self.ldsc_h2_cts = _estimate_h2_cts_by_ldsc(insumstats=insumstats,
|
|
914
|
+
log=self.log,
|
|
915
|
+
verbose=verbose,
|
|
916
|
+
**kwargs)
|
|
853
917
|
|
|
854
918
|
def estimate_partitioned_h2_by_ldsc(self, build=None, verbose=True, match_allele=True, how="right",**kwargs):
|
|
855
919
|
if build is None:
|
|
856
920
|
build = self.meta["gwaslab"]["genome_build"]
|
|
857
921
|
insumstats = gethapmap3(self.data.copy(), build=build, verbose=verbose , match_allele=match_allele, how=how )
|
|
858
|
-
self.ldsc_partitioned_h2_summary, self.ldsc_partitioned_h2_results = _estimate_partitioned_h2_by_ldsc(insumstats=insumstats,
|
|
922
|
+
self.ldsc_partitioned_h2_summary, self.ldsc_partitioned_h2_results = _estimate_partitioned_h2_by_ldsc(insumstats=insumstats,
|
|
923
|
+
meta=self.meta,
|
|
924
|
+
log=self.log,
|
|
925
|
+
verbose=verbose,
|
|
926
|
+
**kwargs)
|
|
859
927
|
# external ################################################################################################
|
|
860
928
|
|
|
861
929
|
def calculate_ld_matrix(self,**kwargs):
|
|
862
930
|
self.finemapping["path"],self.finemapping["file"],self.finemapping["plink_log"]= tofinemapping(self.data,study = self.meta["gwaslab"]["study_name"],**kwargs)
|
|
863
931
|
#self.to_finemapping_file_path, self.to_finemapping_file, self.plink_log = tofinemapping(self.data,study = self.meta["gwaslab"]["study_name"],**kwargs)
|
|
864
|
-
|
|
932
|
+
def extract_ld_matrix(self,**kwargs):
|
|
933
|
+
self.finemapping["path"],self.finemapping["file"],self.finemapping["plink_log"]= tofinemapping_using_ld(self.data,study = self.meta["gwaslab"]["study_name"],**kwargs)
|
|
934
|
+
|
|
865
935
|
def run_susie_rss(self,**kwargs):
|
|
866
|
-
self.pipcs=_run_susie_rss(self.finemapping["path"]
|
|
936
|
+
self.pipcs=_run_susie_rss(self.finemapping["path"], main_sumstats = self.data[["SNPID","CHR","POS"]], **kwargs)
|
|
937
|
+
self.finemapping["pipcs"] = self.pipcs
|
|
867
938
|
#self.pipcs=_run_susie_rss(self.to_finemapping_file_path,**kwargs)
|
|
939
|
+
def get_cs_lead(self,**kwargs):
|
|
940
|
+
return _get_cs_lead(self.pipcs,**kwargs)
|
|
868
941
|
|
|
869
942
|
def clump(self,**kwargs):
|
|
870
943
|
self.clumps["clumps"], self.clumps["clumps_raw"], self.clumps["plink_log"] = _clump(self.data, log=self.log, study = self.meta["gwaslab"]["study_name"], **kwargs)
|
|
@@ -875,10 +948,11 @@ class Sumstats():
|
|
|
875
948
|
|
|
876
949
|
# loading aux data
|
|
877
950
|
def read_pipcs(self,prefix,**kwargs):
|
|
878
|
-
self.pipcs = _read_pipcs(self.data[["SNPID","CHR","POS"]],prefix, **kwargs)
|
|
951
|
+
self.pipcs = _read_pipcs(self.data[["SNPID","CHR","POS"]],prefix, study= self.meta["gwaslab"]["study_name"], **kwargs)
|
|
952
|
+
self.finemapping["pipcs"] = self.pipcs
|
|
879
953
|
|
|
880
|
-
def plot_pipcs(self, region
|
|
881
|
-
_plot_cs(self.pipcs, region, **kwargs)
|
|
954
|
+
def plot_pipcs(self, region=None, locus=None, **kwargs):
|
|
955
|
+
_plot_cs(self.pipcs, region=region,locus=locus, **kwargs)
|
|
882
956
|
# to_format ###############################################################################################
|
|
883
957
|
|
|
884
958
|
def to_format(self, path, build=None, verbose=True, **kwargs):
|