gwaslab 3.5.5__py3-none-any.whl → 3.5.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gwaslab might be problematic. Click here for more details.
- gwaslab/__init__.py +2 -1
- gwaslab/g_Sumstats.py +27 -1
- gwaslab/g_SumstatsSet.py +663 -0
- gwaslab/g_version.py +2 -2
- gwaslab/hm_harmonize_sumstats.py +91 -1
- gwaslab/qc_fix_sumstats.py +1 -1
- gwaslab/util_ex_ldproxyfinder.py +162 -3
- gwaslab/util_in_fill_data.py +19 -2
- gwaslab/util_in_filter_value.py +52 -1
- gwaslab/util_in_merge.py +51 -0
- gwaslab/viz_aux_save_figure.py +2 -1
- gwaslab/viz_plot_effect.py +283 -0
- gwaslab/viz_plot_miamiplot2.py +1 -1
- gwaslab/viz_plot_mqqplot.py +17 -0
- gwaslab/viz_plot_regional2.py +133 -32
- gwaslab/viz_plot_stackedregional.py +0 -1
- {gwaslab-3.5.5.dist-info → gwaslab-3.5.6.dist-info}/METADATA +2 -2
- {gwaslab-3.5.5.dist-info → gwaslab-3.5.6.dist-info}/RECORD +22 -19
- {gwaslab-3.5.5.dist-info → gwaslab-3.5.6.dist-info}/WHEEL +1 -1
- {gwaslab-3.5.5.dist-info → gwaslab-3.5.6.dist-info}/LICENSE +0 -0
- {gwaslab-3.5.5.dist-info → gwaslab-3.5.6.dist-info}/LICENSE_before_v3.4.39 +0 -0
- {gwaslab-3.5.5.dist-info → gwaslab-3.5.6.dist-info}/top_level.txt +0 -0
gwaslab/g_SumstatsSet.py
ADDED
|
@@ -0,0 +1,663 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
import numpy as np
|
|
3
|
+
import time
|
|
4
|
+
import re
|
|
5
|
+
import copy
|
|
6
|
+
from gwaslab.g_Sumstats_summary import summarize
|
|
7
|
+
from gwaslab.g_Sumstats_summary import lookupstatus
|
|
8
|
+
from gwaslab.io_preformat_input import preformat
|
|
9
|
+
from gwaslab.io_to_formats import _to_format
|
|
10
|
+
from gwaslab.g_Log import Log
|
|
11
|
+
from gwaslab.qc_fix_sumstats import fixID
|
|
12
|
+
from gwaslab.qc_fix_sumstats import flipSNPID
|
|
13
|
+
from gwaslab.qc_fix_sumstats import stripSNPID
|
|
14
|
+
from gwaslab.qc_fix_sumstats import removedup
|
|
15
|
+
from gwaslab.qc_fix_sumstats import fixchr
|
|
16
|
+
from gwaslab.qc_fix_sumstats import fixpos
|
|
17
|
+
from gwaslab.qc_fix_sumstats import fixallele
|
|
18
|
+
from gwaslab.qc_fix_sumstats import parallelnormalizeallele
|
|
19
|
+
from gwaslab.qc_fix_sumstats import sanitycheckstats
|
|
20
|
+
from gwaslab.qc_fix_sumstats import parallelizeliftovervariant
|
|
21
|
+
from gwaslab.qc_fix_sumstats import flipallelestats
|
|
22
|
+
from gwaslab.qc_fix_sumstats import sortcoordinate
|
|
23
|
+
from gwaslab.qc_fix_sumstats import sortcolumn
|
|
24
|
+
from gwaslab.qc_fix_sumstats import _set_build
|
|
25
|
+
from gwaslab.qc_fix_sumstats import _process_build
|
|
26
|
+
from gwaslab.hm_harmonize_sumstats import parallelecheckaf
|
|
27
|
+
from gwaslab.hm_harmonize_sumstats import paralleleinferaf
|
|
28
|
+
from gwaslab.hm_harmonize_sumstats import checkref
|
|
29
|
+
from gwaslab.hm_harmonize_sumstats import oldcheckref
|
|
30
|
+
from gwaslab.hm_harmonize_sumstats import rsidtochrpos
|
|
31
|
+
from gwaslab.hm_harmonize_sumstats import parallelizeassignrsid
|
|
32
|
+
from gwaslab.hm_harmonize_sumstats import parallelinferstrand
|
|
33
|
+
from gwaslab.hm_harmonize_sumstats import parallelrsidtochrpos
|
|
34
|
+
from gwaslab.hm_harmonize_sumstats import _paralleleinferafwithmaf
|
|
35
|
+
from gwaslab.util_in_filter_value import filtervalues
|
|
36
|
+
from gwaslab.util_in_filter_value import filterout
|
|
37
|
+
from gwaslab.util_in_filter_value import filterin
|
|
38
|
+
from gwaslab.util_in_filter_value import filterregionin
|
|
39
|
+
from gwaslab.util_in_filter_value import filterregionout
|
|
40
|
+
from gwaslab.util_in_filter_value import _filter_indel
|
|
41
|
+
from gwaslab.util_in_filter_value import _filter_palindromic
|
|
42
|
+
from gwaslab.util_in_filter_value import _filter_snp
|
|
43
|
+
from gwaslab.util_in_filter_value import _exclude_hla
|
|
44
|
+
from gwaslab.util_in_filter_value import inferbuild
|
|
45
|
+
from gwaslab.util_in_filter_value import sampling
|
|
46
|
+
from gwaslab.util_in_filter_value import _get_flanking
|
|
47
|
+
from gwaslab.util_in_filter_value import _get_flanking_by_chrpos
|
|
48
|
+
from gwaslab.util_in_filter_value import _get_flanking_by_id
|
|
49
|
+
from gwaslab.util_in_calculate_gc import lambdaGC
|
|
50
|
+
from gwaslab.util_in_convert_h2 import _get_per_snp_r2
|
|
51
|
+
from gwaslab.util_in_get_sig import getsig
|
|
52
|
+
from gwaslab.util_in_get_density import getsignaldensity
|
|
53
|
+
from gwaslab.util_in_get_density import assigndensity
|
|
54
|
+
from gwaslab.util_in_get_sig import annogene
|
|
55
|
+
from gwaslab.util_in_get_sig import getnovel
|
|
56
|
+
from gwaslab.util_in_get_sig import _check_cis
|
|
57
|
+
from gwaslab.util_in_get_sig import _check_novel_set
|
|
58
|
+
from gwaslab.util_in_fill_data import filldata
|
|
59
|
+
from gwaslab.bd_get_hapmap3 import gethapmap3
|
|
60
|
+
from gwaslab.bd_common_data import get_chr_list
|
|
61
|
+
from gwaslab.bd_common_data import get_number_to_chr
|
|
62
|
+
from gwaslab.bd_common_data import get_chr_to_number
|
|
63
|
+
from gwaslab.bd_common_data import get_high_ld
|
|
64
|
+
from gwaslab.bd_common_data import get_format_dict
|
|
65
|
+
from gwaslab.bd_common_data import get_formats_list
|
|
66
|
+
from gwaslab.g_version import _show_version
|
|
67
|
+
from gwaslab.g_version import gwaslab_info
|
|
68
|
+
from gwaslab.g_meta import _init_meta
|
|
69
|
+
from gwaslab.g_meta import _append_meta_record
|
|
70
|
+
from gwaslab.util_ex_run_clumping import _clump
|
|
71
|
+
from gwaslab.util_ex_calculate_ldmatrix import tofinemapping
|
|
72
|
+
from gwaslab.util_ex_calculate_prs import _calculate_prs
|
|
73
|
+
from gwaslab.viz_plot_mqqplot import mqqplot
|
|
74
|
+
from gwaslab.viz_plot_trumpetplot import plottrumpet
|
|
75
|
+
from gwaslab.viz_plot_compare_af import plotdaf
|
|
76
|
+
from gwaslab.util_ex_run_susie import _run_susie_rss
|
|
77
|
+
from gwaslab.qc_fix_sumstats import _check_data_consistency
|
|
78
|
+
from gwaslab.util_ex_ldsc import _estimate_h2_by_ldsc
|
|
79
|
+
from gwaslab.util_ex_ldsc import _estimate_rg_by_ldsc
|
|
80
|
+
from gwaslab.util_ex_ldsc import _estimate_h2_cts_by_ldsc
|
|
81
|
+
from gwaslab.util_ex_ldsc import _estimate_partitioned_h2_by_ldsc
|
|
82
|
+
from gwaslab.bd_get_hapmap3 import gethapmap3
|
|
83
|
+
from gwaslab.util_abf_finemapping import abf_finemapping
|
|
84
|
+
from gwaslab.util_abf_finemapping import make_cs
|
|
85
|
+
from gwaslab.io_read_pipcs import _read_pipcs
|
|
86
|
+
from gwaslab.viz_plot_credible_sets import _plot_cs
|
|
87
|
+
import gc
|
|
88
|
+
from gwaslab.viz_plot_phe_heatmap import _gwheatmap
|
|
89
|
+
from gwaslab.viz_plot_effect import _plot_effect
|
|
90
|
+
from gwaslab.util_in_merge import _extract_variant
|
|
91
|
+
|
|
92
|
+
#20250215
|
|
93
|
+
class SumstatsSet():
|
|
94
|
+
def __init__(self,
             sumstats_dic,
             variant_set=None,
             build="99",
             species="homo sapiens",
             build_infer=False,
             set="set1",
             verbose=True,
             **readargs):
    """Create a set by extracting ``variant_set`` from ``sumstats_dic``.

    ``sumstats_dic`` and ``variant_set`` are handed to ``_extract_variant``
    and its result becomes ``self.data``. ``set`` and ``species`` are written
    to ``self.meta["gwaslab"]``; ``verbose`` controls the version banner and
    the extraction logging.

    NOTE(review): ``build`` is only stored on ``self.build``; the methods of
    this class read ``self.meta["gwaslab"]["genome_build"]`` instead, so it
    looks like the argument has no effect until ``set_build`` is called --
    confirm. ``build_infer`` and ``readargs`` are accepted but not used here.
    """
    # Basic attributes; ``data`` is replaced by the extraction result below.
    self.data = pd.DataFrame()
    self.log = Log()

    # Meta information describing this set.
    self.meta = _init_meta()
    self.build = build
    gwaslab_meta = self.meta["gwaslab"]
    gwaslab_meta["set_name"] = set
    gwaslab_meta["species"] = species

    # Print gwaslab version information.
    _show_version(self.log, verbose=verbose)

    extracted = _extract_variant(variant_set, sumstats_dic, log=self.log, verbose=verbose)
    self.data = extracted
|
|
118
|
+
|
|
119
|
+
def plot_effect(self,**args):
    """Draw an effect plot of ``self.data`` by delegating to ``_plot_effect``.

    All keyword arguments are forwarded unchanged. Whatever ``_plot_effect``
    returns is handed back to the caller (it used to be discarded), in line
    with the other ``plot_*`` methods of this class, which return the result
    of their plotting call.
    """
    return _plot_effect(self.data, **args)
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
#### helper ##################################################################################
|
|
124
|
+
|
|
125
|
+
def lookup_status(self,status="STATUS"):
    """Return ``lookupstatus`` applied to the ``status`` column of ``self.data``."""
    status_column = self.data[status]
    return lookupstatus(status_column)
|
|
127
|
+
|
|
128
|
+
def set_build(self, build, verbose=True):
    """Run ``_set_build`` and store both of its results.

    The first returned value replaces ``self.data``; the second is written to
    ``self.meta["gwaslab"]["genome_build"]``. A garbage collection pass is
    triggered afterwards.
    """
    updated, genome_build = _set_build(self.data, build=build, log=self.log, verbose=verbose)
    self.data = updated
    self.meta["gwaslab"]["genome_build"] = genome_build
    gc.collect()
|
|
131
|
+
|
|
132
|
+
def infer_build(self,verbose=True,**kwargs):
    """Run ``inferbuild`` on ``self.data`` and record the build it reports.

    The first returned value replaces ``self.data``; the second is stored in
    ``self.meta["gwaslab"]["genome_build"]``. Extra keyword arguments are
    forwarded to ``inferbuild``.
    """
    updated, genome_build = inferbuild(self.data, log=self.log, verbose=verbose, **kwargs)
    self.data = updated
    self.meta["gwaslab"]["genome_build"] = genome_build
|
|
134
|
+
|
|
135
|
+
def liftover(self,to_build, from_build=None,**kwargs):
    """Lift the variants in ``self.data`` over to ``to_build``.

    When ``from_build`` is not given, the build recorded in
    ``self.meta["gwaslab"]["genome_build"]`` is used; if that record is
    ``"99"`` the build is first inferred with ``inferbuild``. After the
    liftover the sorted/harmonised flags are cleared and the recorded build is
    set to ``to_build``.

    Extra keyword arguments are forwarded to ``parallelizeliftovervariant``.
    """
    if from_build is None:
        if self.meta["gwaslab"]["genome_build"]=="99":
            # Log to this object's log, as ``infer_build`` and every other
            # step of this class do (the call previously omitted ``log``).
            # NOTE(review): the liftover kwargs are also forwarded to
            # inferbuild here -- confirm both functions accept the same keys.
            self.data, self.meta["gwaslab"]["genome_build"] = inferbuild(self.data, log=self.log, **kwargs)
        from_build = self.meta["gwaslab"]["genome_build"]
    self.data = parallelizeliftovervariant(self.data,from_build=from_build, to_build=to_build, log=self.log,**kwargs)
    # Coordinates changed, so earlier sorting / harmonisation no longer holds.
    self.meta["is_sorted"] = False
    self.meta["is_harmonised"] = False
    self.meta["gwaslab"]["genome_build"]=to_build
|
|
144
|
+
|
|
145
|
+
# QC ######################################################################################
|
|
146
|
+
#clean the sumstats with one line
|
|
147
|
+
def basic_check(self,
                remove=False,
                remove_dup=False,
                n_cores=1,
                fixid_args={},
                removedup_args={},
                fixchr_args={},
                fixpos_args={},
                fixallele_args={},
                sanitycheckstats_args={},
                consistencycheck_args={},
                normalize=True,
                normalizeallele_args={},
                verbose=True):
    """Run the basic QC steps on ``self.data`` in a fixed order.

    Order: fixID, fixchr, fixpos, fixallele, sanitycheckstats,
    _check_data_consistency, then (only if ``normalize is True``)
    parallelnormalizeallele, then (only if ``remove_dup is True``) removedup,
    and finally sortcoordinate and sortcolumn. ``self.meta["is_sorted"]`` is
    set to True at the end.

    ``remove`` is passed to fixchr/fixpos/fixallele; each ``*_args`` dict is
    expanded into the keyword arguments of the matching step. The dict
    defaults are only read, never modified.
    """
    shared = {"log": self.log, "verbose": verbose}

    # Try to fix data without dropping any information.
    self.data = fixID(self.data, **shared, **fixid_args)
    standardization_steps = (
        (fixchr, fixchr_args),
        (fixpos, fixpos_args),
        (fixallele, fixallele_args),
    )
    for step, step_args in standardization_steps:
        self.data = step(self.data, remove=remove, **shared, **step_args)
    self.data = sanitycheckstats(self.data, **shared, **sanitycheckstats_args)
    # The consistency check only reports; its return value is not stored.
    _check_data_consistency(self.data, **shared, **consistencycheck_args)

    if normalize is True:
        self.data = parallelnormalizeallele(self.data, n_cores=n_cores, **shared, **normalizeallele_args)
    if remove_dup is True:
        self.data = removedup(self.data, **shared, **removedup_args)

    for sorter in (sortcoordinate, sortcolumn):
        self.data = sorter(self.data, **shared)
    self.meta["is_sorted"] = True
|
|
177
|
+
###############################################
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def harmonize(self,
              basic_check=True,
              ref_seq=None,
              ref_rsid_tsv=None,
              ref_rsid_vcf=None,
              ref_infer=None,
              ref_alt_freq=None,
              maf_threshold=0.40,
              ref_seq_mode="v",
              n_cores=1,
              remove=False,
              checkref_args={},
              removedup_args={},
              assignrsid_args={},
              inferstrand_args={},
              flipallelestats_args={},
              liftover_args={},
              fixid_args={},
              fixchr_args={},
              fixpos_args={},
              fixallele_args={},
              sanitycheckstats_args={},
              normalizeallele_args={}
              ):
    """Run the standard harmonization pipeline on ``self.data``.

    Part 1 (only if ``basic_check is True``): fixID, fixchr, fixpos,
    fixallele, sanitycheckstats, parallelnormalizeallele, sortcolumn.
    Part 2: reference allele check (if ``ref_seq`` is given), strand inference
    (if ``ref_infer`` is given), each followed by flipallelestats; an ID
    rebuild when a reference step and an rsID source are both given; rsID
    assignment from ``ref_rsid_tsv`` and/or ``ref_rsid_vcf``; removedup (only
    if ``remove is True``); final sorting of coordinates and columns.

    The references used are recorded under
    ``self.meta["gwaslab"]["references"]``; the sorted and harmonised flags
    are set to True. ``liftover_args`` is accepted but not used here.

    Returns
    -------
    self

    Raises
    ------
    ValueError
        If ``ref_seq`` is given and ``ref_seq_mode`` is neither 'v' nor 's'.
    """
    ####################################################
    # part 1 : basic_check
    if basic_check is True:
        self.data = fixID(self.data, log=self.log, **fixid_args)
        self.data = fixchr(self.data, remove=remove, log=self.log, **fixchr_args)
        self.data = fixpos(self.data, remove=remove, log=self.log, **fixpos_args)
        self.data = fixallele(self.data, log=self.log, **fixallele_args)
        self.data = sanitycheckstats(self.data, log=self.log, **sanitycheckstats_args)
        self.data = parallelnormalizeallele(self.data, log=self.log, n_cores=n_cores, **normalizeallele_args)
        self.data = sortcolumn(self.data, log=self.log)
        gc.collect()

    #####################################################
    # part 2 : annotating and flipping
    has_reference_step = ref_seq is not None or ref_infer is not None
    has_rsid_source = ref_rsid_tsv is not None or ref_rsid_vcf is not None

    # 2.1 ref check -> flip allele and allele-specific stats
    if ref_seq is not None:
        check_function = {"v": checkref, "s": oldcheckref}.get(ref_seq_mode)
        if check_function is None:
            raise ValueError("ref_seq_mode should be 'v' (vectorized, faster) or 's' (sequential, slower)")
        self.data = check_function(self.data, ref_seq, log=self.log, **checkref_args)
        self.meta["gwaslab"]["references"]["ref_seq"] = ref_seq
        self.data = flipallelestats(self.data, log=self.log, **flipallelestats_args)
        gc.collect()

    # 2.3 infer strand for palindromic SNP
    if ref_infer is not None:
        self.data = parallelinferstrand(self.data,
                                        ref_infer=ref_infer,
                                        ref_alt_freq=ref_alt_freq,
                                        maf_threshold=maf_threshold,
                                        n_cores=n_cores,
                                        log=self.log,
                                        **inferstrand_args)
        references = self.meta["gwaslab"]["references"]
        references["ref_infer"] = _append_meta_record(references["ref_infer"], ref_infer)
        self.data = flipallelestats(self.data, log=self.log, **flipallelestats_args)
        gc.collect()

    # Rebuild IDs before rsID assignment when alleles may have been flipped.
    if has_reference_step and has_rsid_source:
        self.data = fixID(self.data, log=self.log, fixid=True, fixsep=True, overwrite=True)
        gc.collect()

    #####################################################
    # 2.2 assign rsid
    if ref_rsid_tsv is not None:
        self.data = parallelizeassignrsid(self.data, path=ref_rsid_tsv, ref_mode="tsv",
                                          n_cores=n_cores, log=self.log, **assignrsid_args)
        self.meta["gwaslab"]["references"]["ref_rsid_tsv"] = ref_rsid_tsv
        gc.collect()

    if ref_rsid_vcf is not None:
        self.data = parallelizeassignrsid(self.data, path=ref_rsid_vcf, ref_mode="vcf",
                                          n_cores=n_cores, log=self.log, **assignrsid_args)
        references = self.meta["gwaslab"]["references"]
        references["ref_rsid_vcf"] = _append_meta_record(references["ref_rsid_vcf"], ref_rsid_vcf)
        gc.collect()

    ######################################################
    if remove is True:
        self.data = removedup(self.data, log=self.log, **removedup_args)

    ################################################
    self.data = sortcoordinate(self.data, log=self.log)
    self.data = sortcolumn(self.data, log=self.log)
    gc.collect()
    self.meta["is_sorted"] = True
    self.meta["is_harmonised"] = True
    return self
|
|
307
|
+
############################################################################################################
|
|
308
|
+
#customizable API to build your own QC pipeline
|
|
309
|
+
def fix_id(self,**kwargs):
    """Replace ``self.data`` with the result of ``fixID``; kwargs are forwarded."""
    fixed = fixID(self.data, log=self.log, **kwargs)
    self.data = fixed
|
|
311
|
+
def flip_snpid(self,**kwargs):
    """Replace ``self.data`` with the result of ``flipSNPID``; kwargs are forwarded."""
    flipped = flipSNPID(self.data, log=self.log, **kwargs)
    self.data = flipped
|
|
313
|
+
def strip_snpid(self,**kwargs):
    """Replace ``self.data`` with the result of ``stripSNPID``; kwargs are forwarded."""
    stripped = stripSNPID(self.data, log=self.log, **kwargs)
    self.data = stripped
|
|
315
|
+
def fix_chr(self,**kwargs):
    """Replace ``self.data`` with the result of ``fixchr``; kwargs are forwarded."""
    fixed = fixchr(self.data, log=self.log, **kwargs)
    self.data = fixed
|
|
317
|
+
def fix_pos(self,**kwargs):
    """Replace ``self.data`` with the result of ``fixpos``; kwargs are forwarded."""
    fixed = fixpos(self.data, log=self.log, **kwargs)
    self.data = fixed
|
|
319
|
+
def fix_allele(self,**kwargs):
    """Replace ``self.data`` with the result of ``fixallele``; kwargs are forwarded."""
    fixed = fixallele(self.data, log=self.log, **kwargs)
    self.data = fixed
|
|
321
|
+
def remove_dup(self,**kwargs):
    """Replace ``self.data`` with the result of ``removedup``; kwargs are forwarded."""
    deduplicated = removedup(self.data, log=self.log, **kwargs)
    self.data = deduplicated
|
|
323
|
+
def check_sanity(self,**kwargs):
    """Replace ``self.data`` with the result of ``sanitycheckstats``; kwargs are forwarded."""
    checked = sanitycheckstats(self.data, log=self.log, **kwargs)
    self.data = checked
|
|
325
|
+
def check_data_consistency(self, **kwargs):
    """Call ``_check_data_consistency`` on ``self.data``.

    The helper's return value is not stored and nothing is returned;
    keyword arguments are forwarded unchanged.
    """
    table = self.data
    _check_data_consistency(table, log=self.log, **kwargs)
|
|
327
|
+
def check_id(self,**kwargs):
    """Placeholder: accepts any keyword arguments, does nothing, returns None."""
    return None
|
|
329
|
+
def check_ref(self,ref_seq,ref_seq_mode="v",**kwargs):
    """Check alleles in ``self.data`` against the reference ``ref_seq``.

    ``ref_seq_mode`` selects the implementation: 'v' calls ``checkref`` and
    's' calls ``oldcheckref``. ``ref_seq`` is recorded in
    ``self.meta["gwaslab"]["references"]["ref_seq"]`` before the check runs.
    Extra keyword arguments are forwarded to the selected function.

    Raises
    ------
    ValueError
        If ``ref_seq_mode`` is neither 'v' nor 's'. Such a value used to be
        ignored silently, leaving data and meta untouched; ``harmonize``
        already rejects it with this same message.
    """
    # Validate the mode first so an invalid value changes nothing.
    if ref_seq_mode=="v":
        check_function = checkref
    elif ref_seq_mode=="s":
        check_function = oldcheckref
    else:
        raise ValueError("ref_seq_mode should be 'v' (vectorized, faster) or 's' (sequential, slower)")
    self.meta["gwaslab"]["references"]["ref_seq"] = ref_seq
    self.data = check_function(self.data,ref_seq,log=self.log,**kwargs)
|
|
336
|
+
def infer_strand(self,ref_infer,**kwargs):
    """Record ``ref_infer`` in the meta references, then run ``parallelinferstrand``.

    The meta record ``ref_infer`` is updated through ``_append_meta_record``
    before the inference call; the call's result replaces ``self.data``.
    """
    references = self.meta["gwaslab"]["references"]
    references["ref_infer"] = _append_meta_record(references["ref_infer"], ref_infer)
    inferred = parallelinferstrand(self.data, ref_infer=ref_infer, log=self.log, **kwargs)
    self.data = inferred
|
|
339
|
+
def flip_allele_stats(self,**kwargs):
    """Replace ``self.data`` with the result of ``flipallelestats``; kwargs are forwarded."""
    flipped = flipallelestats(self.data, log=self.log, **kwargs)
    self.data = flipped
|
|
341
|
+
def normalize_allele(self,**kwargs):
    """Replace ``self.data`` with the result of ``parallelnormalizeallele``; kwargs are forwarded."""
    normalized = parallelnormalizeallele(self.data, log=self.log, **kwargs)
    self.data = normalized
|
|
343
|
+
def assign_rsid(self,
                ref_rsid_tsv=None,
                ref_rsid_vcf=None,
                **kwargs):
    """Assign rsIDs from a tsv and/or vcf reference via ``parallelizeassignrsid``.

    Each reference that is not None is processed (tsv first, then vcf) and
    recorded in ``self.meta["gwaslab"]["references"]``: the tsv path replaces
    the ``ref_rsid_tsv`` record, the vcf path is combined with the existing
    ``ref_rsid_vcf`` record through ``_append_meta_record``.
    """
    if ref_rsid_tsv is not None:
        assigned = parallelizeassignrsid(self.data, path=ref_rsid_tsv, ref_mode="tsv", log=self.log, **kwargs)
        self.data = assigned
        self.meta["gwaslab"]["references"]["ref_rsid_tsv"] = ref_rsid_tsv
    if ref_rsid_vcf is not None:
        assigned = parallelizeassignrsid(self.data, path=ref_rsid_vcf, ref_mode="vcf", log=self.log, **kwargs)
        self.data = assigned
        references = self.meta["gwaslab"]["references"]
        references["ref_rsid_vcf"] = _append_meta_record(references["ref_rsid_vcf"], ref_rsid_vcf)
|
|
353
|
+
def rsid_to_chrpos(self,**kwargs):
    """Replace ``self.data`` with the result of ``rsidtochrpos``; kwargs are forwarded."""
    annotated = rsidtochrpos(self.data, log=self.log, **kwargs)
    self.data = annotated
|
|
355
|
+
def rsid_to_chrpos2(self,**kwargs):
    """Replace ``self.data`` with the result of ``parallelrsidtochrpos``; kwargs are forwarded."""
    annotated = parallelrsidtochrpos(self.data, log=self.log, **kwargs)
    self.data = annotated
|
|
357
|
+
|
|
358
|
+
############################################################################################################
|
|
359
|
+
|
|
360
|
+
def sort_coordinate(self,**sort_args):
    """Run ``sortcoordinate`` on ``self.data`` and flag the set as sorted."""
    ordered = sortcoordinate(self.data, log=self.log, **sort_args)
    self.data = ordered
    self.meta["is_sorted"] = True
|
|
363
|
+
def sort_column(self,**kwargs):
    """Replace ``self.data`` with the result of ``sortcolumn``; kwargs are forwarded."""
    reordered = sortcolumn(self.data, log=self.log, **kwargs)
    self.data = reordered
|
|
365
|
+
|
|
366
|
+
############################################################################################################
|
|
367
|
+
def fill_data(self, verbose=True, **kwargs):
    """Run ``filldata`` on ``self.data`` and then re-order the columns.

    Extra keyword arguments go to ``filldata`` only; ``sortcolumn`` is called
    with just ``verbose`` and the object's log.
    """
    filled = filldata(self.data, verbose=verbose, log=self.log, **kwargs)
    self.data = filled
    self.data = sortcolumn(filled, verbose=verbose, log=self.log)
|
|
370
|
+
|
|
371
|
+
# utilities ############################################################################################################
|
|
372
|
+
# filter series ######################################################################
|
|
373
|
+
def filter_flanking(self, inplace=False,**kwargs):
    """Filter with ``_get_flanking``, in place or on a deep copy.

    With ``inplace=False`` (default) a deep copy of this object is filtered
    and returned; otherwise ``self.data`` is replaced and None is returned.
    """
    if inplace is not False:
        self.data = _get_flanking(self.data, **kwargs)
        return None
    clone = copy.deepcopy(self)
    clone.data = _get_flanking(clone.data, **kwargs)
    return clone
|
|
380
|
+
def filter_flanking_by_chrpos(self, chrpos, inplace=False,**kwargs):
    """Filter with ``_get_flanking_by_chrpos``, in place or on a deep copy.

    ``chrpos`` is passed through as the helper's second positional argument.
    With ``inplace=False`` (default) a filtered deep copy is returned;
    otherwise ``self.data`` is replaced and None is returned.
    """
    if inplace is not False:
        self.data = _get_flanking_by_chrpos(self.data, chrpos, **kwargs)
        return None
    clone = copy.deepcopy(self)
    clone.data = _get_flanking_by_chrpos(clone.data, chrpos, **kwargs)
    return clone
|
|
387
|
+
def filter_flanking_by_id(self, snpid, inplace=False,**kwargs):
    """Filter with ``_get_flanking_by_id``, in place or on a deep copy.

    ``snpid`` is passed through as the helper's second positional argument.
    With ``inplace=False`` (default) a filtered deep copy is returned;
    otherwise ``self.data`` is replaced and None is returned.
    """
    if inplace is not False:
        self.data = _get_flanking_by_id(self.data, snpid, **kwargs)
        return None
    clone = copy.deepcopy(self)
    clone.data = _get_flanking_by_id(clone.data, snpid, **kwargs)
    return clone
|
|
394
|
+
def filter_value(self, expr, inplace=False, **kwargs):
    """Filter with ``filtervalues`` using ``expr``, in place or on a deep copy.

    With ``inplace=False`` (default) a deep copy is filtered (logging to the
    copy's own log) and returned; otherwise ``self.data`` is replaced and
    None is returned.
    """
    if inplace is not False:
        self.data = filtervalues(self.data, expr, log=self.log, **kwargs)
        return None
    clone = copy.deepcopy(self)
    clone.data = filtervalues(clone.data, expr, log=clone.log, **kwargs)
    return clone
|
|
401
|
+
def filter_out(self, inplace=False, **kwargs):
    """Filter with ``filterout``, in place or on a deep copy.

    With ``inplace=False`` (default) a deep copy is filtered (logging to the
    copy's own log) and returned; otherwise ``self.data`` is replaced and
    None is returned.
    """
    if inplace is not False:
        self.data = filterout(self.data, log=self.log, **kwargs)
        return None
    clone = copy.deepcopy(self)
    clone.data = filterout(clone.data, log=clone.log, **kwargs)
    return clone
|
|
408
|
+
def filter_in(self, inplace=False, **kwargs):
    """Filter with ``filterin``, in place or on a deep copy.

    With ``inplace=False`` (default) a deep copy is filtered (logging to the
    copy's own log) and returned; otherwise ``self.data`` is replaced and
    None is returned.
    """
    if inplace is not False:
        self.data = filterin(self.data, log=self.log, **kwargs)
        return None
    clone = copy.deepcopy(self)
    clone.data = filterin(clone.data, log=clone.log, **kwargs)
    return clone
|
|
415
|
+
def filter_region_in(self, inplace=False, **kwargs):
    """Filter with ``filterregionin``, in place or on a deep copy.

    With ``inplace=False`` (default) a deep copy is filtered (logging to the
    copy's own log) and returned; otherwise ``self.data`` is replaced and
    None is returned.
    """
    if inplace is not False:
        self.data = filterregionin(self.data, log=self.log, **kwargs)
        return None
    clone = copy.deepcopy(self)
    clone.data = filterregionin(clone.data, log=clone.log, **kwargs)
    return clone
|
|
422
|
+
def filter_region_out(self, inplace=False, **kwargs):
    """Filter with ``filterregionout``, in place or on a deep copy.

    With ``inplace=False`` (default) a deep copy is filtered (logging to the
    copy's own log) and returned; otherwise ``self.data`` is replaced and
    None is returned.
    """
    if inplace is not False:
        self.data = filterregionout(self.data, log=self.log, **kwargs)
        return None
    clone = copy.deepcopy(self)
    clone.data = filterregionout(clone.data, log=clone.log, **kwargs)
    return clone
|
|
429
|
+
def filter_palindromic(self, inplace=False, **kwargs):
    """Filter with ``_filter_palindromic``, in place or on a deep copy.

    With ``inplace=False`` (default) a deep copy is filtered (logging to the
    copy's own log) and returned; otherwise ``self.data`` is replaced and
    None is returned.
    """
    if inplace is not False:
        self.data = _filter_palindromic(self.data, log=self.log, **kwargs)
        return None
    clone = copy.deepcopy(self)
    clone.data = _filter_palindromic(clone.data, log=clone.log, **kwargs)
    return clone
|
|
436
|
+
def filter_snp(self, inplace=False, **kwargs):
    """Filter with ``_filter_snp``, in place or on a deep copy.

    With ``inplace=False`` (default) a deep copy is filtered (logging to the
    copy's own log) and returned; otherwise ``self.data`` is replaced and
    None is returned.
    """
    if inplace is not False:
        self.data = _filter_snp(self.data, log=self.log, **kwargs)
        return None
    clone = copy.deepcopy(self)
    clone.data = _filter_snp(clone.data, log=clone.log, **kwargs)
    return clone
|
|
443
|
+
def filter_indel(self, inplace=False, **kwargs):
    """Filter with ``_filter_indel``, in place or on a deep copy.

    With ``inplace=False`` (default) a deep copy is filtered (logging to the
    copy's own log) and returned; otherwise ``self.data`` is replaced and
    None is returned.
    """
    if inplace is not False:
        self.data = _filter_indel(self.data, log=self.log, **kwargs)
        return None
    clone = copy.deepcopy(self)
    clone.data = _filter_indel(clone.data, log=clone.log, **kwargs)
    return clone
|
|
450
|
+
|
|
451
|
+
def exclude_hla(self, inplace=False, **kwargs):
    """Filter with ``_exclude_hla``, in place or on a deep copy.

    With ``inplace=False`` (default) a deep copy is filtered (logging to the
    copy's own log) and returned; otherwise ``self.data`` is replaced and
    None is returned.
    """
    if inplace is not False:
        self.data = _exclude_hla(self.data, log=self.log, **kwargs)
        return None
    clone = copy.deepcopy(self)
    clone.data = _exclude_hla(clone.data, log=clone.log, **kwargs)
    return clone
|
|
458
|
+
|
|
459
|
+
|
|
460
|
+
def random_variants(self,inplace=False,n=1,p=None,**kwargs):
    """Sample variants with ``sampling``, in place or on a deep copy.

    Only ``inplace=True`` modifies this object (and returns None); any other
    value samples from a deep copy, logging to the copy's own log, and
    returns that copy. ``n`` and ``p`` are forwarded to ``sampling``.
    """
    if inplace is not True:
        clone = copy.deepcopy(self)
        clone.data = sampling(clone.data, n=n, p=p, log=clone.log, **kwargs)
        return clone
    self.data = sampling(self.data, n=n, p=p, log=self.log, **kwargs)
    return None
|
|
467
|
+
|
|
468
|
+
def filter_hapmap3(self, inplace=False, build=None, **kwargs ):
    """Filter with ``gethapmap3``, in place or on a deep copy.

    ``build`` defaults to ``self.meta["gwaslab"]["genome_build"]``. Only
    ``inplace=True`` modifies this object (and returns None); otherwise a
    deep copy is filtered and returned.

    The copy now logs to its own log. It previously wrote to the original
    object's log, unlike every other filter method of this class.
    """
    if build is None:
        build = self.meta["gwaslab"]["genome_build"]
    if inplace is True:
        self.data = gethapmap3(self.data, build=build,log=self.log, **kwargs)
        return None
    new_Sumstats_object = copy.deepcopy(self)
    new_Sumstats_object.data = gethapmap3(new_Sumstats_object.data,
                                          build=build,
                                          log=new_Sumstats_object.log,
                                          **kwargs)
    return new_Sumstats_object
|
|
477
|
+
######################################################################
|
|
478
|
+
|
|
479
|
+
def check_af(self,ref_infer,**kwargs):
    """Run ``parallelecheckaf`` against ``ref_infer`` and record the reference.

    The result replaces ``self.data``; afterwards the ``ref_infer_daf`` meta
    record is updated through ``_append_meta_record``.
    """
    checked = parallelecheckaf(self.data, ref_infer=ref_infer, log=self.log, **kwargs)
    self.data = checked
    references = self.meta["gwaslab"]["references"]
    references["ref_infer_daf"] = _append_meta_record(references["ref_infer_daf"], ref_infer)
|
|
482
|
+
|
|
483
|
+
def infer_af(self,ref_infer,**kwargs):
    """Run ``paralleleinferaf`` against ``ref_infer`` and record the reference.

    The result replaces ``self.data``. The ``ref_infer_af`` meta record is
    then combined with ``ref_infer`` through ``_append_meta_record``, the same
    way ``check_af`` and ``infer_strand`` update their records.

    The record used to be overwritten with ``ref_infer`` just before the
    append, so any earlier entry was lost and ``ref_infer`` was appended to
    itself.
    """
    self.data = paralleleinferaf(self.data,ref_infer=ref_infer,log=self.log,**kwargs)
    references = self.meta["gwaslab"]["references"]
    if "ref_infer_af" in references:
        references["ref_infer_af"] = _append_meta_record(references["ref_infer_af"], ref_infer)
    else:
        # No earlier record to extend: start one.
        references["ref_infer_af"] = ref_infer
|
|
487
|
+
def maf_to_eaf(self,ref_infer,**kwargs):
    """Run ``_paralleleinferafwithmaf`` against ``ref_infer`` and record the reference.

    The result replaces ``self.data``. The ``ref_infer_maf`` meta record is
    then combined with ``ref_infer`` through ``_append_meta_record``.

    Two defects are fixed: the record used to be overwritten with
    ``ref_infer`` first, and the value written to ``ref_infer_maf`` was built
    from the unrelated ``ref_infer_af`` record (a copy-paste slip).
    """
    self.data = _paralleleinferafwithmaf(self.data,ref_infer=ref_infer,log=self.log,**kwargs)
    references = self.meta["gwaslab"]["references"]
    if "ref_infer_maf" in references:
        references["ref_infer_maf"] = _append_meta_record(references["ref_infer_maf"], ref_infer)
    else:
        # No earlier record under this key: start one.
        references["ref_infer_maf"] = ref_infer
|
|
491
|
+
def plot_daf(self, **kwargs):
    """Call ``plotdaf`` on ``self.data`` and return its two results as a tuple."""
    figure, outlier_table = plotdaf(self.data, **kwargs)
    return (figure, outlier_table)
|
|
494
|
+
|
|
495
|
+
def plot_gwheatmap(self, **kwargs):
    """Return the result of ``_gwheatmap`` called on ``self.data``; kwargs are forwarded."""
    return _gwheatmap(self.data, **kwargs)
|
|
498
|
+
|
|
499
|
+
def plot_mqq(self, build=None, **kwargs):
    """Call ``mqqplot`` on ``self.data`` with this class's fixed column names.

    The variant ID column is ``SNPID`` when present, otherwise ``rsID``.
    ``EAF`` is passed as the allele-frequency column only when it exists
    (None otherwise). ``build`` defaults to
    ``self.meta["gwaslab"]["genome_build"]``. Extra keyword arguments are
    forwarded to ``mqqplot``, whose result is returned.

    Raises
    ------
    ValueError
        If the data has neither a ``SNPID`` nor an ``rsID`` column. This case
        used to surface as an ``UnboundLocalError`` on ``snpid``.
    """
    columns = self.data.columns

    if "SNPID" in columns:
        snpid = "SNPID"
    elif "rsID" in columns:
        snpid = "rsID"
    else:
        raise ValueError("plot_mqq requires a 'SNPID' or 'rsID' column in the data.")

    eaf = "EAF" if "EAF" in columns else None

    # Extract build information from meta data.
    if build is None:
        build = self.meta["gwaslab"]["genome_build"]

    plot = mqqplot(self.data,
                   snpid=snpid,
                   chrom="CHR",
                   pos="POS",
                   p="P",
                   eaf=eaf,
                   build=build,
                   **kwargs)
    return plot
|
|
529
|
+
|
|
530
|
+
def plot_trumpet(self, build=None, **kwargs):
    """Return the result of ``plottrumpet`` called on ``self.data``.

    ``build`` defaults to ``self.meta["gwaslab"]["genome_build"]``.
    """
    genome_build = self.meta["gwaslab"]["genome_build"] if build is None else build
    return plottrumpet(self.data, build=genome_build, **kwargs)
|
|
535
|
+
|
|
536
|
+
def get_lead(self, build=None, gls=False, **kwargs):
    """Run ``getsig`` on ``self.data`` and return its output.

    The ID column is ``SNPID`` when present, otherwise ``rsID``; ``build``
    defaults to ``self.meta["gwaslab"]["genome_build"]``. When ``gls == True``
    the output is wrapped in a deep copy of this object (as its ``data``) and
    that copy is returned instead of the raw output.
    """
    id_to_use = "SNPID" if "SNPID" in self.data.columns else "rsID"

    # Extract build information from meta data.
    genome_build = self.meta["gwaslab"]["genome_build"] if build is None else build

    output = getsig(self.data,
                    id=id_to_use,
                    chrom="CHR",
                    pos="POS",
                    p="P",
                    log=self.log,
                    build=genome_build,
                    **kwargs)

    if not gls == True:
        return output

    # Return an object of this class carrying the result.
    wrapped = copy.deepcopy(self)
    wrapped.data = output
    gc.collect()
    return wrapped
|
|
561
|
+
|
|
562
|
+
def get_density(self, sig_list=None, windowsizekb=100,**kwargs):
    """Write a ``DENSITY`` column into ``self.data``.

    Without ``sig_list`` the column comes from ``getsignaldensity``; with a
    pandas DataFrame ``sig_list`` it comes from ``assigndensity``. The ID
    column is ``SNPID`` when present, otherwise ``rsID``.

    NOTE(review): a ``sig_list`` that is neither None nor a DataFrame leaves
    the data untouched without any message, and ``kwargs`` is accepted but
    not used -- confirm both are intended.
    """
    id_to_use = "SNPID" if "SNPID" in self.data.columns else "rsID"
    shared = {
        "id": id_to_use,
        "chrom": "CHR",
        "pos": "POS",
        "bwindowsizekb": windowsizekb,
        "log": self.log,
    }

    if sig_list is None:
        density = getsignaldensity(self.data, **shared)
        self.data["DENSITY"] = density
    elif isinstance(sig_list, pd.DataFrame):
        density = assigndensity(self.data, sig_list, **shared)
        self.data["DENSITY"] = density
|
|
585
|
+
|
|
586
|
+
|
|
587
|
+
def get_novel(self, **kwargs):
    """Return the output of ``getnovel`` run on ``self.data``.

    The ID column is ``SNPID`` when present, otherwise ``rsID``; extra keyword
    arguments are forwarded.
    """
    id_to_use = "SNPID" if "SNPID" in self.data.columns else "rsID"
    return getnovel(self.data,
                    id=id_to_use,
                    chrom="CHR",
                    pos="POS",
                    p="P",
                    log=self.log,
                    **kwargs)
|
|
601
|
+
|
|
602
|
+
def check_cis(self, gls=False, **kwargs):
    """Run ``_check_cis`` on ``self.data`` and return its output.

    The ID column is ``SNPID`` when present, otherwise ``rsID``. When
    ``gls == True`` the output is wrapped in a deep copy of this object (as
    its ``data``) and that copy is returned instead of the raw output.
    """
    id_to_use = "SNPID" if "SNPID" in self.data.columns else "rsID"
    output = _check_cis(self.data,
                        id=id_to_use,
                        chrom="CHR",
                        pos="POS",
                        p="P",
                        log=self.log,
                        **kwargs)

    if not gls == True:
        return output

    # Return an object of this class carrying the result.
    wrapped = copy.deepcopy(self)
    wrapped.data = output
    gc.collect()
    return wrapped
|
|
622
|
+
|
|
623
|
+
def check_novel_set(self, **kwargs):
    """Return the output of ``_check_novel_set`` run on ``self.data``.

    The ID column is ``SNPID`` when present, otherwise ``rsID``; extra keyword
    arguments are forwarded.
    """
    id_to_use = "SNPID" if "SNPID" in self.data.columns else "rsID"
    return _check_novel_set(self.data,
                            id=id_to_use,
                            chrom="CHR",
                            pos="POS",
                            p="P",
                            log=self.log,
                            **kwargs)
|
|
637
|
+
|
|
638
|
+
def anno_gene(self, **kwargs):
    """Return the output of ``annogene`` run on ``self.data``.

    The ID column is ``SNPID`` when present, otherwise ``rsID``; extra keyword
    arguments are forwarded. ``self.data`` itself is not reassigned here.
    """
    id_to_use = "SNPID" if "SNPID" in self.data.columns else "rsID"
    return annogene(self.data,
                    id=id_to_use,
                    chrom="CHR",
                    pos="POS",
                    log=self.log,
                    **kwargs)
|
|
650
|
+
|
|
651
|
+
def get_per_snp_r2(self,**kwargs):
    """Replace ``self.data`` with the result of ``_get_per_snp_r2``.

    The helper is called with the fixed column names ``BETA``, ``EAF`` and
    ``N``; extra keyword arguments are forwarded.
    """
    column_names = {"beta": "BETA", "af": "EAF", "n": "N"}
    updated = _get_per_snp_r2(self.data, log=self.log, **column_names, **kwargs)
    self.data = updated
|
|
653
|
+
#add data inplace
|
|
654
|
+
|
|
655
|
+
|
|
656
|
+
# to_format ###############################################################################################
|
|
657
|
+
|
|
658
|
+
def to_format(self, path, build=None, verbose=True, **kwargs):
    """Write ``self.data`` out through ``_to_format``.

    ``build`` defaults to ``self.meta["gwaslab"]["genome_build"]``; the
    object's log and meta are passed along, and extra keyword arguments are
    forwarded unchanged.
    """
    genome_build = self.meta["gwaslab"]["genome_build"] if build is None else build
    _to_format(self.data,
               path,
               log=self.log,
               verbose=verbose,
               meta=self.meta,
               build=genome_build,
               **kwargs)
|
|
662
|
+
|
|
663
|
+
|