gwaslab 3.4.40__py3-none-any.whl → 3.4.42__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gwaslab might be problematic. Click here for more details.
- gwaslab/bd_common_data.py +14 -1
- gwaslab/bd_get_hapmap3.py +7 -3
- gwaslab/g_Sumstats.py +156 -138
- gwaslab/g_SumstatsPair.py +15 -15
- gwaslab/g_version.py +2 -2
- gwaslab/hm_harmonize_sumstats.py +365 -12
- gwaslab/io_read_tabular.py +7 -7
- gwaslab/io_to_formats.py +96 -21
- gwaslab/io_to_pickle.py +1 -1
- gwaslab/ldsc_ldscore.py +1 -1
- gwaslab/qc_fix_sumstats.py +2 -2
- gwaslab/util_ex_calculate_ldmatrix.py +2 -2
- gwaslab/util_ex_calculate_prs.py +2 -2
- gwaslab/util_ex_ldsc.py +163 -110
- gwaslab/util_ex_plink_filter.py +2 -2
- gwaslab/util_ex_run_clumping.py +2 -2
- gwaslab/util_in_filter_value.py +27 -9
- gwaslab/viz_plot_regionalplot.py +2 -2
- gwaslab/viz_plot_trumpetplot.py +115 -4
- {gwaslab-3.4.40.dist-info → gwaslab-3.4.42.dist-info}/METADATA +33 -5
- {gwaslab-3.4.40.dist-info → gwaslab-3.4.42.dist-info}/RECORD +25 -25
- {gwaslab-3.4.40.dist-info → gwaslab-3.4.42.dist-info}/WHEEL +1 -1
- {gwaslab-3.4.40.dist-info → gwaslab-3.4.42.dist-info}/LICENSE +0 -0
- {gwaslab-3.4.40.dist-info → gwaslab-3.4.42.dist-info}/LICENSE_before_v3.4.39 +0 -0
- {gwaslab-3.4.40.dist-info → gwaslab-3.4.42.dist-info}/top_level.txt +0 -0
gwaslab/bd_common_data.py
CHANGED
|
@@ -298,9 +298,22 @@ def gtf_to_protein_coding(gtfpath,log=Log(),verbose=True):
|
|
|
298
298
|
|
|
299
299
|
return protein_coding_path
|
|
300
300
|
|
|
301
|
+
####################################################################################################################
|
|
302
|
+
# From BioPython: https://github.com/biopython/biopython/blob/c5a6b1374267d769b19c1022b4b45472316e78b4/Bio/Seq.py#L36
|
|
303
|
+
def _maketrans(complement_mapping):
|
|
304
|
+
"""Make a python string translation table.
|
|
305
|
+
|
|
306
|
+
Arguments:
|
|
307
|
+
- complement_mapping - a dictionary.
|
|
301
308
|
|
|
302
|
-
|
|
309
|
+
Returns a translation table (a bytes object of length 256) for use with
|
|
310
|
+
the python string's translate method.
|
|
303
311
|
|
|
312
|
+
Compatible with lower case and upper case sequences.
|
|
313
|
+
"""
|
|
314
|
+
keys = "".join(complement_mapping.keys()).encode("ASCII")
|
|
315
|
+
values = "".join(complement_mapping.values()).encode("ASCII")
|
|
316
|
+
return bytes.maketrans(keys + keys.lower(), values + values.lower())
|
|
304
317
|
|
|
305
318
|
####################################################################################################################
|
|
306
319
|
|
gwaslab/bd_get_hapmap3.py
CHANGED
|
@@ -12,7 +12,7 @@ from gwaslab.qc_fix_sumstats import finished
|
|
|
12
12
|
#A P-value
|
|
13
13
|
#A signed summary statistic (beta, OR, log odds, Z-score, etc)
|
|
14
14
|
|
|
15
|
-
def gethapmap3(sumstats,rsid="rsID",chrom="CHR", pos="POS", ea="EA", nea="NEA",build="19", verbose=True, match_allele= True, log=Log()):
|
|
15
|
+
def gethapmap3(sumstats,rsid="rsID",chrom="CHR", pos="POS", ea="EA", nea="NEA",build="19", verbose=True, match_allele= True, how="inner", log=Log()):
|
|
16
16
|
##start function with col checking##########################################################
|
|
17
17
|
_start_line = "extract HapMap3 SNPs"
|
|
18
18
|
_end_line = "extracting HapMap3 SNPs"
|
|
@@ -47,7 +47,7 @@ def gethapmap3(sumstats,rsid="rsID",chrom="CHR", pos="POS", ea="EA", nea="NEA",b
|
|
|
47
47
|
#rsid A1 A2 #CHROM POS
|
|
48
48
|
#rs3094315 G A 1 752566
|
|
49
49
|
|
|
50
|
-
if rsid in sumstats.columns:
|
|
50
|
+
if rsid in sumstats.columns and how=="inner":
|
|
51
51
|
output = sumstats.loc[sumstats[rsid].isin(hapmap3_ref["rsid"].values),:].copy()
|
|
52
52
|
return output
|
|
53
53
|
|
|
@@ -56,11 +56,15 @@ def gethapmap3(sumstats,rsid="rsID",chrom="CHR", pos="POS", ea="EA", nea="NEA",b
|
|
|
56
56
|
sumstats ["chr:pos"] = sumstats[chrom].astype("string")+":"+sumstats[pos].astype("string")
|
|
57
57
|
hapmap3_ref["chr:pos"] = hapmap3_ref["#CHROM"]+":"+hapmap3_ref["POS"]
|
|
58
58
|
hapmap3_ref = hapmap3_ref.rename(columns={"rsid":"rsID"})
|
|
59
|
-
output = pd.merge(sumstats,hapmap3_ref.loc[:,["chr:pos","rsID"]+additional_cols],left_on="chr:pos",right_on="chr:pos",how=
|
|
59
|
+
output = pd.merge(sumstats,hapmap3_ref.loc[:,["chr:pos","rsID"]+additional_cols],left_on="chr:pos",right_on="chr:pos",how=how,suffixes=('', '_hapmap3')).copy()
|
|
60
60
|
if match_allele:
|
|
61
61
|
log.write(" -Checking if alleles are same...")
|
|
62
62
|
is_matched = ((output[ea].astype("string") == output["A1"]) & (output[nea].astype("string") == output["A2"])) \
|
|
63
63
|
| ((output[ea].astype("string") == output["A2"]) & (output[nea].astype("string") == output["A1"]))
|
|
64
|
+
if how=="right":
|
|
65
|
+
is_matched = ((output[ea].astype("string") == output["A1"]) & (output[nea].astype("string") == output["A2"])) \
|
|
66
|
+
| ((output[ea].astype("string") == output["A2"]) & (output[nea].astype("string") == output["A1"])) | output[ea].isna()
|
|
67
|
+
|
|
64
68
|
log.write(" -Variants with macthed alleles: {}".format(sum(is_matched)))
|
|
65
69
|
output = output.loc[is_matched,:]
|
|
66
70
|
output = output.drop(columns=["chr:pos"]+additional_cols)
|
gwaslab/g_Sumstats.py
CHANGED
|
@@ -23,6 +23,7 @@ from gwaslab.qc_fix_sumstats import _process_build
|
|
|
23
23
|
from gwaslab.hm_harmonize_sumstats import parallelecheckaf
|
|
24
24
|
from gwaslab.hm_harmonize_sumstats import paralleleinferaf
|
|
25
25
|
from gwaslab.hm_harmonize_sumstats import checkref
|
|
26
|
+
from gwaslab.hm_harmonize_sumstats import oldcheckref
|
|
26
27
|
from gwaslab.hm_harmonize_sumstats import rsidtochrpos
|
|
27
28
|
from gwaslab.hm_harmonize_sumstats import parallelizeassignrsid
|
|
28
29
|
from gwaslab.hm_harmonize_sumstats import parallelinferstrand
|
|
@@ -35,6 +36,7 @@ from gwaslab.util_in_filter_value import filterregionout
|
|
|
35
36
|
from gwaslab.util_in_filter_value import _filter_indel
|
|
36
37
|
from gwaslab.util_in_filter_value import _filter_palindromic
|
|
37
38
|
from gwaslab.util_in_filter_value import _filter_snp
|
|
39
|
+
from gwaslab.util_in_filter_value import _exclude_hla
|
|
38
40
|
from gwaslab.util_in_filter_value import inferbuild
|
|
39
41
|
from gwaslab.util_in_filter_value import sampling
|
|
40
42
|
from gwaslab.util_in_filter_value import _get_flanking
|
|
@@ -238,15 +240,15 @@ class Sumstats():
|
|
|
238
240
|
self.data, self.meta["gwaslab"]["genome_build"] = _set_build(self.data, build=build, log=self.log,verbose=verbose)
|
|
239
241
|
gc.collect()
|
|
240
242
|
|
|
241
|
-
def infer_build(self,**
|
|
242
|
-
self.data, self.meta["gwaslab"]["genome_build"] = inferbuild(self.data,**
|
|
243
|
+
def infer_build(self,verbose=True,**kwargs):
|
|
244
|
+
self.data, self.meta["gwaslab"]["genome_build"] = inferbuild(self.data,log=self.log,verbose=verbose,**kwargs)
|
|
243
245
|
|
|
244
|
-
def liftover(self,to_build, from_build=None,**
|
|
246
|
+
def liftover(self,to_build, from_build=None,**kwargs):
|
|
245
247
|
if from_build is None:
|
|
246
248
|
if self.meta["gwaslab"]["genome_build"]=="99":
|
|
247
|
-
self.data, self.meta["gwaslab"]["genome_build"] = inferbuild(self.data,**
|
|
249
|
+
self.data, self.meta["gwaslab"]["genome_build"] = inferbuild(self.data,**kwargs)
|
|
248
250
|
from_build = self.meta["gwaslab"]["genome_build"]
|
|
249
|
-
self.data = parallelizeliftovervariant(self.data,from_build=from_build, to_build=to_build, log=self.log,**
|
|
251
|
+
self.data = parallelizeliftovervariant(self.data,from_build=from_build, to_build=to_build, log=self.log,**kwargs)
|
|
250
252
|
self.meta["is_sorted"] = False
|
|
251
253
|
self.meta["is_harmonised"] = False
|
|
252
254
|
self.meta["gwaslab"]["genome_build"]=to_build
|
|
@@ -259,7 +261,7 @@ class Sumstats():
|
|
|
259
261
|
n_cores=1,
|
|
260
262
|
fixid_args={},
|
|
261
263
|
removedup_args={},
|
|
262
|
-
|
|
264
|
+
fixchr_args={},
|
|
263
265
|
fixpos_args={},
|
|
264
266
|
fixallele_args={},
|
|
265
267
|
sanitycheckstats_args={},
|
|
@@ -269,8 +271,8 @@ class Sumstats():
|
|
|
269
271
|
verbose=True):
|
|
270
272
|
###############################################
|
|
271
273
|
# try to fix data without dropping any information
|
|
272
|
-
self.data = fixID(self.data,verbose=verbose, **fixid_args)
|
|
273
|
-
self.data = fixchr(self.data,log=self.log,remove=remove,verbose=verbose,**
|
|
274
|
+
self.data = fixID(self.data,log=self.log,verbose=verbose, **fixid_args)
|
|
275
|
+
self.data = fixchr(self.data,log=self.log,remove=remove,verbose=verbose,**fixchr_args)
|
|
274
276
|
self.data = fixpos(self.data,log=self.log,remove=remove,verbose=verbose,**fixpos_args)
|
|
275
277
|
self.data = fixallele(self.data,log=self.log,remove=remove,verbose=verbose,**fixallele_args)
|
|
276
278
|
self.data = sanitycheckstats(self.data,log=self.log,verbose=verbose,**sanitycheckstats_args)
|
|
@@ -294,6 +296,7 @@ class Sumstats():
|
|
|
294
296
|
ref_infer=None,
|
|
295
297
|
ref_alt_freq=None,
|
|
296
298
|
maf_threshold=0.40,
|
|
299
|
+
ref_seq_mode="v",
|
|
297
300
|
n_cores=1,
|
|
298
301
|
remove=False,
|
|
299
302
|
checkref_args={},
|
|
@@ -303,7 +306,7 @@ class Sumstats():
|
|
|
303
306
|
flipallelestats_args={},
|
|
304
307
|
liftover_args={},
|
|
305
308
|
fixid_args={},
|
|
306
|
-
|
|
309
|
+
fixchr_args={},
|
|
307
310
|
fixpos_args={},
|
|
308
311
|
fixallele_args={},
|
|
309
312
|
sanitycheckstats_args={},
|
|
@@ -321,9 +324,9 @@ class Sumstats():
|
|
|
321
324
|
# 1.6 sorting genomic coordinates and column order
|
|
322
325
|
if basic_check is True:
|
|
323
326
|
|
|
324
|
-
self.data = fixID(self.data,**fixid_args)
|
|
327
|
+
self.data = fixID(self.data,log=self.log,**fixid_args)
|
|
325
328
|
|
|
326
|
-
self.data = fixchr(self.data,remove=remove,log=self.log,**
|
|
329
|
+
self.data = fixchr(self.data,remove=remove,log=self.log,**fixchr_args)
|
|
327
330
|
|
|
328
331
|
self.data = fixpos(self.data,remove=remove,log=self.log,**fixpos_args)
|
|
329
332
|
|
|
@@ -351,8 +354,10 @@ class Sumstats():
|
|
|
351
354
|
# 3.2 infer strand for palindromic SNP (target build)
|
|
352
355
|
#####################################################
|
|
353
356
|
if ref_seq is not None:
|
|
354
|
-
|
|
355
|
-
|
|
357
|
+
if ref_seq_mode=="v":
|
|
358
|
+
self.data = checkref(self.data,ref_seq,log=self.log,**checkref_args)
|
|
359
|
+
else:
|
|
360
|
+
self.data = oldcheckref(self.data,ref_seq,log=self.log,**checkref_args)
|
|
356
361
|
|
|
357
362
|
self.meta["gwaslab"]["references"]["ref_seq"] = ref_seq
|
|
358
363
|
|
|
@@ -404,170 +409,183 @@ class Sumstats():
|
|
|
404
409
|
return self
|
|
405
410
|
############################################################################################################
|
|
406
411
|
#customizable API to build your own QC pipeline
|
|
407
|
-
def fix_id(self,**
|
|
408
|
-
self.data = fixID(self.data,log=self.log,**
|
|
409
|
-
def fix_chr(self,**
|
|
410
|
-
self.data = fixchr(self.data,log=self.log,**
|
|
411
|
-
def fix_pos(self,**
|
|
412
|
-
self.data = fixpos(self.data,log=self.log,**
|
|
413
|
-
def fix_allele(self,**
|
|
414
|
-
self.data = fixallele(self.data,log=self.log,**
|
|
415
|
-
def remove_dup(self,**
|
|
416
|
-
self.data = removedup(self.data,log=self.log,**
|
|
417
|
-
def check_sanity(self,**
|
|
418
|
-
self.data = sanitycheckstats(self.data,log=self.log,**
|
|
419
|
-
def check_data_consistency(self, **
|
|
420
|
-
_check_data_consistency(self.data,log=self.log,**
|
|
421
|
-
def check_id(self,**
|
|
412
|
+
def fix_id(self,**kwargs):
|
|
413
|
+
self.data = fixID(self.data,log=self.log,**kwargs)
|
|
414
|
+
def fix_chr(self,**kwargs):
|
|
415
|
+
self.data = fixchr(self.data,log=self.log,**kwargs)
|
|
416
|
+
def fix_pos(self,**kwargs):
|
|
417
|
+
self.data = fixpos(self.data,log=self.log,**kwargs)
|
|
418
|
+
def fix_allele(self,**kwargs):
|
|
419
|
+
self.data = fixallele(self.data,log=self.log,**kwargs)
|
|
420
|
+
def remove_dup(self,**kwargs):
|
|
421
|
+
self.data = removedup(self.data,log=self.log,**kwargs)
|
|
422
|
+
def check_sanity(self,**kwargs):
|
|
423
|
+
self.data = sanitycheckstats(self.data,log=self.log,**kwargs)
|
|
424
|
+
def check_data_consistency(self, **kwargs):
|
|
425
|
+
_check_data_consistency(self.data,log=self.log,**kwargs)
|
|
426
|
+
def check_id(self,**kwargs):
|
|
422
427
|
pass
|
|
423
|
-
def check_ref(self,ref_seq,**
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
428
|
+
def check_ref(self,ref_seq,ref_seq_mode="v",**kwargs):
|
|
429
|
+
if ref_seq_mode=="v":
|
|
430
|
+
self.meta["gwaslab"]["references"]["ref_seq"] = ref_seq
|
|
431
|
+
self.data = checkref(self.data,ref_seq,log=self.log,**kwargs)
|
|
432
|
+
else:
|
|
433
|
+
self.meta["gwaslab"]["references"]["ref_seq"] = ref_seq
|
|
434
|
+
self.data = oldcheckref(self.data,ref_seq,log=self.log,**kwargs)
|
|
435
|
+
def infer_strand(self,ref_infer,**kwargs):
|
|
427
436
|
self.meta["gwaslab"]["references"]["ref_infer"] = _append_meta_record(self.meta["gwaslab"]["references"]["ref_infer"] , ref_infer)
|
|
428
|
-
self.data = parallelinferstrand(self.data,ref_infer=ref_infer,log=self.log,**
|
|
429
|
-
def flip_allele_stats(self,**
|
|
430
|
-
self.data = flipallelestats(self.data,log=self.log,**
|
|
431
|
-
def normalize_allele(self,**
|
|
432
|
-
self.data = parallelnormalizeallele(self.data,log=self.log,**
|
|
437
|
+
self.data = parallelinferstrand(self.data,ref_infer=ref_infer,log=self.log,**kwargs)
|
|
438
|
+
def flip_allele_stats(self,**kwargs):
|
|
439
|
+
self.data = flipallelestats(self.data,log=self.log,**kwargs)
|
|
440
|
+
def normalize_allele(self,**kwargs):
|
|
441
|
+
self.data = parallelnormalizeallele(self.data,log=self.log,**kwargs)
|
|
433
442
|
def assign_rsid(self,
|
|
434
443
|
ref_rsid_tsv=None,
|
|
435
444
|
ref_rsid_vcf=None,
|
|
436
|
-
**
|
|
445
|
+
**kwargs):
|
|
437
446
|
if ref_rsid_tsv is not None:
|
|
438
|
-
self.data = parallelizeassignrsid(self.data,path=ref_rsid_tsv,ref_mode="tsv",log=self.log,**
|
|
447
|
+
self.data = parallelizeassignrsid(self.data,path=ref_rsid_tsv,ref_mode="tsv",log=self.log,**kwargs)
|
|
439
448
|
self.meta["gwaslab"]["references"]["ref_rsid_tsv"] = ref_rsid_tsv
|
|
440
449
|
if ref_rsid_vcf is not None:
|
|
441
|
-
self.data = parallelizeassignrsid(self.data,path=ref_rsid_vcf,ref_mode="vcf",log=self.log,**
|
|
450
|
+
self.data = parallelizeassignrsid(self.data,path=ref_rsid_vcf,ref_mode="vcf",log=self.log,**kwargs)
|
|
442
451
|
self.meta["gwaslab"]["references"]["ref_rsid_vcf"] = _append_meta_record(self.meta["gwaslab"]["references"]["ref_rsid_vcf"] , ref_rsid_vcf)
|
|
443
|
-
def rsid_to_chrpos(self,**
|
|
444
|
-
self.data = rsidtochrpos(self.data,log=self.log,**
|
|
445
|
-
def rsid_to_chrpos2(self,**
|
|
446
|
-
self.data = parallelrsidtochrpos(self.data,log=self.log,**
|
|
452
|
+
def rsid_to_chrpos(self,**kwargs):
|
|
453
|
+
self.data = rsidtochrpos(self.data,log=self.log,**kwargs)
|
|
454
|
+
def rsid_to_chrpos2(self,**kwargs):
|
|
455
|
+
self.data = parallelrsidtochrpos(self.data,log=self.log,**kwargs)
|
|
447
456
|
|
|
448
457
|
############################################################################################################
|
|
449
458
|
|
|
450
459
|
def sort_coordinate(self,**sort_args):
|
|
451
460
|
self.data = sortcoordinate(self.data,log=self.log,**sort_args)
|
|
452
461
|
self.meta["is_sorted"] = True
|
|
453
|
-
def sort_column(self,**
|
|
454
|
-
self.data = sortcolumn(self.data,log=self.log,**
|
|
462
|
+
def sort_column(self,**kwargs):
|
|
463
|
+
self.data = sortcolumn(self.data,log=self.log,**kwargs)
|
|
455
464
|
|
|
456
465
|
############################################################################################################
|
|
457
|
-
def fill_data(self, verbose=True, **
|
|
458
|
-
self.data = filldata(self.data, verbose=verbose, **
|
|
466
|
+
def fill_data(self, verbose=True, **kwargs):
|
|
467
|
+
self.data = filldata(self.data, verbose=verbose, log=self.log, **kwargs)
|
|
459
468
|
self.data = sortcolumn(self.data, verbose=verbose, log=self.log)
|
|
460
469
|
|
|
461
470
|
# utilities ############################################################################################################
|
|
462
471
|
# filter series ######################################################################
|
|
463
|
-
def filter_flanking(self, inplace=False,**
|
|
472
|
+
def filter_flanking(self, inplace=False,**kwargs):
|
|
473
|
+
if inplace is False:
|
|
474
|
+
new_Sumstats_object = copy.deepcopy(self)
|
|
475
|
+
new_Sumstats_object.data = _get_flanking(new_Sumstats_object.data, **kwargs)
|
|
476
|
+
return new_Sumstats_object
|
|
477
|
+
else:
|
|
478
|
+
self.data = _get_flanking(self.data, **kwargs)
|
|
479
|
+
def filter_flanking_by_chrpos(self, chrpos, inplace=False,**kwargs):
|
|
464
480
|
if inplace is False:
|
|
465
481
|
new_Sumstats_object = copy.deepcopy(self)
|
|
466
|
-
new_Sumstats_object.data =
|
|
482
|
+
new_Sumstats_object.data = _get_flanking_by_chrpos(new_Sumstats_object.data, chrpos, **kwargs)
|
|
467
483
|
return new_Sumstats_object
|
|
468
484
|
else:
|
|
469
|
-
self.data =
|
|
470
|
-
def
|
|
485
|
+
self.data = _get_flanking_by_chrpos(self.data, chrpos,**kwargs)
|
|
486
|
+
def filter_flanking_by_id(self, snpid, inplace=False,**kwargs):
|
|
471
487
|
if inplace is False:
|
|
472
488
|
new_Sumstats_object = copy.deepcopy(self)
|
|
473
|
-
new_Sumstats_object.data =
|
|
489
|
+
new_Sumstats_object.data = _get_flanking_by_id(new_Sumstats_object.data, snpid, **kwargs)
|
|
474
490
|
return new_Sumstats_object
|
|
475
491
|
else:
|
|
476
|
-
self.data =
|
|
477
|
-
def
|
|
492
|
+
self.data = _get_flanking_by_id(self.data, snpid, **kwargs)
|
|
493
|
+
def filter_value(self, expr, inplace=False, **kwargs):
|
|
478
494
|
if inplace is False:
|
|
479
495
|
new_Sumstats_object = copy.deepcopy(self)
|
|
480
|
-
new_Sumstats_object.data =
|
|
496
|
+
new_Sumstats_object.data = filtervalues(new_Sumstats_object.data,expr,log=new_Sumstats_object.log, **kwargs)
|
|
481
497
|
return new_Sumstats_object
|
|
482
498
|
else:
|
|
483
|
-
self.data =
|
|
484
|
-
def
|
|
499
|
+
self.data = filtervalues(self.data, expr,log=self.log,**kwargs)
|
|
500
|
+
def filter_out(self, inplace=False, **kwargs):
|
|
485
501
|
if inplace is False:
|
|
486
502
|
new_Sumstats_object = copy.deepcopy(self)
|
|
487
|
-
new_Sumstats_object.data =
|
|
503
|
+
new_Sumstats_object.data = filterout(new_Sumstats_object.data,log=new_Sumstats_object.log,**kwargs)
|
|
488
504
|
return new_Sumstats_object
|
|
489
505
|
else:
|
|
490
|
-
self.data =
|
|
491
|
-
def
|
|
506
|
+
self.data = filterout(self.data,log=self.log,**kwargs)
|
|
507
|
+
def filter_in(self, inplace=False, **kwargs):
|
|
492
508
|
if inplace is False:
|
|
493
509
|
new_Sumstats_object = copy.deepcopy(self)
|
|
494
|
-
new_Sumstats_object.data =
|
|
510
|
+
new_Sumstats_object.data = filterin(new_Sumstats_object.data,log=new_Sumstats_object.log,**kwargs)
|
|
495
511
|
return new_Sumstats_object
|
|
496
512
|
else:
|
|
497
|
-
self.data =
|
|
498
|
-
def
|
|
513
|
+
self.data = filterin(self.data,log=self.log,**kwargs)
|
|
514
|
+
def filter_region_in(self, inplace=False, **kwargs):
|
|
499
515
|
if inplace is False:
|
|
500
516
|
new_Sumstats_object = copy.deepcopy(self)
|
|
501
|
-
new_Sumstats_object.data =
|
|
517
|
+
new_Sumstats_object.data = filterregionin(new_Sumstats_object.data,log=new_Sumstats_object.log,**kwargs)
|
|
502
518
|
return new_Sumstats_object
|
|
503
519
|
else:
|
|
504
|
-
self.data =
|
|
505
|
-
def
|
|
520
|
+
self.data = filterregionin(self.data,log=self.log,**kwargs)
|
|
521
|
+
def filter_region_out(self, inplace=False, **kwargs):
|
|
506
522
|
if inplace is False:
|
|
507
523
|
new_Sumstats_object = copy.deepcopy(self)
|
|
508
|
-
new_Sumstats_object.data =
|
|
524
|
+
new_Sumstats_object.data = filterregionout(new_Sumstats_object.data,log=new_Sumstats_object.log,**kwargs)
|
|
509
525
|
return new_Sumstats_object
|
|
510
526
|
else:
|
|
511
|
-
self.data =
|
|
512
|
-
def
|
|
527
|
+
self.data = filterregionout(self.data,log=self.log,**kwargs)
|
|
528
|
+
def filter_palindromic(self, inplace=False, **kwargs):
|
|
513
529
|
if inplace is False:
|
|
514
530
|
new_Sumstats_object = copy.deepcopy(self)
|
|
515
|
-
new_Sumstats_object.data =
|
|
531
|
+
new_Sumstats_object.data = _filter_palindromic(new_Sumstats_object.data,log=new_Sumstats_object.log,**kwargs)
|
|
516
532
|
return new_Sumstats_object
|
|
517
533
|
else:
|
|
518
|
-
self.data =
|
|
519
|
-
def
|
|
534
|
+
self.data = _filter_palindromic(self.data,log=self.log,**kwargs)
|
|
535
|
+
def filter_snp(self, inplace=False, **kwargs):
|
|
520
536
|
if inplace is False:
|
|
521
537
|
new_Sumstats_object = copy.deepcopy(self)
|
|
522
|
-
new_Sumstats_object.data =
|
|
538
|
+
new_Sumstats_object.data = _filter_snp(new_Sumstats_object.data,log=new_Sumstats_object.log,**kwargs)
|
|
523
539
|
return new_Sumstats_object
|
|
524
540
|
else:
|
|
525
|
-
self.data =
|
|
526
|
-
def
|
|
541
|
+
self.data = _filter_snp(self.data,log=self.log,**kwargs)
|
|
542
|
+
def filter_indel(self, inplace=False, **kwargs):
|
|
527
543
|
if inplace is False:
|
|
528
544
|
new_Sumstats_object = copy.deepcopy(self)
|
|
529
|
-
new_Sumstats_object.data =
|
|
545
|
+
new_Sumstats_object.data = _filter_indel(new_Sumstats_object.data,log=new_Sumstats_object.log,**kwargs)
|
|
530
546
|
return new_Sumstats_object
|
|
531
547
|
else:
|
|
532
|
-
self.data =
|
|
533
|
-
|
|
548
|
+
self.data = _filter_indel(self.data,log=self.log,**kwargs)
|
|
549
|
+
|
|
550
|
+
def exclude_hla(self, inplace=False, **kwargs):
|
|
534
551
|
if inplace is False:
|
|
535
552
|
new_Sumstats_object = copy.deepcopy(self)
|
|
536
|
-
new_Sumstats_object.data =
|
|
553
|
+
new_Sumstats_object.data = _exclude_hla(new_Sumstats_object.data,log=new_Sumstats_object.log,**kwargs)
|
|
537
554
|
return new_Sumstats_object
|
|
538
555
|
else:
|
|
539
|
-
self.data =
|
|
556
|
+
self.data = _exclude_hla(self.data,log=self.log,**kwargs)
|
|
557
|
+
|
|
540
558
|
|
|
541
|
-
def random_variants(self,inplace=False,n=1,p=None,**
|
|
559
|
+
def random_variants(self,inplace=False,n=1,p=None,**kwargs):
|
|
542
560
|
if inplace is True:
|
|
543
|
-
self.data = sampling(self.data,n=n,p=p,log=self.log,**
|
|
561
|
+
self.data = sampling(self.data,n=n,p=p,log=self.log,**kwargs)
|
|
544
562
|
else:
|
|
545
563
|
new_Sumstats_object = copy.deepcopy(self)
|
|
546
|
-
new_Sumstats_object.data = sampling(new_Sumstats_object.data,n=n,p=p,log=new_Sumstats_object.log,**
|
|
564
|
+
new_Sumstats_object.data = sampling(new_Sumstats_object.data,n=n,p=p,log=new_Sumstats_object.log,**kwargs)
|
|
547
565
|
return new_Sumstats_object
|
|
548
566
|
|
|
549
|
-
def filter_hapmap3(self, inplace=False, build=None, **
|
|
567
|
+
def filter_hapmap3(self, inplace=False, build=None, **kwargs ):
|
|
550
568
|
if build is None:
|
|
551
569
|
build = self.meta["gwaslab"]["genome_build"]
|
|
552
570
|
if inplace is True:
|
|
553
|
-
self.data = gethapmap3(self.data, build=build,log=self.log, **
|
|
571
|
+
self.data = gethapmap3(self.data, build=build,log=self.log, **kwargs)
|
|
554
572
|
else:
|
|
555
573
|
new_Sumstats_object = copy.deepcopy(self)
|
|
556
|
-
new_Sumstats_object.data = gethapmap3(new_Sumstats_object.data, build=build,log=self.log, **
|
|
574
|
+
new_Sumstats_object.data = gethapmap3(new_Sumstats_object.data, build=build,log=self.log, **kwargs)
|
|
557
575
|
return new_Sumstats_object
|
|
558
576
|
######################################################################
|
|
559
577
|
|
|
560
|
-
def check_af(self,ref_infer,**
|
|
561
|
-
self.data = parallelecheckaf(self.data,ref_infer=ref_infer,log=self.log,**
|
|
578
|
+
def check_af(self,ref_infer,**kwargs):
|
|
579
|
+
self.data = parallelecheckaf(self.data,ref_infer=ref_infer,log=self.log,**kwargs)
|
|
562
580
|
self.meta["gwaslab"]["references"]["ref_infer_daf"] = _append_meta_record(self.meta["gwaslab"]["references"]["ref_infer_daf"] , ref_infer)
|
|
563
|
-
def infer_af(self,ref_infer,**
|
|
564
|
-
self.data = paralleleinferaf(self.data,ref_infer=ref_infer,log=self.log,**
|
|
581
|
+
def infer_af(self,ref_infer,**kwargs):
|
|
582
|
+
self.data = paralleleinferaf(self.data,ref_infer=ref_infer,log=self.log,**kwargs)
|
|
565
583
|
self.meta["gwaslab"]["references"]["ref_infer_af"] = ref_infer
|
|
566
584
|
self.meta["gwaslab"]["references"]["ref_infer_af"] = _append_meta_record(self.meta["gwaslab"]["references"]["ref_infer_af"] , ref_infer)
|
|
567
|
-
def plot_daf(self, **
|
|
568
|
-
fig,outliers = plotdaf(self.data, **
|
|
585
|
+
def plot_daf(self, **kwargs):
|
|
586
|
+
fig,outliers = plotdaf(self.data, **kwargs)
|
|
569
587
|
return fig, outliers
|
|
570
|
-
def plot_mqq(self, build=None, **
|
|
588
|
+
def plot_mqq(self, build=None, **kwargs):
|
|
571
589
|
|
|
572
590
|
chrom="CHR"
|
|
573
591
|
pos="POS"
|
|
@@ -594,17 +612,17 @@ class Sumstats():
|
|
|
594
612
|
p=p,
|
|
595
613
|
eaf=eaf,
|
|
596
614
|
build = build,
|
|
597
|
-
**
|
|
615
|
+
**kwargs)
|
|
598
616
|
|
|
599
617
|
return plot
|
|
600
618
|
|
|
601
|
-
def plot_trumpet(self, build=None, **
|
|
619
|
+
def plot_trumpet(self, build=None, **kwargs):
|
|
602
620
|
if build is None:
|
|
603
621
|
build = self.meta["gwaslab"]["genome_build"]
|
|
604
|
-
fig = plottrumpet(self.data,build = build, **
|
|
622
|
+
fig = plottrumpet(self.data,build = build, **kwargs)
|
|
605
623
|
return fig
|
|
606
624
|
|
|
607
|
-
def get_lead(self, build=None, gls=False, **
|
|
625
|
+
def get_lead(self, build=None, gls=False, **kwargs):
|
|
608
626
|
if "SNPID" in self.data.columns:
|
|
609
627
|
id_to_use = "SNPID"
|
|
610
628
|
else:
|
|
@@ -621,7 +639,7 @@ class Sumstats():
|
|
|
621
639
|
p="P",
|
|
622
640
|
log=self.log,
|
|
623
641
|
build=build,
|
|
624
|
-
**
|
|
642
|
+
**kwargs)
|
|
625
643
|
# return sumstats object
|
|
626
644
|
if gls == True:
|
|
627
645
|
new_Sumstats_object = copy.deepcopy(self)
|
|
@@ -630,7 +648,7 @@ class Sumstats():
|
|
|
630
648
|
return new_Sumstats_object
|
|
631
649
|
return output
|
|
632
650
|
|
|
633
|
-
def get_density(self, sig_list=None, windowsizekb=100,**
|
|
651
|
+
def get_density(self, sig_list=None, windowsizekb=100,**kwargs):
|
|
634
652
|
|
|
635
653
|
if "SNPID" in self.data.columns:
|
|
636
654
|
id_to_use = "SNPID"
|
|
@@ -655,7 +673,7 @@ class Sumstats():
|
|
|
655
673
|
log=self.log)
|
|
656
674
|
|
|
657
675
|
|
|
658
|
-
def get_novel(self, **
|
|
676
|
+
def get_novel(self, **kwargs):
|
|
659
677
|
if "SNPID" in self.data.columns:
|
|
660
678
|
id_to_use = "SNPID"
|
|
661
679
|
else:
|
|
@@ -666,11 +684,11 @@ class Sumstats():
|
|
|
666
684
|
pos="POS",
|
|
667
685
|
p="P",
|
|
668
686
|
log=self.log,
|
|
669
|
-
**
|
|
687
|
+
**kwargs)
|
|
670
688
|
# return sumstats object
|
|
671
689
|
return output
|
|
672
690
|
|
|
673
|
-
def check_cis(self, **
|
|
691
|
+
def check_cis(self, **kwargs):
|
|
674
692
|
if "SNPID" in self.data.columns:
|
|
675
693
|
id_to_use = "SNPID"
|
|
676
694
|
else:
|
|
@@ -681,11 +699,11 @@ class Sumstats():
|
|
|
681
699
|
pos="POS",
|
|
682
700
|
p="P",
|
|
683
701
|
log=self.log,
|
|
684
|
-
**
|
|
702
|
+
**kwargs)
|
|
685
703
|
# return sumstats object
|
|
686
704
|
return output
|
|
687
705
|
|
|
688
|
-
def check_novel_set(self, **
|
|
706
|
+
def check_novel_set(self, **kwargs):
|
|
689
707
|
if "SNPID" in self.data.columns:
|
|
690
708
|
id_to_use = "SNPID"
|
|
691
709
|
else:
|
|
@@ -696,11 +714,11 @@ class Sumstats():
|
|
|
696
714
|
pos="POS",
|
|
697
715
|
p="P",
|
|
698
716
|
log=self.log,
|
|
699
|
-
**
|
|
717
|
+
**kwargs)
|
|
700
718
|
# return sumstats object
|
|
701
719
|
return output
|
|
702
720
|
|
|
703
|
-
def anno_gene(self, **
|
|
721
|
+
def anno_gene(self, **kwargs):
|
|
704
722
|
if "SNPID" in self.data.columns:
|
|
705
723
|
id_to_use = "SNPID"
|
|
706
724
|
else:
|
|
@@ -710,73 +728,73 @@ class Sumstats():
|
|
|
710
728
|
chrom="CHR",
|
|
711
729
|
pos="POS",
|
|
712
730
|
log=self.log,
|
|
713
|
-
**
|
|
731
|
+
**kwargs)
|
|
714
732
|
return output
|
|
715
733
|
|
|
716
|
-
def get_per_snp_r2(self,**
|
|
717
|
-
self.data = _get_per_snp_r2(self.data, beta="BETA", af="EAF", n="N", log=self.log, **
|
|
734
|
+
def get_per_snp_r2(self,**kwargs):
|
|
735
|
+
self.data = _get_per_snp_r2(self.data, beta="BETA", af="EAF", n="N", log=self.log, **kwargs)
|
|
718
736
|
#add data inplace
|
|
719
737
|
|
|
720
|
-
def get_gc(self, mode=None, **
|
|
738
|
+
def get_gc(self, mode=None, **kwargs):
|
|
721
739
|
if mode is None:
|
|
722
740
|
if "P" in self.data.columns:
|
|
723
|
-
output = lambdaGC(self.data[["CHR","P"]],mode="P",**
|
|
741
|
+
output = lambdaGC(self.data[["CHR","P"]],mode="P",**kwargs)
|
|
724
742
|
elif "Z" in self.data.columns:
|
|
725
|
-
output = lambdaGC(self.data[["CHR","Z"]],mode="Z",**
|
|
743
|
+
output = lambdaGC(self.data[["CHR","Z"]],mode="Z",**kwargs)
|
|
726
744
|
elif "CHISQ" in self.data.columns:
|
|
727
|
-
output = lambdaGC(self.data[["CHR","CHISQ"]],mode="CHISQ",**
|
|
745
|
+
output = lambdaGC(self.data[["CHR","CHISQ"]],mode="CHISQ",**kwargs)
|
|
728
746
|
elif "MLOG10P" in self.data.columns:
|
|
729
|
-
output = lambdaGC(self.data[["CHR","MLOG10P"]],mode="MLOG10P",**
|
|
747
|
+
output = lambdaGC(self.data[["CHR","MLOG10P"]],mode="MLOG10P",**kwargs)
|
|
730
748
|
|
|
731
749
|
#return scalar
|
|
732
750
|
self.meta["Genomic inflation factor"] = output
|
|
733
751
|
return output
|
|
734
752
|
else:
|
|
735
|
-
output = lambdaGC(self.data[["CHR",mode]],mode=mode,**
|
|
753
|
+
output = lambdaGC(self.data[["CHR",mode]],mode=mode,**kwargs)
|
|
736
754
|
self.meta["Genomic inflation factor"] = output
|
|
737
755
|
return output
|
|
738
756
|
## LDSC ##############################################################################################
|
|
739
|
-
def estimate_h2_by_ldsc(self, build=None, verbose=True, match_allele=True, **
|
|
757
|
+
def estimate_h2_by_ldsc(self, build=None, verbose=True, match_allele=True, **kwargs):
|
|
740
758
|
if build is None:
|
|
741
759
|
build = self.meta["gwaslab"]["genome_build"]
|
|
742
|
-
insumstats = gethapmap3(self.data.copy(), build=build, verbose=verbose , match_allele=True )
|
|
743
|
-
self.ldsc_h2 = _estimate_h2_by_ldsc(insumstats=insumstats, log=self.log, verbose=verbose, **
|
|
760
|
+
insumstats = gethapmap3(self.data.copy(), build=build, verbose=verbose , match_allele=True, how="right" )
|
|
761
|
+
self.ldsc_h2 = _estimate_h2_by_ldsc(insumstats=insumstats, log=self.log, verbose=verbose, **kwargs)
|
|
744
762
|
|
|
745
|
-
def estimate_rg_by_ldsc(self, build=None, verbose=True, match_allele=True, **
|
|
763
|
+
def estimate_rg_by_ldsc(self, build=None, verbose=True, match_allele=True, **kwargs):
|
|
746
764
|
if build is None:
|
|
747
765
|
build = self.meta["gwaslab"]["genome_build"]
|
|
748
|
-
insumstats = gethapmap3(self.data.copy(), build=build, verbose=verbose , match_allele=True )
|
|
749
|
-
self.ldsc_rg = _estimate_rg_by_ldsc(insumstats=insumstats, log=self.log, verbose=verbose, **
|
|
766
|
+
insumstats = gethapmap3(self.data.copy(), build=build, verbose=verbose , match_allele=True, how="right" )
|
|
767
|
+
self.ldsc_rg = _estimate_rg_by_ldsc(insumstats=insumstats, log=self.log, verbose=verbose, **kwargs)
|
|
750
768
|
|
|
751
|
-
def estimate_h2_cts_by_ldsc(self, build=None, verbose=True, match_allele=True, **
|
|
769
|
+
def estimate_h2_cts_by_ldsc(self, build=None, verbose=True, match_allele=True, **kwargs):
|
|
752
770
|
if build is None:
|
|
753
771
|
build = self.meta["gwaslab"]["genome_build"]
|
|
754
|
-
insumstats = gethapmap3(self.data.copy(), build=build, verbose=verbose , match_allele=True )
|
|
755
|
-
self.ldsc_h2_cts = _estimate_h2_cts_by_ldsc(insumstats=insumstats, log=self.log, verbose=verbose, **
|
|
772
|
+
insumstats = gethapmap3(self.data.copy(), build=build, verbose=verbose , match_allele=True, how="right" )
|
|
773
|
+
self.ldsc_h2_cts = _estimate_h2_cts_by_ldsc(insumstats=insumstats, log=self.log, verbose=verbose, **kwargs)
|
|
756
774
|
|
|
757
|
-
def estimate_partitioned_h2_by_ldsc(self, build=None, verbose=True, match_allele=True, **
|
|
775
|
+
def estimate_partitioned_h2_by_ldsc(self, build=None, verbose=True, match_allele=True, **kwargs):
|
|
758
776
|
if build is None:
|
|
759
777
|
build = self.meta["gwaslab"]["genome_build"]
|
|
760
|
-
insumstats = gethapmap3(self.data.copy(), build=build, verbose=verbose , match_allele=True )
|
|
761
|
-
self.ldsc_partitioned_h2_summary, self.ldsc_partitioned_h2_results = _estimate_partitioned_h2_by_ldsc(insumstats=insumstats, log=self.log, verbose=verbose, **
|
|
778
|
+
insumstats = gethapmap3(self.data.copy(), build=build, verbose=verbose , match_allele=True, how="right" )
|
|
779
|
+
self.ldsc_partitioned_h2_summary, self.ldsc_partitioned_h2_results = _estimate_partitioned_h2_by_ldsc(insumstats=insumstats, log=self.log, verbose=verbose, **kwargs)
|
|
762
780
|
# external ################################################################################################
|
|
763
781
|
|
|
764
|
-
def calculate_ld_matrix(self,**
|
|
765
|
-
self.to_finemapping_file_path, self.to_finemapping_file, self.plink_log = tofinemapping(self.data,study = self.meta["gwaslab"]["study_name"],**
|
|
782
|
+
def calculate_ld_matrix(self,**kwargs):
|
|
783
|
+
self.to_finemapping_file_path, self.to_finemapping_file, self.plink_log = tofinemapping(self.data,study = self.meta["gwaslab"]["study_name"],**kwargs)
|
|
766
784
|
|
|
767
|
-
def run_susie_rss(self,**
|
|
768
|
-
self.pipcs=_run_susie_rss(self.to_finemapping_file_path,**
|
|
785
|
+
def run_susie_rss(self,**kwargs):
|
|
786
|
+
self.pipcs=_run_susie_rss(self.to_finemapping_file_path,**kwargs)
|
|
769
787
|
|
|
770
|
-
def clump(self,**
|
|
771
|
-
self.clumps,self.plink_log = _clump(self.data, log=self.log, study = self.meta["gwaslab"]["study_name"], **
|
|
788
|
+
def clump(self,**kwargs):
|
|
789
|
+
self.clumps,self.plink_log = _clump(self.data, log=self.log, study = self.meta["gwaslab"]["study_name"], **kwargs)
|
|
772
790
|
|
|
773
|
-
def calculate_prs(self,**
|
|
774
|
-
combined_results_summary = _calculate_prs(self.data, log=self.log, study = self.meta["gwaslab"]["study_name"], **
|
|
791
|
+
def calculate_prs(self,**kwargs):
|
|
792
|
+
combined_results_summary = _calculate_prs(self.data, log=self.log, study = self.meta["gwaslab"]["study_name"], **kwargs)
|
|
775
793
|
return combined_results_summary
|
|
776
794
|
|
|
777
795
|
# to_format ###############################################################################################
|
|
778
796
|
|
|
779
|
-
def to_format(self, path, build=None, **
|
|
797
|
+
def to_format(self, path, build=None, verbose=True, **kwargs):
|
|
780
798
|
if build is None:
|
|
781
799
|
build = self.meta["gwaslab"]["genome_build"]
|
|
782
|
-
_to_format(self.data, path, log=self.log, meta=self.meta, build=build, **
|
|
800
|
+
_to_format(self.data, path, log=self.log, verbose=verbose, meta=self.meta, build=build, **kwargs)
|