gwaslab 3.4.48__py3-none-any.whl → 3.4.49__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gwaslab might be problematic. Click here for more details.
- gwaslab/bd_common_data.py +3 -1
- gwaslab/data/reference.json +10 -2
- gwaslab/g_Sumstats.py +10 -0
- gwaslab/g_vchange_status.py +1 -1
- gwaslab/g_version.py +2 -2
- gwaslab/hm_harmonize_sumstats.py +23 -7
- gwaslab/io_preformat_input.py +73 -8
- gwaslab/io_to_formats.py +5 -5
- gwaslab/qc_fix_sumstats.py +106 -7
- gwaslab/util_in_fill_data.py +20 -2
- gwaslab/viz_plot_miamiplot2.py +5 -1
- gwaslab/viz_plot_stackedregional.py +11 -4
- {gwaslab-3.4.48.dist-info → gwaslab-3.4.49.dist-info}/METADATA +1 -1
- {gwaslab-3.4.48.dist-info → gwaslab-3.4.49.dist-info}/RECORD +18 -18
- {gwaslab-3.4.48.dist-info → gwaslab-3.4.49.dist-info}/WHEEL +1 -1
- {gwaslab-3.4.48.dist-info → gwaslab-3.4.49.dist-info}/LICENSE +0 -0
- {gwaslab-3.4.48.dist-info → gwaslab-3.4.49.dist-info}/LICENSE_before_v3.4.39 +0 -0
- {gwaslab-3.4.48.dist-info → gwaslab-3.4.49.dist-info}/top_level.txt +0 -0
gwaslab/bd_common_data.py
CHANGED
|
@@ -274,7 +274,9 @@ def get_gtf(chrom, build="19",source="ensembl"):
|
|
|
274
274
|
gtf = pd.DataFrame(columns=["seqname","start","end","strand","feature","gene_biotype","gene_id","gene_name"])
|
|
275
275
|
return gtf
|
|
276
276
|
|
|
277
|
-
|
|
277
|
+
def get_chain(from_build="19", to_build="38"):
|
|
278
|
+
chain_path = check_and_download("{}to{}".format(from_build, to_build))
|
|
279
|
+
return chain_path
|
|
278
280
|
####################################################################################################################
|
|
279
281
|
def gtf_to_protein_coding(gtfpath,log=Log(),verbose=True):
|
|
280
282
|
protein_coding_path = gtfpath[:-6]+"protein_coding.gtf.gz"
|
gwaslab/data/reference.json
CHANGED
|
@@ -90,12 +90,20 @@
|
|
|
90
90
|
"1kg_dbsnp151_hg38_x_md5":"48c05eeb1454c0dd4cbee3cb26382e8e",
|
|
91
91
|
"recombination_hg19":"https://www.dropbox.com/s/wbesl8haxknonuc/recombination_hg19.tar.gz?dl=1",
|
|
92
92
|
"recombination_hg38":"https://www.dropbox.com/s/vuo8mvqx0fpibzj/recombination_hg38.tar.gz?dl=1",
|
|
93
|
-
"ensembl_hg19_gtf":"https://ftp.ensembl.org/pub/grch37/
|
|
93
|
+
"ensembl_hg19_gtf":"https://ftp.ensembl.org/pub/grch37/release-87/gtf/homo_sapiens/Homo_sapiens.GRCh37.87.chr.gtf.gz",
|
|
94
94
|
"ensembl_hg38_gtf":"https://ftp.ensembl.org/pub/release-109/gtf/homo_sapiens//Homo_sapiens.GRCh38.109.chr.gtf.gz",
|
|
95
95
|
"refseq_hg19_gtf":"https://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/annotation/GRCh37_latest/refseq_identifiers/GRCh37_latest_genomic.gtf.gz",
|
|
96
96
|
"refseq_hg38_gtf":"https://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_genomic.gtf.gz",
|
|
97
97
|
"testlink":"https://www.dropbox.com/s/8u7capwge0ihshu/EAS.chr22.split_norm_af.1kgp3v5.vcf.gz?dl=1",
|
|
98
|
-
"testlink_tbi":"https://www.dropbox.com/s/hdneg53t6u1j6ib/EAS.chr22.split_norm_af.1kgp3v5.vcf.gz.tbi?dl=1"
|
|
98
|
+
"testlink_tbi":"https://www.dropbox.com/s/hdneg53t6u1j6ib/EAS.chr22.split_norm_af.1kgp3v5.vcf.gz.tbi?dl=1",
|
|
99
|
+
"19to38":"https://hgdownload.soe.ucsc.edu/goldenPath/hg19/liftOver/hg19ToHg38.over.chain.gz",
|
|
100
|
+
"19to13":"https://s3-us-west-2.amazonaws.com/human-pangenomics/T2T/CHM13/assemblies/chain/v1_nflo/hg19-chm13v2.chain",
|
|
101
|
+
"38to19":"https://hgdownload.soe.ucsc.edu/goldenPath/hg38/liftOver/hg38ToHg19.over.chain.gz",
|
|
102
|
+
"38to13":"https://s3-us-west-2.amazonaws.com/human-pangenomics/T2T/CHM13/assemblies/chain/v1_nflo/grch38-chm13v2.chain",
|
|
103
|
+
"13to19":"https://s3-us-west-2.amazonaws.com/human-pangenomics/T2T/CHM13/assemblies/chain/v1_nflo/chm13v2-hg19.chain",
|
|
104
|
+
"13to38":"https://s3-us-west-2.amazonaws.com/human-pangenomics/T2T/CHM13/assemblies/chain/v1_nflo/chm13v2-grch38.chain",
|
|
105
|
+
"18to19":"https://hgdownload.soe.ucsc.edu/goldenPath/hg18/liftOver/hg18ToHg19.over.chain.gz",
|
|
106
|
+
"18to38":"https://hgdownload.soe.ucsc.edu/goldenPath/hg18/liftOver/hg18ToHg38.over.chain.gz"
|
|
99
107
|
}
|
|
100
108
|
|
|
101
109
|
|
gwaslab/g_Sumstats.py
CHANGED
|
@@ -8,6 +8,8 @@ from gwaslab.io_preformat_input import preformat
|
|
|
8
8
|
from gwaslab.io_to_formats import _to_format
|
|
9
9
|
from gwaslab.g_Log import Log
|
|
10
10
|
from gwaslab.qc_fix_sumstats import fixID
|
|
11
|
+
from gwaslab.qc_fix_sumstats import flipSNPID
|
|
12
|
+
from gwaslab.qc_fix_sumstats import stripSNPID
|
|
11
13
|
from gwaslab.qc_fix_sumstats import removedup
|
|
12
14
|
from gwaslab.qc_fix_sumstats import fixchr
|
|
13
15
|
from gwaslab.qc_fix_sumstats import fixpos
|
|
@@ -123,6 +125,8 @@ class Sumstats():
|
|
|
123
125
|
snpr2=None,
|
|
124
126
|
status=None,
|
|
125
127
|
other=[],
|
|
128
|
+
chrom_pat=None,
|
|
129
|
+
snpid_pat=None,
|
|
126
130
|
usekeys=None,
|
|
127
131
|
direction=None,
|
|
128
132
|
verbose=True,
|
|
@@ -205,6 +209,8 @@ class Sumstats():
|
|
|
205
209
|
status=status,
|
|
206
210
|
other=other,
|
|
207
211
|
usekeys=usekeys,
|
|
212
|
+
chrom_pat=chrom_pat,
|
|
213
|
+
snpid_pat=snpid_pat,
|
|
208
214
|
verbose=verbose,
|
|
209
215
|
readargs=readargs,
|
|
210
216
|
log=self.log)
|
|
@@ -418,6 +424,10 @@ class Sumstats():
|
|
|
418
424
|
#customizable API to build your own QC pipeline
|
|
419
425
|
def fix_id(self,**kwargs):
|
|
420
426
|
self.data = fixID(self.data,log=self.log,**kwargs)
|
|
427
|
+
def flip_snpid(self,**kwargs):
|
|
428
|
+
self.data = flipSNPID(self.data,log=self.log,**kwargs)
|
|
429
|
+
def strip_snpid(self,**kwargs):
|
|
430
|
+
self.data = stripSNPID(self.data,log=self.log,**kwargs)
|
|
421
431
|
def fix_chr(self,**kwargs):
|
|
422
432
|
self.data = fixchr(self.data,log=self.log,**kwargs)
|
|
423
433
|
def fix_pos(self,**kwargs):
|
gwaslab/g_vchange_status.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import pandas as pd
|
|
2
2
|
|
|
3
|
-
CATEGORIES = {str(j+i) for j in [1900000,3800000,9700000,9800000,9900000] for i in range(0,100000)}
|
|
3
|
+
CATEGORIES = {str(j+i) for j in [1300000,1800000,1900000,3800000,9700000,9800000,9900000] for i in range(0,100000)}
|
|
4
4
|
|
|
5
5
|
def vchange_status(status,digit,before,after):
|
|
6
6
|
dic={}
|
gwaslab/g_version.py
CHANGED
gwaslab/hm_harmonize_sumstats.py
CHANGED
|
@@ -21,6 +21,7 @@ from gwaslab.qc_check_datatype import check_dataframe_shape
|
|
|
21
21
|
from gwaslab.bd_common_data import get_number_to_chr
|
|
22
22
|
from gwaslab.bd_common_data import get_chr_list
|
|
23
23
|
from gwaslab.bd_common_data import get_chr_to_number
|
|
24
|
+
from gwaslab.bd_common_data import get_number_to_NC
|
|
24
25
|
from gwaslab.bd_common_data import _maketrans
|
|
25
26
|
from gwaslab.g_vchange_status import vchange_status
|
|
26
27
|
from gwaslab.g_version import _get_version
|
|
@@ -355,7 +356,7 @@ def oldcheckref(sumstats,ref_seq,chrom="CHR",pos="POS",ea="EA",nea="NEA",status=
|
|
|
355
356
|
|
|
356
357
|
log.write("\n",end="",show_time=False,verbose=verbose)
|
|
357
358
|
|
|
358
|
-
CATEGORIES = {str(j+i) for j in [1900000,3800000,9700000,9800000,9900000] for i in range(0,100000)}
|
|
359
|
+
CATEGORIES = {str(j+i) for j in [1300000,1800000,1900000,3800000,9700000,9800000,9900000] for i in range(0,100000)}
|
|
359
360
|
sumstats[status] = pd.Categorical(sumstats[status],categories=CATEGORIES)
|
|
360
361
|
#sumstats[status] = sumstats[status].astype("string")
|
|
361
362
|
|
|
@@ -674,7 +675,7 @@ def checkref(sumstats,ref_seq,chrom="CHR",pos="POS",ea="EA",nea="NEA",status="ST
|
|
|
674
675
|
sumstats.loc[to_check_ref,status] = check_status(sumstats_to_check, all_records_dict, log=log, verbose=verbose)
|
|
675
676
|
log.write(" -Finished checking records", verbose=verbose)
|
|
676
677
|
|
|
677
|
-
CATEGORIES = {str(j+i) for j in [1900000,3800000,9700000,9800000,9900000] for i in range(0,100000)}
|
|
678
|
+
CATEGORIES = {str(j+i) for j in [1300000,1800000,1900000,3800000,9700000,9800000,9900000] for i in range(0,100000)}
|
|
678
679
|
sumstats[status] = pd.Categorical(sumstats[status],categories=CATEGORIES)
|
|
679
680
|
#sumstats[status] = sumstats[status].astype("string")
|
|
680
681
|
|
|
@@ -1496,17 +1497,21 @@ def infer_af(chr,start,end,ref,alt,vcf_reader,alt_freq,chr_dict=None):
|
|
|
1496
1497
|
def auto_check_vcf_chr_dict(vcf_path, vcf_chr_dict, verbose, log):
|
|
1497
1498
|
if vcf_path is not None:
|
|
1498
1499
|
if vcf_chr_dict is None:
|
|
1499
|
-
log.write(" -Checking
|
|
1500
|
-
|
|
1500
|
+
log.write(" -Checking chromosome notations in VCF/BCF files..." ,verbose=verbose)
|
|
1501
|
+
vcf_chr_dict = check_vcf_chr_NC(vcf_path, log, verbose)
|
|
1502
|
+
if vcf_chr_dict is not None:
|
|
1503
|
+
return vcf_chr_dict
|
|
1504
|
+
log.write(" -Checking prefix for chromosomes in VCF/BCF files..." ,verbose=verbose)
|
|
1505
|
+
prefix = check_vcf_chr_prefix(vcf_path, log,verbose)
|
|
1501
1506
|
if prefix is not None:
|
|
1502
1507
|
log.write(" -Prefix for chromosomes: ",prefix)
|
|
1503
1508
|
vcf_chr_dict = get_number_to_chr(prefix=prefix)
|
|
1504
1509
|
else:
|
|
1505
|
-
log.write(" -No prefix for chromosomes in the VCF files." ,verbose=verbose)
|
|
1510
|
+
log.write(" -No prefix for chromosomes in the VCF/BCF files." ,verbose=verbose)
|
|
1506
1511
|
vcf_chr_dict = get_number_to_chr()
|
|
1507
1512
|
return vcf_chr_dict
|
|
1508
1513
|
|
|
1509
|
-
def check_vcf_chr_prefix(vcf_bcf_path):
|
|
1514
|
+
def check_vcf_chr_prefix(vcf_bcf_path,log,verbose):
|
|
1510
1515
|
vcf_bcf = VariantFile(vcf_bcf_path)
|
|
1511
1516
|
for i in list(vcf_bcf.header.contigs):
|
|
1512
1517
|
m = re.search('(chr|Chr|CHR)([0-9xXyYmM]+)', i)
|
|
@@ -1514,5 +1519,16 @@ def check_vcf_chr_prefix(vcf_bcf_path):
|
|
|
1514
1519
|
return m.group(1)
|
|
1515
1520
|
else:
|
|
1516
1521
|
return None
|
|
1517
|
-
|
|
1522
|
+
|
|
1523
|
+
def check_vcf_chr_NC(vcf_bcf_path,log,verbose):
|
|
1524
|
+
vcf_bcf = VariantFile(vcf_bcf_path)
|
|
1525
|
+
for i in list(vcf_bcf.header.contigs):
|
|
1526
|
+
if i in get_number_to_NC(build="19").values():
|
|
1527
|
+
log.write(" -RefSeq ID detected (hg19) in VCF/BCF...",verbose=verbose)
|
|
1528
|
+
return get_number_to_NC(build="19")
|
|
1529
|
+
elif i in get_number_to_NC(build="38").values():
|
|
1530
|
+
log.write(" -RefSeq ID detected (hg38) in VCF/BCF...",verbose=verbose)
|
|
1531
|
+
return get_number_to_NC(build="38")
|
|
1532
|
+
else:
|
|
1533
|
+
return None
|
|
1518
1534
|
|
gwaslab/io_preformat_input.py
CHANGED
|
@@ -56,6 +56,8 @@ def preformat(sumstats,
|
|
|
56
56
|
build=None,
|
|
57
57
|
other=[],
|
|
58
58
|
usekeys=None,
|
|
59
|
+
chrom_pat=None,
|
|
60
|
+
snpid_pat=None,
|
|
59
61
|
verbose=False,
|
|
60
62
|
readargs=None,
|
|
61
63
|
log=None):
|
|
@@ -84,7 +86,10 @@ def preformat(sumstats,
|
|
|
84
86
|
if "format_separator" in meta_data.keys():
|
|
85
87
|
if "sep" not in readargs.keys():
|
|
86
88
|
readargs["sep"] = meta_data["format_separator"]
|
|
87
|
-
|
|
89
|
+
else:
|
|
90
|
+
if readargs["sep"] != meta_data["format_separator"]:
|
|
91
|
+
log.write(' - format_separator will be changed to: "{}"'.format(readargs["sep"]),verbose=verbose)
|
|
92
|
+
|
|
88
93
|
if "format_na" in meta_data.keys():
|
|
89
94
|
readargs["na_values"] = meta_data["format_na"]
|
|
90
95
|
|
|
@@ -92,7 +97,7 @@ def preformat(sumstats,
|
|
|
92
97
|
readargs["comment"] = meta_data["format_comment"]
|
|
93
98
|
|
|
94
99
|
if "sep" not in readargs.keys():
|
|
95
|
-
|
|
100
|
+
readargs["sep"] = "\t"
|
|
96
101
|
|
|
97
102
|
#########################################################################################################################################################
|
|
98
103
|
|
|
@@ -323,10 +328,30 @@ def preformat(sumstats,
|
|
|
323
328
|
skip_rows = get_skip_rows(inpath)
|
|
324
329
|
readargs["skiprows"] = skip_rows
|
|
325
330
|
log.write("Start to initialize gl.Sumstats from file :" + inpath,verbose=verbose)
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
331
|
+
if chrom_pat is not None:
|
|
332
|
+
sumstats = _load_single_chr(inpath,
|
|
333
|
+
usecols,
|
|
334
|
+
dtype_dictionary,
|
|
335
|
+
readargs=readargs,
|
|
336
|
+
rename_dictionary=rename_dictionary,
|
|
337
|
+
chrom_pat=chrom_pat,
|
|
338
|
+
log=log,
|
|
339
|
+
verbose=verbose)
|
|
340
|
+
elif snpid_pat is not None:
|
|
341
|
+
|
|
342
|
+
sumstats = _load_variants_with_pattern(inpath,
|
|
343
|
+
usecols,
|
|
344
|
+
dtype_dictionary,
|
|
345
|
+
readargs=readargs,
|
|
346
|
+
rename_dictionary=rename_dictionary,
|
|
347
|
+
snpid_pat=snpid_pat,
|
|
348
|
+
log=log,
|
|
349
|
+
verbose=verbose)
|
|
350
|
+
else:
|
|
351
|
+
sumstats = pd.read_table(inpath,
|
|
352
|
+
usecols=set(usecols),
|
|
353
|
+
dtype=dtype_dictionary,
|
|
354
|
+
**readargs)
|
|
330
355
|
|
|
331
356
|
elif type(sumstats) is pd.DataFrame:
|
|
332
357
|
## loading data from dataframe
|
|
@@ -520,9 +545,49 @@ def process_status(sumstats,build,log,verbose):
|
|
|
520
545
|
#sumstats["STATUS"] = int(build)*(10**5) +99999
|
|
521
546
|
build = _process_build(build,log,verbose)
|
|
522
547
|
sumstats["STATUS"] = build +"99999"
|
|
523
|
-
categories = {str(j+i) for j in [1900000,3800000,9700000,9800000,9900000] for i in range(0,100000)}
|
|
548
|
+
categories = {str(j+i) for j in [1300000,1800000,1900000,3800000,9700000,9800000,9900000] for i in range(0,100000)}
|
|
524
549
|
sumstats["STATUS"] = pd.Categorical(sumstats["STATUS"],categories=categories)
|
|
525
550
|
return sumstats
|
|
526
551
|
|
|
527
552
|
|
|
528
|
-
|
|
553
|
+
def _load_single_chr(inpath,usecols,dtype_dictionary,readargs,rename_dictionary,chrom_pat,log,verbose):
|
|
554
|
+
|
|
555
|
+
sumstats_iter = pd.read_table(inpath,
|
|
556
|
+
usecols=set(usecols),
|
|
557
|
+
dtype=dtype_dictionary,
|
|
558
|
+
iterator=True,
|
|
559
|
+
chunksize=500000,
|
|
560
|
+
**readargs)
|
|
561
|
+
# get chr
|
|
562
|
+
for k,v in rename_dictionary.items():
|
|
563
|
+
if v=="CHR":
|
|
564
|
+
if k in usecols:
|
|
565
|
+
log.write(" -Columns used to filter variants: {}".format(k),verbose=verbose)
|
|
566
|
+
chunk_chrom = k
|
|
567
|
+
break
|
|
568
|
+
|
|
569
|
+
log.write(" -Loading only variants on chromosome with pattern : {} ...".format(chrom_pat),verbose=verbose)
|
|
570
|
+
sumstats_filtered = pd.concat([chunk[chunk[chunk_chrom].str.match(chrom_pat, case=False,na=False) ] for chunk in sumstats_iter])
|
|
571
|
+
log.write(" -Loaded {} variants on chromosome with pattern :{} ...".format(len(sumstats_filtered), chrom_pat),verbose=verbose)
|
|
572
|
+
return sumstats_filtered
|
|
573
|
+
|
|
574
|
+
def _load_variants_with_pattern(inpath,usecols,dtype_dictionary,readargs,rename_dictionary,snpid_pat,log,verbose):
|
|
575
|
+
|
|
576
|
+
sumstats_iter = pd.read_table(inpath,
|
|
577
|
+
usecols=set(usecols),
|
|
578
|
+
dtype=dtype_dictionary,
|
|
579
|
+
iterator=True,
|
|
580
|
+
chunksize=500000,
|
|
581
|
+
**readargs)
|
|
582
|
+
# get chr
|
|
583
|
+
for k,v in rename_dictionary.items():
|
|
584
|
+
if v=="SNPID":
|
|
585
|
+
if k in usecols:
|
|
586
|
+
log.write(" -Columns used to filter variants: {}".format(k),verbose=verbose)
|
|
587
|
+
chunk_snpid = k
|
|
588
|
+
break
|
|
589
|
+
|
|
590
|
+
log.write(" -Loading only variants with pattern : {} ...".format(snpid_pat),verbose=verbose)
|
|
591
|
+
sumstats_filtered = pd.concat([chunk[chunk[chunk_snpid].str.match(snpid_pat, case=False,na=False) ] for chunk in sumstats_iter])
|
|
592
|
+
log.write(" -Loaded {} variants with pattern : {} ...".format(len(sumstats_filtered), snpid_pat),verbose=verbose)
|
|
593
|
+
return sumstats_filtered
|
gwaslab/io_to_formats.py
CHANGED
|
@@ -342,7 +342,7 @@ def tofmt(sumstats,
|
|
|
342
342
|
meta_data,rename_dictionary = get_format_dict(fmt,inverse=True)
|
|
343
343
|
print_format_info(fmt=fmt, meta_data=meta_data,rename_dictionary=rename_dictionary,verbose=verbose, log=log, output=True)
|
|
344
344
|
|
|
345
|
-
|
|
345
|
+
yaml_path = path + "."+suffix+".tsv-meta.yaml"
|
|
346
346
|
path = path + "."+suffix+".tsv.gz"
|
|
347
347
|
log.write(" -Output path:",path, verbose=verbose)
|
|
348
348
|
|
|
@@ -361,7 +361,7 @@ def tofmt(sumstats,
|
|
|
361
361
|
md5_value = calculate_md5sum_file(path)
|
|
362
362
|
|
|
363
363
|
## update ssf-style meta data and export to yaml file
|
|
364
|
-
_configure_ssf_meta(sumstats, fmt, ssfmeta, meta, meta_data, path, md5_value,
|
|
364
|
+
_configure_ssf_meta(sumstats, fmt, ssfmeta, meta, meta_data, path, md5_value, yaml_path, log, verbose)
|
|
365
365
|
|
|
366
366
|
return sumstats
|
|
367
367
|
|
|
@@ -476,7 +476,7 @@ def _configure_output_cols_and_args(sumstats, rename_dictionary, cols, no_status
|
|
|
476
476
|
return sumstats, to_csvargs
|
|
477
477
|
|
|
478
478
|
|
|
479
|
-
def _configure_ssf_meta(sumstats, fmt, ssfmeta, meta, meta_data, path, md5_value,
|
|
479
|
+
def _configure_ssf_meta(sumstats, fmt, ssfmeta, meta, meta_data, path, md5_value, yaml_path, log, verbose):
|
|
480
480
|
### calculate meta data
|
|
481
481
|
if "EAF" in sumstats.columns:
|
|
482
482
|
min_maf = sumstats["EAF"].min()
|
|
@@ -506,8 +506,8 @@ def _configure_ssf_meta(sumstats, fmt, ssfmeta, meta, meta_data, path, md5_value
|
|
|
506
506
|
sumstats_meta_copy["gwaslab"]["samples"]["sample_size_min"] = n_min
|
|
507
507
|
sumstats_meta_copy["gwaslab"]["samples"]["sample_size_median"] = n_median
|
|
508
508
|
sumstats_meta_copy["gwaslab"]["variants"]["variant_number"] = len(sumstats)
|
|
509
|
-
log.write(" -Exporting SSF-style meta data to {}".format(
|
|
510
|
-
with open(
|
|
509
|
+
log.write(" -Exporting SSF-style meta data to {}".format(yaml_path),verbose=verbose)
|
|
510
|
+
with open(yaml_path, 'w') as outfile:
|
|
511
511
|
yaml.dump(sumstats_meta_copy, outfile)
|
|
512
512
|
|
|
513
513
|
|
gwaslab/qc_fix_sumstats.py
CHANGED
|
@@ -5,6 +5,7 @@ import numpy as np
|
|
|
5
5
|
from itertools import repeat
|
|
6
6
|
from multiprocessing import Pool
|
|
7
7
|
from liftover import get_lifter
|
|
8
|
+
from liftover import ChainFile
|
|
8
9
|
from functools import partial
|
|
9
10
|
from gwaslab.g_vchange_status import vchange_status
|
|
10
11
|
from gwaslab.g_vchange_status import status_match
|
|
@@ -19,6 +20,7 @@ from gwaslab.g_version import _get_version
|
|
|
19
20
|
from gwaslab.util_in_fill_data import _convert_betase_to_mlog10p
|
|
20
21
|
from gwaslab.util_in_fill_data import _convert_betase_to_p
|
|
21
22
|
from gwaslab.util_in_fill_data import _convert_mlog10p_to_p
|
|
23
|
+
from gwaslab.bd_common_data import get_chain
|
|
22
24
|
#process build
|
|
23
25
|
#setbuild
|
|
24
26
|
#fixID
|
|
@@ -43,9 +45,15 @@ def _process_build(build,log,verbose):
|
|
|
43
45
|
if str(build).lower() in ["hg19","19","37","b37","grch37"]:
|
|
44
46
|
log.write(" -Genomic coordinates are based on GRCh37/hg19...", verbose=verbose)
|
|
45
47
|
final_build = "19"
|
|
48
|
+
elif str(build).lower() in ["hg18","18","36","b36","grch36"]:
|
|
49
|
+
log.write(" -Genomic coordinates are based on GRCh36/hg18...", verbose=verbose)
|
|
50
|
+
final_build = "18"
|
|
46
51
|
elif str(build).lower() in ["hg38","38","b38","grch38"]:
|
|
47
52
|
log.write(" -Genomic coordinates are based on GRCh38/hg38...", verbose=verbose)
|
|
48
53
|
final_build = "38"
|
|
54
|
+
elif str(build).lower() in ["t2t","hs1","chm13","13"]:
|
|
55
|
+
log.write(" -Genomic coordinates are based on T2T-CHM13...", verbose=verbose)
|
|
56
|
+
final_build = "13"
|
|
49
57
|
else:
|
|
50
58
|
log.warning("Version of genomic coordinates is unknown...", verbose=verbose)
|
|
51
59
|
final_build = "99"
|
|
@@ -358,6 +366,76 @@ def fixID(sumstats,
|
|
|
358
366
|
|
|
359
367
|
""
|
|
360
368
|
|
|
369
|
+
def stripSNPID(sumstats,snpid="SNPID",overwrite=False,verbose=True,log=Log()):
|
|
370
|
+
'''
|
|
371
|
+
flip EA and NEA SNPid CHR:POS:EA:NEA -> CHR:POS:NEA:EA
|
|
372
|
+
'''
|
|
373
|
+
##start function with col checking##########################################################
|
|
374
|
+
_start_line = "strip SNPID"
|
|
375
|
+
_end_line = "stripping SNPID"
|
|
376
|
+
_start_cols =["SNPID"]
|
|
377
|
+
_start_function = ".strip_snpid()"
|
|
378
|
+
_must_args ={}
|
|
379
|
+
|
|
380
|
+
is_enough_info = start_to(sumstats=sumstats,
|
|
381
|
+
log=log,
|
|
382
|
+
verbose=verbose,
|
|
383
|
+
start_line=_start_line,
|
|
384
|
+
end_line=_end_line,
|
|
385
|
+
start_cols=_start_cols,
|
|
386
|
+
start_function=_start_function,
|
|
387
|
+
**_must_args)
|
|
388
|
+
if is_enough_info == False: return sumstats
|
|
389
|
+
log.write(" -Checking if SNPID is (xxx:)CHR:POS:ATCG_Allele:ATCG_Allele(:xxx)...(separator: - ,: , _)",verbose=verbose)
|
|
390
|
+
is_chrposrefalt = sumstats[snpid].str.contains(r'[:_-]?\w+[:_-]\d+[:_-][ATCG]+[:_-][ATCG]+[:_-]?', case=False, flags=0, na=False)
|
|
391
|
+
# check if SNPID is NA
|
|
392
|
+
is_snpid_na = sumstats[snpid].isna()
|
|
393
|
+
|
|
394
|
+
log.write(" -Stripping {} non-NA fixable SNPIDs...".format(sum(is_chrposrefalt)),verbose=verbose)
|
|
395
|
+
|
|
396
|
+
# flip
|
|
397
|
+
sumstats.loc[is_chrposrefalt,snpid] = \
|
|
398
|
+
sumstats.loc[is_chrposrefalt,snpid].str.extract(r'[:_-]?(chr)?(\w+[:_-]\d+[:_-][ATCG]+[:_-][ATCG]+)[:_-]?',flags=re.IGNORECASE|re.ASCII)[1].astype("string")
|
|
399
|
+
|
|
400
|
+
finished(log,verbose,_end_line)
|
|
401
|
+
return sumstats
|
|
402
|
+
|
|
403
|
+
def flipSNPID(sumstats,snpid="SNPID",overwrite=False,verbose=True,log=Log()):
|
|
404
|
+
'''
|
|
405
|
+
flip EA and NEA SNPid CHR:POS:EA:NEA -> CHR:POS:NEA:EA
|
|
406
|
+
'''
|
|
407
|
+
##start function with col checking##########################################################
|
|
408
|
+
_start_line = "flip SNPID"
|
|
409
|
+
_end_line = "flipping SNPID"
|
|
410
|
+
_start_cols =["SNPID"]
|
|
411
|
+
_start_function = ".flip_snpid()"
|
|
412
|
+
_must_args ={}
|
|
413
|
+
|
|
414
|
+
is_enough_info = start_to(sumstats=sumstats,
|
|
415
|
+
log=log,
|
|
416
|
+
verbose=verbose,
|
|
417
|
+
start_line=_start_line,
|
|
418
|
+
end_line=_end_line,
|
|
419
|
+
start_cols=_start_cols,
|
|
420
|
+
start_function=_start_function,
|
|
421
|
+
**_must_args)
|
|
422
|
+
if is_enough_info == False: return sumstats
|
|
423
|
+
log.warning("This function only flips alleles in SNPID without changing EA, NEA, STATUS or any statistics.")
|
|
424
|
+
log.write(" -Checking if SNPID is CHR:POS:ATCG_Allele:ATCG_Allele...(separator: - ,: , _)",verbose=verbose)
|
|
425
|
+
is_chrposrefalt = sumstats[snpid].str.match(r'^\w+[:_-]\d+[:_-][ATCG]+[:_-][ATCG]+$', case=False, flags=0, na=False)
|
|
426
|
+
# check if SNPID is NA
|
|
427
|
+
is_snpid_na = sumstats[snpid].isna()
|
|
428
|
+
|
|
429
|
+
log.write(" -Flipping {} non-NA fixable SNPIDs...".format(sum(is_chrposrefalt)),verbose=verbose)
|
|
430
|
+
|
|
431
|
+
# flip
|
|
432
|
+
sumstats.loc[is_chrposrefalt,snpid] = \
|
|
433
|
+
sumstats.loc[is_chrposrefalt,snpid].str.extract(r'^(chr)?(\w+[:_-]\d+)[:_-]([ATCG]+)[:_-]([ATCG]+)$',flags=re.IGNORECASE|re.ASCII)[1].astype("string") \
|
|
434
|
+
+ ":"+sumstats.loc[is_chrposrefalt,snpid].str.extract(r'^(chr)?(\w+)[:_-](\d+)[:_-]([ATCG]+)[:_-]([ATCG]+)$',flags=re.IGNORECASE|re.ASCII)[4].astype("string") \
|
|
435
|
+
+ ":"+sumstats.loc[is_chrposrefalt,snpid].str.extract(r'^(chr)?(\w+)[:_-](\d+)[:_-]([ATCG]+)[:_-]([ATCG]+)$',flags=re.IGNORECASE|re.ASCII)[3].astype("string")
|
|
436
|
+
|
|
437
|
+
finished(log,verbose,_end_line)
|
|
438
|
+
return sumstats
|
|
361
439
|
|
|
362
440
|
###############################################################################################################
|
|
363
441
|
# 20230128
|
|
@@ -1041,7 +1119,7 @@ def check_range(sumstats, var_range, header, coltocheck, cols_to_check, log, ver
|
|
|
1041
1119
|
cols_to_check.append(header)
|
|
1042
1120
|
if header=="STATUS":
|
|
1043
1121
|
log.write(" -Checking STATUS and converting STATUS to categories....", verbose=verbose)
|
|
1044
|
-
categories = {str(j+i) for j in [1900000,3800000,9700000,9800000,9900000] for i in range(0,100000)}
|
|
1122
|
+
categories = {str(j+i) for j in [1300000,1800000,1900000,3800000,9700000,9800000,9900000] for i in range(0,100000)}
|
|
1045
1123
|
sumstats[header] = pd.Categorical(sumstats[header],categories=categories)
|
|
1046
1124
|
return sumstats
|
|
1047
1125
|
|
|
@@ -1496,11 +1574,19 @@ def liftover_variant(sumstats,
|
|
|
1496
1574
|
pos="POS",
|
|
1497
1575
|
status="STATUS",
|
|
1498
1576
|
from_build="19",
|
|
1499
|
-
to_build="38"
|
|
1577
|
+
to_build="38",
|
|
1578
|
+
chain=None):
|
|
1579
|
+
|
|
1500
1580
|
try:
|
|
1501
|
-
|
|
1581
|
+
if chain is None:
|
|
1582
|
+
converter = get_lifter(from_build,to_build,one_based=True)
|
|
1583
|
+
else:
|
|
1584
|
+
converter = ChainFile(chain, one_based=True)
|
|
1502
1585
|
except:
|
|
1503
|
-
|
|
1586
|
+
if chain is None:
|
|
1587
|
+
converter = get_lifter(from_build,to_build)
|
|
1588
|
+
else:
|
|
1589
|
+
converter = ChainFile(chain)
|
|
1504
1590
|
|
|
1505
1591
|
dic= get_number_to_chr(in_chr=False,xymt=["X","Y","M"])
|
|
1506
1592
|
dic2= get_chr_to_number(out_chr=False)
|
|
@@ -1513,7 +1599,7 @@ def liftover_variant(sumstats,
|
|
|
1513
1599
|
sumstats.loc[variants_on_chrom_to_convert,chrom] = lifted.str[0].map(dic2).astype("Int64")
|
|
1514
1600
|
return sumstats
|
|
1515
1601
|
|
|
1516
|
-
def parallelizeliftovervariant(sumstats,n_cores=1,chrom="CHR", pos="POS", from_build="19", to_build="38",status="STATUS",remove=True, verbose=True,log=Log()):
|
|
1602
|
+
def parallelizeliftovervariant(sumstats,n_cores=1,chrom="CHR", pos="POS", from_build="19", to_build="38",status="STATUS",remove=True,chain=None, verbose=True,log=Log()):
|
|
1517
1603
|
##start function with col checking##########################################################
|
|
1518
1604
|
_start_line = "perform liftover"
|
|
1519
1605
|
_end_line = "liftover"
|
|
@@ -1532,8 +1618,21 @@ def parallelizeliftovervariant(sumstats,n_cores=1,chrom="CHR", pos="POS", from_b
|
|
|
1532
1618
|
**_must_args)
|
|
1533
1619
|
if is_enough_info == False: return sumstats
|
|
1534
1620
|
############################################################################################
|
|
1621
|
+
|
|
1622
|
+
lifter_from_build = _process_build(from_build,log=log,verbose=False)
|
|
1623
|
+
lifter_to_build = _process_build(to_build,log=log,verbose=False)
|
|
1535
1624
|
|
|
1536
|
-
|
|
1625
|
+
if chain is not None:
|
|
1626
|
+
log.write(" -Creating converter using ChainFile: {}".format(chain), verbose=verbose)
|
|
1627
|
+
else:
|
|
1628
|
+
try:
|
|
1629
|
+
chain = get_chain(from_build=from_build, to_build=to_build)
|
|
1630
|
+
log.write(" -Creating converter using ChainFile: {}".format(chain), verbose=verbose)
|
|
1631
|
+
except:
|
|
1632
|
+
chain = None
|
|
1633
|
+
lifter_from_build=from_build
|
|
1634
|
+
lifter_to_build=to_build
|
|
1635
|
+
log.write(" -Creating converter : {} -> {}".format(lifter_from_build, lifter_to_build), verbose=verbose)
|
|
1537
1636
|
# valid chr and pos
|
|
1538
1637
|
pattern = r"\w\w\w0\w\w\w"
|
|
1539
1638
|
to_lift = sumstats[status].str.match(pattern)
|
|
@@ -1549,7 +1648,7 @@ def parallelizeliftovervariant(sumstats,n_cores=1,chrom="CHR", pos="POS", from_b
|
|
|
1549
1648
|
pool = Pool(n_cores)
|
|
1550
1649
|
#df = pd.concat(pool.starmap(func, df_split))
|
|
1551
1650
|
func=liftover_variant
|
|
1552
|
-
sumstats[[chrom,pos,status]] = pd.concat(pool.map(partial(func,chrom=chrom,pos=pos,from_build=from_build,to_build=to_build,status=status),df_split))
|
|
1651
|
+
sumstats[[chrom,pos,status]] = pd.concat(pool.map(partial(func,chrom=chrom,pos=pos,from_build=from_build,to_build=to_build,status=status,chain=chain),df_split))
|
|
1553
1652
|
pool.close()
|
|
1554
1653
|
pool.join()
|
|
1555
1654
|
############################################################################
|
gwaslab/util_in_fill_data.py
CHANGED
|
@@ -184,7 +184,8 @@ def fill_mlog10p(sumstats,log,verbose=True,filled_count=0):
|
|
|
184
184
|
else:
|
|
185
185
|
return 0,filled_count
|
|
186
186
|
return 1,filled_count
|
|
187
|
-
|
|
187
|
+
|
|
188
|
+
def fill_extreme_mlog10p(sumstats,df,log,verbose=True,filled_count=0):
|
|
188
189
|
# ref: https://stackoverflow.com/questions/46416027/how-to-compute-p-values-from-z-scores-in-r-when-the-z-score-is-large-pvalue-muc/46416222#46416222
|
|
189
190
|
if "Z" in sumstats.columns:
|
|
190
191
|
# P -> MLOG10P
|
|
@@ -198,6 +199,10 @@ def fill_extreme_mlog10p(sumstats,log,verbose=True,filled_count=0):
|
|
|
198
199
|
log.write(" - Filling MLOG10P using Z column...", verbose=verbose)
|
|
199
200
|
sumstats = fill_extreme_mlog10(sumstats, "Z")
|
|
200
201
|
filled_count +=1
|
|
202
|
+
elif "CHISQ" in sumstats.columns and "DOF" in sumstats.columns:
|
|
203
|
+
log.write(" - Filling MLOG10P using CHISQ and DOF column...", verbose=verbose)
|
|
204
|
+
sumstats = fill_extreme_mlog10_chisq(sumstats, "CHISQ", df)
|
|
205
|
+
filled_count +=1
|
|
201
206
|
else:
|
|
202
207
|
return 0,filled_count
|
|
203
208
|
return 1,filled_count
|
|
@@ -223,6 +228,19 @@ def fill_extreme_mlog10(sumstats, z):
|
|
|
223
228
|
sumstats["P_EXPONENT"]= exponent
|
|
224
229
|
return sumstats
|
|
225
230
|
|
|
231
|
+
def fill_extreme_mlog10_chisq(sumstats, chisq, df):
|
|
232
|
+
#https://stackoverflow.com/a/46416222/199475
|
|
233
|
+
log_pvalue = ss.chi2.logsf(sumstats[chisq], sumstats[df])
|
|
234
|
+
|
|
235
|
+
log10_pvalue = log_pvalue/np.log(10)
|
|
236
|
+
|
|
237
|
+
mantissa = 10**(log10_pvalue %1)
|
|
238
|
+
exponent = log10_pvalue // 1
|
|
239
|
+
sumstats["MLOG10P"] = -log10_pvalue
|
|
240
|
+
sumstats["P_MANTISSA"]= mantissa
|
|
241
|
+
sumstats["P_EXPONENT"]= exponent
|
|
242
|
+
return sumstats
|
|
243
|
+
|
|
226
244
|
####################################################################################################################
|
|
227
245
|
def fill_iteratively(sumstats,raw_to_fill,log,only_sig,df,extreme,verbose,sig_level):
|
|
228
246
|
to_fill = raw_to_fill.copy()
|
|
@@ -260,7 +278,7 @@ def fill_iteratively(sumstats,raw_to_fill,log,only_sig,df,extreme,verbose,sig_le
|
|
|
260
278
|
# p to -log10(P) ###############################################################################################
|
|
261
279
|
if "MLOG10P" in to_fill:
|
|
262
280
|
if extreme==True:
|
|
263
|
-
status,filled_count = fill_extreme_mlog10p(sumstats,log,verbose=verbose,filled_count=filled_count)
|
|
281
|
+
status,filled_count = fill_extreme_mlog10p(sumstats,df, log,verbose=verbose,filled_count=filled_count)
|
|
264
282
|
filled_count +=1
|
|
265
283
|
elif "P" not in sumstats.columns:
|
|
266
284
|
fill_p(sumstats,log,verbose=verbose)
|
gwaslab/viz_plot_miamiplot2.py
CHANGED
|
@@ -290,7 +290,11 @@ def plot_miami2(
|
|
|
290
290
|
|
|
291
291
|
|
|
292
292
|
#####################################################################################################################
|
|
293
|
-
|
|
293
|
+
ax1l, ax1r = ax5.get_xlim()
|
|
294
|
+
ax5l, ax5r = ax1.get_xlim()
|
|
295
|
+
ax1.set_xlim([min(ax1l,ax5l), max(ax1r,ax5r)])
|
|
296
|
+
ax5.set_xlim([min(ax1l,ax5l), max(ax1r,ax5r)])
|
|
297
|
+
#####################################################################################################################
|
|
294
298
|
ax5.set_xlabel("")
|
|
295
299
|
#ax5.set_xticks(chrom_df)
|
|
296
300
|
ax5.set_xticklabels([])
|
|
@@ -95,6 +95,10 @@ def plot_stacked_mqq(objects,
|
|
|
95
95
|
if "family" not in title_args.keys():
|
|
96
96
|
title_args["family"] = "Arial"
|
|
97
97
|
# create figure and axes ##################################################################################################################
|
|
98
|
+
#
|
|
99
|
+
# subplot_height : subplot height
|
|
100
|
+
# figsize : Width, height in inches
|
|
101
|
+
|
|
98
102
|
if mode=="r":
|
|
99
103
|
if len(vcfs)==1:
|
|
100
104
|
vcfs = vcfs *len(sumstats_list)
|
|
@@ -107,14 +111,17 @@ def plot_stacked_mqq(objects,
|
|
|
107
111
|
else:
|
|
108
112
|
height_ratios = [1 for i in range(n_plot_plus_gene_track-1)]+[gene_track_height]
|
|
109
113
|
|
|
110
|
-
|
|
114
|
+
if "figsize" not in fig_args.keys():
|
|
115
|
+
fig_args["figsize"] = [16,subplot_height*n_plot_plus_gene_track]
|
|
116
|
+
|
|
111
117
|
fig, axes = plt.subplots(n_plot_plus_gene_track, 1, sharex=True,
|
|
112
118
|
gridspec_kw={'height_ratios': height_ratios},
|
|
113
119
|
**fig_args)
|
|
114
120
|
plt.subplots_adjust(hspace=region_hspace)
|
|
115
121
|
elif mode=="m":
|
|
116
122
|
n_plot = len(sumstats_list)
|
|
117
|
-
|
|
123
|
+
if "figsize" not in fig_args.keys():
|
|
124
|
+
fig_args["figsize"] = [10,subplot_height*n_plot]
|
|
118
125
|
fig, axes = plt.subplots(n_plot, 1, sharex=True,
|
|
119
126
|
gridspec_kw={'height_ratios': [1 for i in range(n_plot)]},
|
|
120
127
|
**fig_args)
|
|
@@ -122,8 +129,8 @@ def plot_stacked_mqq(objects,
|
|
|
122
129
|
vcfs = [None for i in range(n_plot)]
|
|
123
130
|
elif mode=="mqq":
|
|
124
131
|
n_plot = len(objects)
|
|
125
|
-
|
|
126
|
-
|
|
132
|
+
if "figsize" not in fig_args.keys():
|
|
133
|
+
fig_args["figsize"] = [10,subplot_height*n_plot]
|
|
127
134
|
fig, axes = plt.subplots(n_plot, 2, sharex=True,
|
|
128
135
|
gridspec_kw={'height_ratios': [1 for i in range(n_plot-1)],
|
|
129
136
|
'width_ratios':[mqqratio,1]},
|
|
@@ -1,25 +1,25 @@
|
|
|
1
1
|
gwaslab/__init__.py,sha256=7TKJaODdpeuQKibL7gIEa4MtyQ0pmrU-vIHQ-Et27lQ,2433
|
|
2
|
-
gwaslab/bd_common_data.py,sha256=
|
|
2
|
+
gwaslab/bd_common_data.py,sha256=2voBqMrIsII1TN5T6uvyDax90fWcJK1Stmo1ZHNGGsE,13898
|
|
3
3
|
gwaslab/bd_config.py,sha256=TP-r-DPhJD3XnRYZbw9bQHXaDIkiRgK8bG9HCt-UaLc,580
|
|
4
4
|
gwaslab/bd_download.py,sha256=cDDk2C5IvjeAzvPvVYGTkI4Ss33DUtEDjGo8eAbQRvY,15663
|
|
5
5
|
gwaslab/bd_get_hapmap3.py,sha256=asNjQYeGfQi8u3jnfenRvDdKMs5ptql5wpcUzqMlwUI,3937
|
|
6
6
|
gwaslab/cache_manager.py,sha256=HOTnSkCOyGEPLRl90WT8D_6pAdI8d8AzenMIDGuCeWc,28113
|
|
7
7
|
gwaslab/g_Log.py,sha256=C3Zv-_6c3C9ms8bgQ-ytplz22sjk7euqXYkWr9zNeAs,1573
|
|
8
8
|
gwaslab/g_Phenotypes.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
-
gwaslab/g_Sumstats.py,sha256=
|
|
9
|
+
gwaslab/g_Sumstats.py,sha256=qWBh14-XheS88KLm2vr0OKRAhU9VmbUvy90T4L52zAo,36265
|
|
10
10
|
gwaslab/g_SumstatsPair.py,sha256=20snPb4SlI6ftMGVjgxAuyxsxYRQF-GzzlBSnoB-3Lo,8851
|
|
11
11
|
gwaslab/g_SumstatsT.py,sha256=u_DighLMnMxwTLnqm-B58pA0G6WXRj6pudPyKMVKjSU,2133
|
|
12
12
|
gwaslab/g_Sumstats_summary.py,sha256=FECvvFXJVKaCX5dggBvvk9YvJ6AbdbcLfjltysX7wEE,6380
|
|
13
13
|
gwaslab/g_meta.py,sha256=htWlgURWclm9R6UqFcX1a93WN27xny7lGUeyJZOtszQ,2583
|
|
14
|
-
gwaslab/g_vchange_status.py,sha256=
|
|
15
|
-
gwaslab/g_version.py,sha256=
|
|
14
|
+
gwaslab/g_vchange_status.py,sha256=w3zsYYOcCaI3PTeboonvkQjudzUAfVIgATzRdiPViZs,1939
|
|
15
|
+
gwaslab/g_version.py,sha256=dhN-3rbJKALlFhTZACsxBZL_U8j4PQmcVuCuWx9dapo,1886
|
|
16
16
|
gwaslab/hm_casting.py,sha256=FqP4EQl83Q2OKLw004OgLIvUH795TVCGwziLk5jsHqY,11368
|
|
17
|
-
gwaslab/hm_harmonize_sumstats.py,sha256=
|
|
17
|
+
gwaslab/hm_harmonize_sumstats.py,sha256=_sZ8soikAxDokw-dcr_CLguBB8OmTmPPS04MfmsJc_Q,79509
|
|
18
18
|
gwaslab/hm_rsid_to_chrpos.py,sha256=ODWREO0jPN0RAfNzL5fRzSRANfhiksOvUVPuEsFZQqA,6552
|
|
19
|
-
gwaslab/io_preformat_input.py,sha256=
|
|
19
|
+
gwaslab/io_preformat_input.py,sha256=J8Ny4OPMaLVdo2nP8lTM-c5A8LSdqphSrp9G4i9JjDQ,24097
|
|
20
20
|
gwaslab/io_read_ldsc.py,sha256=wsYXpH50IchBKd2dhYloSqc4YgnDkiwMsAweaCoN5Eo,12471
|
|
21
21
|
gwaslab/io_read_tabular.py,sha256=EG-C6KhCutt4J4LlOMgXnqzJvU-EZXzVhMvaDFnHrMM,2380
|
|
22
|
-
gwaslab/io_to_formats.py,sha256=
|
|
22
|
+
gwaslab/io_to_formats.py,sha256=8FmbQjWUIsz_V1Lb80TuwRIXKBgs5t42j25Znougk1Y,29401
|
|
23
23
|
gwaslab/io_to_pickle.py,sha256=HhePU0VcaGni0HTNU0BqoRaOnrr0NOxotgY6ISdx3Ck,1833
|
|
24
24
|
gwaslab/ldsc_irwls.py,sha256=83JbAMAhD0KOfpv4IJa6LgUDfQjp4XSJveTjnhCBJYQ,6142
|
|
25
25
|
gwaslab/ldsc_jackknife.py,sha256=XrWHoKS_Xn9StG1I83S2vUMTertsb-GH-_gOFYUhLeU,17715
|
|
@@ -28,7 +28,7 @@ gwaslab/ldsc_parse.py,sha256=MBnfgcWlV4oHp9MoDRh1mpilaHhAR15Af77hMFn4-5k,10564
|
|
|
28
28
|
gwaslab/ldsc_regressions.py,sha256=yzbGjgNV7u-SWXNPsh9S8y9mK97Bim_Nmad9G9V18ZU,30078
|
|
29
29
|
gwaslab/ldsc_sumstats.py,sha256=O0olsDxKlh1MJ1gAuEN1t40rxhajOEwOQ20ak7xoDrI,26245
|
|
30
30
|
gwaslab/qc_check_datatype.py,sha256=kW68uk4dTLOU2b1dHoVat6n0loundDysAjIqxsXW28Q,3379
|
|
31
|
-
gwaslab/qc_fix_sumstats.py,sha256=
|
|
31
|
+
gwaslab/qc_fix_sumstats.py,sha256=rb1Iv05o0tYGGTiXXXJphkx-xVQNKAtlwkDfY-D0yPs,97979
|
|
32
32
|
gwaslab/run_script.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
33
33
|
gwaslab/util_abf_finemapping.py,sha256=LRcopjtkT-iXtKPAJIzR4qjPdhD7nrS_BGit4EW89FM,3054
|
|
34
34
|
gwaslab/util_ex_calculate_ldmatrix.py,sha256=Z_spxbq6SHDS0v84I59YTTF40iyLQIOZbt0dmEcNJjw,15417
|
|
@@ -47,7 +47,7 @@ gwaslab/util_in_calculate_gc.py,sha256=MWOXVzJv7SZx4i2_ncRiqsiEOADc7EfghaUzgGy4j
|
|
|
47
47
|
gwaslab/util_in_calculate_power.py,sha256=JfHJFg3tNF0f4NHgWlzVW2mSxCiP07mAHIyEfVfxTak,10360
|
|
48
48
|
gwaslab/util_in_convert_h2.py,sha256=a8Cbudt3xn9WP2bPc-7ysuowB-LYub8j8GeDXl7Lk7Q,6483
|
|
49
49
|
gwaslab/util_in_correct_winnerscurse.py,sha256=Gp--yAQ8MMzdkWIvXP9C1BHVjZc-YzqHfYWhAj19w9w,2110
|
|
50
|
-
gwaslab/util_in_fill_data.py,sha256=
|
|
50
|
+
gwaslab/util_in_fill_data.py,sha256=iVq5WLWwFI03v9HyvBanu5si3j2p-oyPFTl8jsX69xM,14693
|
|
51
51
|
gwaslab/util_in_filter_value.py,sha256=dY4X66N9A4MHCRHjPqLYFufMM91ggLRwUBf_nJYh8Lg,23605
|
|
52
52
|
gwaslab/util_in_get_density.py,sha256=kpKXH69acMkeYVG5vs-VbJC3COhmuLBfYco-wuOxgjc,3934
|
|
53
53
|
gwaslab/util_in_get_sig.py,sha256=9kq1GXacknO2YnVmsTli1GlPA728ASweTZ3UKm3Wszo,38783
|
|
@@ -62,25 +62,25 @@ gwaslab/viz_plot_compare_af.py,sha256=qtXW45-Sq_ugK8ZfqBYMpmf58SKi3lB3YyHnzn_akc
|
|
|
62
62
|
gwaslab/viz_plot_compare_effect.py,sha256=lV-4KSQatTSsYqzEGGTkBu17I2VlVyVh1uJr-kCq630,49585
|
|
63
63
|
gwaslab/viz_plot_forestplot.py,sha256=xgOnefh737CgdQxu5naVyRNBX1NQXPFKzf51fbh6afs,6771
|
|
64
64
|
gwaslab/viz_plot_miamiplot.py,sha256=rCFEp7VNuVqeBBG3WRkmFAtFklbF79BvIQQYiSY70VY,31238
|
|
65
|
-
gwaslab/viz_plot_miamiplot2.py,sha256=
|
|
65
|
+
gwaslab/viz_plot_miamiplot2.py,sha256=xiFCgFX8hEySmCJORpEurMVER9eEXQyy_Ik7mLkbi9g,16015
|
|
66
66
|
gwaslab/viz_plot_mqqplot.py,sha256=q_c_ZDRdirEcdHHnb030h7dTUWAFy9OVuzwbdCzZ3gU,64580
|
|
67
67
|
gwaslab/viz_plot_qqplot.py,sha256=psQgVpP29686CEZkzQz0iRbApzqy7aE3GGiBcazVvNw,7247
|
|
68
68
|
gwaslab/viz_plot_regional2.py,sha256=tBoGox-4ngL5o_twdIjk_VW6Iam3JDyrPKuttm6_4Sg,36862
|
|
69
69
|
gwaslab/viz_plot_regionalplot.py,sha256=8u-5-yfy-UaXhaxVVz3Y5k2kBAoqzczUw1hyyD450iI,37983
|
|
70
70
|
gwaslab/viz_plot_rg_heatmap.py,sha256=PidUsgOiEVt6MfBPCF3_yDhOEytZ-I1q-ZD6_0pFrV4,13713
|
|
71
|
-
gwaslab/viz_plot_stackedregional.py,sha256=
|
|
71
|
+
gwaslab/viz_plot_stackedregional.py,sha256=IRugpwLjqKdwuSVTQtCM36Rk4Gpv6MxdsGIWVj6vk8w,16651
|
|
72
72
|
gwaslab/viz_plot_trumpetplot.py,sha256=ZHdc6WcVx0-oKoj88yglRkmB4bS9pOiEMcuwKW35Yvo,42672
|
|
73
73
|
gwaslab/data/formatbook.json,sha256=N2nJs80HH98Rsu9FxaSvIQO9J5yIV97WEtAKjRqYwiY,38207
|
|
74
|
-
gwaslab/data/reference.json,sha256=
|
|
74
|
+
gwaslab/data/reference.json,sha256=IrjwFnXjrpVUp3zYfcYClpibJE9Y-94gtrC1Aw8sXxg,12332
|
|
75
75
|
gwaslab/data/chrx_par/chrx_par_hg19.bed.gz,sha256=LocZg_ozhZjQiIpgWCO4EYCW9xgkEKpRy1m-YdIpzQs,83
|
|
76
76
|
gwaslab/data/chrx_par/chrx_par_hg38.bed.gz,sha256=VFW11MnQVC-Iu-ZGvUDcEhVpb-HVRsVTg-W-GNJyxP4,82
|
|
77
77
|
gwaslab/data/hapmap3_SNPs/hapmap3_db150_hg19.snplist.gz,sha256=qD9RsC5S2h6l-OdpW09DWXHIi2kcPebctMnhxt8mzfU,10282886
|
|
78
78
|
gwaslab/data/hapmap3_SNPs/hapmap3_db151_hg38.snplist.gz,sha256=Y8ZT2FIAhbhlgCJdE9qQVAiwnV_fcsPt72usBa7RSBM,10225828
|
|
79
79
|
gwaslab/data/high_ld/high_ld_hla_hg19.bed.gz,sha256=R7IkssKu0L4WwkU9SrS84xCMdrkkKL0gnTNO_OKbG0Y,219
|
|
80
80
|
gwaslab/data/high_ld/high_ld_hla_hg38.bed.gz,sha256=76CIU0pibDJ72Y6UY-TbIKE9gEPwTELAaIbCXyjm80Q,470
|
|
81
|
-
gwaslab-3.4.
|
|
82
|
-
gwaslab-3.4.
|
|
83
|
-
gwaslab-3.4.
|
|
84
|
-
gwaslab-3.4.
|
|
85
|
-
gwaslab-3.4.
|
|
86
|
-
gwaslab-3.4.
|
|
81
|
+
gwaslab-3.4.49.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
82
|
+
gwaslab-3.4.49.dist-info/LICENSE_before_v3.4.39,sha256=GhLOU_1UDEKeOacYhsRN_m9u-eIuVTazSndZPeNcTZA,1066
|
|
83
|
+
gwaslab-3.4.49.dist-info/METADATA,sha256=bA0qeWgxt5mOkt_aL4aW_v4csyNSJCsbY6wrKMLyqY8,7752
|
|
84
|
+
gwaslab-3.4.49.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
|
|
85
|
+
gwaslab-3.4.49.dist-info/top_level.txt,sha256=PyY6hWtrALpv2MAN3kjkIAzJNmmBTH5a2risz9KwH08,8
|
|
86
|
+
gwaslab-3.4.49.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|