gwaslab 3.4.44__py3-none-any.whl → 3.4.46__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of gwaslab might be problematic.
- gwaslab/__init__.py +2 -1
- gwaslab/bd_common_data.py +22 -0
- gwaslab/g_Sumstats.py +2 -0
- gwaslab/g_SumstatsPair.py +1 -1
- gwaslab/g_vchange_status.py +4 -2
- gwaslab/g_version.py +2 -2
- gwaslab/hm_harmonize_sumstats.py +14 -6
- gwaslab/io_preformat_input.py +22 -1
- gwaslab/qc_fix_sumstats.py +8 -1
- gwaslab/util_in_filter_value.py +38 -2
- gwaslab/util_in_get_sig.py +32 -8
- gwaslab/util_in_meta.py +234 -0
- gwaslab/util_in_snphwe.py +58 -0
- gwaslab/viz_aux_chromatin.py +111 -0
- gwaslab/viz_plot_compare_effect.py +4 -1
- gwaslab/viz_plot_mqqplot.py +2 -0
- gwaslab/viz_plot_regionalplot.py +4 -0
- gwaslab/viz_plot_stackedregional.py +69 -13
- {gwaslab-3.4.44.dist-info → gwaslab-3.4.46.dist-info}/METADATA +7 -7
- {gwaslab-3.4.44.dist-info → gwaslab-3.4.46.dist-info}/RECORD +24 -21
- {gwaslab-3.4.44.dist-info → gwaslab-3.4.46.dist-info}/WHEEL +1 -1
- {gwaslab-3.4.44.dist-info → gwaslab-3.4.46.dist-info}/LICENSE +0 -0
- {gwaslab-3.4.44.dist-info → gwaslab-3.4.46.dist-info}/LICENSE_before_v3.4.39 +0 -0
- {gwaslab-3.4.44.dist-info → gwaslab-3.4.46.dist-info}/top_level.txt +0 -0
gwaslab/__init__.py
CHANGED
@@ -44,4 +44,5 @@ from gwaslab.viz_plot_trumpetplot import plot_power
 from gwaslab.viz_plot_trumpetplot import plot_power_x
 from gwaslab.util_ex_process_h5 import process_vcf_to_hfd5
 from gwaslab.util_ex_run_susie import _run_susie_rss as run_susie_rss
-from gwaslab.io_read_tabular import _read_tabular as read_tabular
+from gwaslab.io_read_tabular import _read_tabular as read_tabular
+from gwaslab.util_in_meta import meta_analyze
gwaslab/bd_common_data.py
CHANGED
@@ -298,6 +298,28 @@ def gtf_to_protein_coding(gtfpath,log=Log(),verbose=True):
 
     return protein_coding_path
 
+def gtf_to_all_gene(gtfpath,log=Log(),verbose=True):
+    all_gene_path = gtfpath[:-6]+"all_genes.gtf.gz"
+    # if not existing, extract protein coding records and output to a new file
+    if not path.isfile(all_gene_path):
+
+        # get gene list
+        log.write(" - Extracting genes from {}".format(gtfpath),verbose=verbose)
+        gtf = read_gtf(gtfpath,usecols=["feature","gene_biotype","gene_id","gene_name"])
+        gene_list = gtf.loc[gtf["feature"]=="gene","gene_id"].values
+        log.write(" - Loaded {} genes.".format(len(gene_list)),verbose=verbose)
+
+        # extract entry using csv
+        gtf_raw = pd.read_csv(gtfpath,sep="\t",header=None,comment="#",dtype="string")
+        gtf_raw["_gene_id"] = gtf_raw[8].str.extract(r'gene_id "([\w\.-]+)"')
+        gtf_raw = gtf_raw.loc[ gtf_raw["_gene_id"].isin(gene_list) ,:]
+        gtf_raw = gtf_raw.drop("_gene_id",axis=1)
+
+        log.write(" - Extracted records are saved to : {} ".format(all_gene_path),verbose=verbose)
+        gtf_raw.to_csv(all_gene_path, header=None, index=None, sep="\t")
+
+    return all_gene_path
+
 ####################################################################################################################
 # From BioPython: https://github.com/biopython/biopython/blob/c5a6b1374267d769b19c1022b4b45472316e78b4/Bio/Seq.py#L36
 def _maketrans(complement_mapping):
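For readers of the new gtf_to_all_gene() above, a minimal sketch (not part of the diff) of the gene_id extraction step: the ninth GTF column holds the attribute string, and the same regex used above pulls out the quoted gene_id. The two attribute strings below are hypothetical examples.

    import pandas as pd

    # Hypothetical GTF-like rows: column 8 holds the attribute string.
    raw = pd.DataFrame({8: ['gene_id "ENSG00000157764"; gene_name "BRAF";',
                            'gene_id "ENSG00000141510"; gene_name "TP53";']}, dtype="string")

    # Same pattern as gtf_to_all_gene: extract the quoted gene_id from the attributes.
    raw["_gene_id"] = raw[8].str.extract(r'gene_id "([\w\.-]+)"')
    print(raw["_gene_id"].tolist())   # ['ENSG00000157764', 'ENSG00000141510']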
gwaslab/g_Sumstats.py
CHANGED
@@ -121,6 +121,7 @@ class Sumstats():
                  snpr2=None,
                  status=None,
                  other=[],
+                 usekeys=None,
                  direction=None,
                  verbose=True,
                  study="Study_1",
@@ -200,6 +201,7 @@ class Sumstats():
                              trait=trait,
                              status=status,
                              other=other,
+                             usekeys=usekeys,
                              verbose=verbose,
                              readargs=readargs,
                              log=self.log)
gwaslab/g_SumstatsPair.py
CHANGED
@@ -139,7 +139,7 @@ class SumstatsPair( ):
         self.clumps["clumps"], self.clumps["plink_log"] = _clump(self.data, log=self.log, p="P_1",mlog10p="MLOG10P_1", study = self.study_name, **kwargs)
 
     def to_coloc(self,**kwargs):
-        self.to_finemapping_file_path, self.plink_log = tofinemapping(self.data,study=self.study_name,suffixes=self.suffixes,log=self.log,**kwargs)
+        self.to_finemapping_file_path, output_file_list, self.plink_log = tofinemapping(self.data,study=self.study_name,suffixes=self.suffixes,log=self.log,**kwargs)
 
     def run_coloc_susie(self,**kwargs):
 
gwaslab/g_vchange_status.py
CHANGED
@@ -1,13 +1,15 @@
 import pandas as pd
 
+CATEGORIES = {str(j+i) for j in [1900000,3800000,9700000,9800000,9900000] for i in range(0,100000)}
+
 def vchange_status(status,digit,before,after):
     dic={}
     for i in range(len(before)):
         dic[before[i]]=after[i]
     if digit>1:
-        return status.str[:digit-1]+status.str[digit-1].replace(dic)+status.str[digit:]
+        return pd.Categorical(status.str[:digit-1]+status.str[digit-1].replace(dic)+status.str[digit:],categories=CATEGORIES)
     else:
-        return status.str[digit-1].replace(dic)+status.str[digit:]
+        return pd.Categorical(status.str[digit-1].replace(dic)+status.str[digit:],categories=CATEGORIES)
 
 def copy_status(from_status,to_status, digit):
     if digit>1:
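For context, a minimal sketch (not part of the diff) of why returning pd.Categorical with a fixed CATEGORIES universe helps: the seven-digit STATUS codes come from a known, bounded set, so a shared categorical dtype stores the column as small integer codes rather than millions of Python strings. The example passes the categories as a list; the toy status values are arbitrary.

    import pandas as pd

    # Same fixed universe of valid STATUS codes as the CATEGORIES set added above.
    CATEGORIES = {str(j + i) for j in [1900000, 3800000, 9700000, 9800000, 9900000]
                  for i in range(0, 100000)}

    status = pd.Series(["1960099", "9999999", "3800000"])          # toy STATUS codes
    status_cat = pd.Categorical(status, categories=list(CATEGORIES))

    # Values are stored as integer codes into the shared category index.
    print(status_cat.codes.dtype)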
gwaslab/g_version.py
CHANGED
gwaslab/hm_harmonize_sumstats.py
CHANGED
@@ -355,7 +355,11 @@ def oldcheckref(sumstats,ref_seq,chrom="CHR",pos="POS",ea="EA",nea="NEA",status=
 
     log.write("\n",end="",show_time=False,verbose=verbose)
 
-
+    CATEGORIES = {str(j+i) for j in [1900000,3800000,9700000,9800000,9900000] for i in range(0,100000)}
+    sumstats[status] = pd.Categorical(sumstats[status],categories=CATEGORIES)
+    #sumstats[status] = sumstats[status].astype("string")
+
+
     available_to_check =sum( (~sumstats[pos].isna()) & (~sumstats[nea].isna()) & (~sumstats[ea].isna()))
     status_0=sum(sumstats["STATUS"].str.match("\w\w\w\w\w[0]\w", case=False, flags=0, na=False))
     status_3=sum(sumstats["STATUS"].str.match("\w\w\w\w\w[3]\w", case=False, flags=0, na=False))
@@ -669,9 +673,11 @@ def checkref(sumstats,ref_seq,chrom="CHR",pos="POS",ea="EA",nea="NEA",status="ST
     sumstats_to_check = sumstats.loc[to_check_ref,[chrom,pos,ea,nea,status]]
     sumstats.loc[to_check_ref,status] = check_status(sumstats_to_check, all_records_dict, log=log, verbose=verbose)
     log.write(" -Finished checking records", verbose=verbose)
-
-
-
+
+    CATEGORIES = {str(j+i) for j in [1900000,3800000,9700000,9800000,9900000] for i in range(0,100000)}
+    sumstats[status] = pd.Categorical(sumstats[status],categories=CATEGORIES)
+    #sumstats[status] = sumstats[status].astype("string")
+
     available_to_check =sum( (~sumstats[pos].isna()) & (~sumstats[nea].isna()) & (~sumstats[ea].isna()))
     status_0=sum(sumstats["STATUS"].str.match("\w\w\w\w\w[0]\w", case=False, flags=0, na=False))
     status_3=sum(sumstats["STATUS"].str.match("\w\w\w\w\w[3]\w", case=False, flags=0, na=False))
@@ -700,6 +706,7 @@ def checkref(sumstats,ref_seq,chrom="CHR",pos="POS",ea="EA",nea="NEA",status="ST
     if remove is True:
         sumstats = sumstats.loc[~sumstats["STATUS"].str.match("\w\w\w\w\w[8]\w"),:]
         log.write(" -Variants not on given reference sequence were removed.",verbose=verbose)
+
 
     finished(log, verbose, _end_line)
     return sumstats
@@ -861,8 +868,9 @@ def parallelizeassignrsid(sumstats, path, ref_mode="vcf",snpid="SNPID",rsid="rsI
     if is_enough_info == False: return sumstats
     ############################################################################################
 
-    standardized_normalized = sumstats["STATUS"].str.match("\w\w\w[0][01234]\w\w", case=False, flags=0, na=False)
-
+    #standardized_normalized = sumstats["STATUS"].str.match("\w\w\w[0][01234]\w\w", case=False, flags=0, na=False)
+    standardized_normalized = sumstats["STATUS"] == sumstats["STATUS"]
+
     if rsid not in sumstats.columns:
         sumstats[rsid]=pd.Series(dtype="string")
 
gwaslab/io_preformat_input.py
CHANGED
@@ -55,6 +55,7 @@ def preformat(sumstats,
               trait=None,
               build=None,
               other=[],
+              usekeys=None,
               verbose=False,
               readargs=None,
               log=None):
@@ -65,6 +66,11 @@ def preformat(sumstats,
     dtype_dictionary ={}
 
     #######################################################################################################################################################
+    # workflow:
+    # 1. formatbook
+    # 2. user specified header
+    # 3. usekeys
+
     if fmt is not None:
         # loading format parameters
         log.write("Start to load format from formatbook....",verbose=verbose)
@@ -129,6 +135,8 @@ def preformat(sumstats,
 
         ################################################
         for key,value in rename_dictionary.items():
+            # check avaiable keys key->raw header
+            # usecols : a list of raw headers to load from file/DataFrame
             if key in raw_cols:
                 usecols.append(key)
                 if value in ["EA","NEA"]:
@@ -137,7 +145,7 @@ def preformat(sumstats,
                     dtype_dictionary[value]="string"
 
     except ValueError:
-        raise ValueError("Please input a path or a pd.DataFrame, and make sure the columns you specified are in the file.")
+        raise ValueError("Please input a path or a pd.DataFrame, and make sure the separator is correct and the columns you specified are in the file.")
 
     ###################################################################################################################################################
     ## check columns/datatype to use
@@ -276,6 +284,19 @@ def preformat(sumstats,
         else:
             study = raw_cols[9]
         usecols = usecols + [study]
+
+    if usekeys is not None:
+        # extract only specified keys
+        usecols_new =[]
+        for i in usekeys:
+            for k, v in rename_dictionary.items():
+                if i == v:
+                    usecols_new.append(k)
+        usecols_valid =[]
+        for i in usecols_new:
+            if i in usecols:
+                usecols_valid.append(i)
+        usecols = usecols_valid
     #loading data ##########################################################################################################
 
     try:
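Based on the new usekeys parameter threaded through Sumstats() and preformat() above, a hedged usage sketch: usekeys restricts loading to the listed GWASLab column keys after the header mapping is resolved. The file name and format below are placeholders.

    import gwaslab as gl

    # Load only the columns needed downstream; "mysumstats.tsv.gz" and fmt="auto"
    # are placeholders for your own file and format.
    sumstats = gl.Sumstats("mysumstats.tsv.gz",
                           fmt="auto",
                           usekeys=["SNPID", "CHR", "POS", "EA", "NEA", "BETA", "SE", "EAF", "N"])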
gwaslab/qc_fix_sumstats.py
CHANGED
@@ -1061,6 +1061,13 @@ def check_range(sumstats, var_range, header, coltocheck, cols_to_check, log, ver
         if sum(is_low_p) >0:
             log.warning("Extremely low P detected (P=0 or P < minimum positive value of float64) : {}".format(sum(is_low_p)))
             log.warning("Please consider using MLOG10P instead.")
+
+    if header=="INFO":
+        is_high_info = sumstats["INFO"]>1
+        if sum(is_high_info) >0:
+            log.warning("High INFO detected (INFO>1) : {}".format(sum(is_high_info)))
+            log.warning("max(INFO): {}".format(sumstats["INFO"].max()))
+            log.warning("Please check if this is as expected.")
 
     if sum(~is_valid)>0:
         try:
@@ -1102,7 +1109,7 @@ def sanitycheckstats(sumstats,
                      HR=(-100,100),
                      HR_95L=(0,float("Inf")),
                      HR_95U=(0,float("Inf")),
-                     info=(0,
+                     info=(0,2),
                      float_tolerence = 1e-7,
                      verbose=True,
                      log=Log()):
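For reference, the added INFO sanity check reduces to the standalone logic sketched below on a toy frame (arbitrary values); this is an illustration, not gwaslab API.

    import pandas as pd

    sumstats = pd.DataFrame({"INFO": [0.85, 0.99, 1.03, 1.20]})   # toy imputation INFO scores

    # Mirror of the added check: warn when imputation INFO exceeds 1.
    is_high_info = sumstats["INFO"] > 1
    if sum(is_high_info) > 0:
        print("High INFO detected (INFO>1) : {}".format(sum(is_high_info)))
        print("max(INFO): {}".format(sumstats["INFO"].max()))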
gwaslab/util_in_filter_value.py
CHANGED
@@ -10,6 +10,7 @@ from gwaslab.g_vchange_status import vchange_status
 from gwaslab.qc_fix_sumstats import sortcoordinate
 from gwaslab.qc_fix_sumstats import start_to
 from gwaslab.qc_fix_sumstats import finished
+from gwaslab.qc_fix_sumstats import _process_build
 from gwaslab.hm_harmonize_sumstats import is_palindromic
 
 import gc
@@ -430,8 +431,43 @@ def _filter_snp(sumstats, mode="in", ea="EA",nea="NEA", log=Log(),verbose=True):
     log.write("Finished filtering SNPs.",verbose=verbose)
     return snp
 
-def _exclude_hla(sumstats, chrom="CHR", pos="POS", lower=
-
+def _exclude_hla(sumstats, chrom="CHR", pos="POS", lower=None ,upper=None, build=None, mode="xmhc", log=Log(), verbose=True):
+
+    if build is not None:
+        build = _process_build(build = build,log = log,verbose = verbose)
+    # xMHC : HIST1H2AA ~ 7.6mb ~ RPL12P1
+    # reference: Horton, R., Wilming, L., Rand, V., Lovering, R. C., Bruford, E. A., Khodiyar, V. K., ... & Beck, S. (2004). Gene map of the extended human MHC. Nature Reviews Genetics, 5(12), 889-899.
+    # hg38: 25,726,063 ~ 33,400,644
+    # hg19 : 25,726,291 ~ 33,368,421
+
+    # HLA : GABBR1 ~ 3.78mb ~ KIFC1
+    # reference: Shiina, T., Hosomichi, K., Inoko, H., & Kulski, J. K. (2009). The HLA genomic loci map: expression, interaction, diversity and disease. Journal of human genetics, 54(1), 15-39.
+    # hg38: 29,602,238 ~ 33,409,896
+    # hg19: 29,570,015 ~ 33,377,673
+
+    if build == "19":
+        if mode =="xmhc":
+            lower=25000000
+            upper=34000000
+        if mode =="hla" or mode =="mhc":
+            lower=29500000
+            upper=33500000
+    if build == "38":
+        if mode =="xmhc":
+            lower=25000000
+            upper=34000000
+        if mode =="hla" or mode =="mhc":
+            lower=29500000
+            upper=33500000
+    else:
+        # -> 25,000,000 ~ 34,000,000
+        if mode =="xmhc":
+            lower=25000000
+            upper=34000000
+        if mode =="hla" or mode =="mhc":
+            lower=29500000
+            upper=33500000
+
     raw_len = len(sumstats)
 
     if str(sumstats[chrom].dtype) == "string":
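A hedged usage sketch of the rewritten _exclude_hla() signature (the DataFrame is a placeholder): per the comments above, "xmhc" is rounded to 25,000,000-34,000,000 and "hla"/"mhc" to 29,500,000-33,500,000 on both builds, and the helper should drop variants in that chromosome 6 window.

    from gwaslab.util_in_filter_value import _exclude_hla

    # sumstats_df is assumed to be a harmonized summary-statistics DataFrame
    # with "CHR" and "POS" columns.
    no_xmhc = _exclude_hla(sumstats_df, build="19", mode="xmhc")   # extended MHC, chr6:25M-34M
    no_hla  = _exclude_hla(sumstats_df, build="38", mode="hla")    # classical HLA, chr6:29.5M-33.5M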
gwaslab/util_in_get_sig.py
CHANGED
@@ -11,6 +11,7 @@ from gwaslab.bd_common_data import get_chr_to_number
 from gwaslab.bd_common_data import get_number_to_chr
 from gwaslab.bd_common_data import get_chr_to_NC
 from gwaslab.bd_common_data import gtf_to_protein_coding
+from gwaslab.bd_common_data import gtf_to_all_gene
 from gwaslab.bd_download import check_and_download
 from gwaslab.util_ex_gwascatalog import gwascatalog_trait
 from gwaslab.qc_fix_sumstats import check_dataframe_shape
@@ -38,6 +39,7 @@ def getsig(insumstats,
            wc_correction=False,
            build="19",
            source="ensembl",
+           gtf_path=None,
            verbose=True):
     """
     Extract the lead variants using a sliding window. P or MLOG10P will be used and converted to SCALEDP for sorting.
@@ -172,6 +174,7 @@ def getsig(insumstats,
                           xymt=xymt,
                           build=build,
                           source=source,
+                          gtf_path=gtf_path,
                           verbose=verbose)
 
     # drop internal id
@@ -253,6 +256,7 @@ def annogene(
         xymt=["X","Y","MT"],
         build="19",
         source="ensembl",
+        gtf_path=None,
         verbose=True):
 
     log.write("Start to annotate variants with nearest gene name(s)...", verbose=verbose)
@@ -267,8 +271,13 @@ def annogene(
             #| gzip >Homo_sapiens.GRCh37.75.processed.chr.gtf.gz
 
             #gtf_path = check_and_download("ensembl_hg19_gtf_protein_coding")
-            gtf_path
-
+            if gtf_path is None:
+                gtf_path = check_and_download("ensembl_hg19_gtf")
+                gtf_path = gtf_to_protein_coding(gtf_path,log=log,verbose=verbose)
+            else:
+                log.write(" -Using user-provided gtf:{}".format(gtf_path))
+                gtf_path = gtf_to_all_gene(gtf_path,log=log,verbose=verbose)
+
             gtf_db_path = gtf_path[:-2]+"db"
 
             data = Genome(
@@ -283,8 +292,13 @@ def annogene(
         elif build=="38":
             log.write(" -Assigning Gene name using ensembl_hg38_gtf for protein coding genes", verbose=verbose)
             #gtf_path = check_and_download("ensembl_hg38_gtf_protein_coding")
-            gtf_path
-
+            if gtf_path is None:
+                gtf_path = check_and_download("ensembl_hg38_gtf")
+                gtf_path = gtf_to_protein_coding(gtf_path,log=log,verbose=verbose)
+            else:
+                log.write(" -Using user-provided gtf:{}".format(gtf_path))
+                gtf_path = gtf_to_all_gene(gtf_path,log=log,verbose=verbose)
+
             gtf_db_path = gtf_path[:-2]+"db"
             data = Genome(
                 reference_name='GRCh38',
@@ -300,8 +314,13 @@ def annogene(
         if build=="19":
             log.write(" -Assigning Gene name using NCBI refseq latest GRCh37 for protein coding genes", verbose=verbose)
             #gtf_path = check_and_download("refseq_hg19_gtf_protein_coding")
-            gtf_path
-
+            if gtf_path is None:
+                gtf_path = check_and_download("refseq_hg19_gtf")
+                gtf_path = gtf_to_protein_coding(gtf_path,log=log,verbose=verbose)
+            else:
+                log.write(" -Using user-provided gtf:{}".format(gtf_path))
+                gtf_path = gtf_to_all_gene(gtf_path,log=log,verbose=verbose)
+
             gtf_db_path = gtf_path[:-2]+"db"
             data = Genome(
                 reference_name='GRCh37',
@@ -315,8 +334,13 @@ def annogene(
         elif build=="38":
             log.write(" -Assigning Gene name using NCBI refseq latest GRCh38 for protein coding genes", verbose=verbose)
             #gtf_path = check_and_download("refseq_hg38_gtf_protein_coding")
-            gtf_path
-
+            if gtf_path is None:
+                gtf_path = check_and_download("refseq_hg38_gtf")
+                gtf_path = gtf_to_protein_coding(gtf_path,log=log,verbose=verbose)
+            else:
+                log.write(" -Using user-provided gtf:{}".format(gtf_path))
+                gtf_path = gtf_to_all_gene(gtf_path,log=log,verbose=verbose)
+
             gtf_db_path = gtf_path[:-2]+"db"
             data = Genome(
                 reference_name='GRCh38',
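A hedged sketch of the new gtf_path option: when a user-supplied GTF is given, annogene() routes it through gtf_to_all_gene() instead of downloading and trimming the default ensembl/refseq GTF. Only parameters visible in this diff are shown; the other getsig() arguments (column names, thresholds, window size) are omitted and assumed to keep their defaults, and the paths/DataFrame below are placeholders.

    from gwaslab.util_in_get_sig import getsig

    # insumstats_df is a placeholder DataFrame of summary statistics.
    lead_variants = getsig(insumstats_df,
                           build="19",
                           source="ensembl",
                           gtf_path="my_annotation.gtf.gz",   # placeholder custom GTF
                           verbose=True)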
gwaslab/util_in_meta.py
ADDED
@@ -0,0 +1,234 @@
+
+import pandas as pd
+import numpy as np
+from scipy.stats.distributions import chi2
+from scipy.stats import norm
+from gwaslab.g_Log import Log
+from gwaslab.io_to_pickle import load_data_from_pickle
+from gwaslab.g_Sumstats import Sumstats
+import gc
+
+def meta_analyze(sumstats_list,random_effects=False, match_allele=True, log=Log()):
+
+    ###########################################################################
+    columns=["SNPID","CHR","POS","EA","NEA"]
+    results_df = pd.DataFrame(columns=columns)
+
+    log.write("Start to perform meta-analysis...")
+    log.write(" -Datasets:")
+    for index,sumstats_path in enumerate(sumstats_list):
+        if isinstance(sumstats_path, pd.DataFrame):
+            log.write(" -Sumstats #{}: {} ".format(index, sumstats_path))
+        elif isinstance(sumstats_path, Sumstats):
+            log.write(" -Sumstats #{}: {} ".format(index, sumstats_path))
+        else:
+            log.write(" -Sumstats #{}: {} ".format(index, sumstats_path))
+
+
+    # extract all variants information
+    log.write(" -Iterating through {} datasets to determine variant list...".format(len(sumstats_list)))
+
+    for index,sumstats_path in enumerate(sumstats_list):
+        sumstats = get_sumstats(sumstats_path,usekeys=["SNPID","CHR","POS","EA","NEA"])
+        new_rows = sumstats.loc[~sumstats["SNPID"].isin(results_df["SNPID"]),["SNPID","CHR","POS","EA","NEA"]]
+        log.write(" -Sumstats #{}: {} new variants (out of {}) are being added to analysis...".format(index, len(new_rows),len(sumstats)))
+
+        if len(new_rows)>0:
+            if len(results_df) == 0:
+                results_df = new_rows
+            else:
+                results_df = pd.concat([results_df, new_rows],ignore_index=True)
+        del sumstats
+        del new_rows
+        gc.collect()
+
+
+
+    ###########################################################################
+    log.write(" -Initiating result DataFrame...")
+    columns=["SNPID","CHR","POS","EA","NEA","_BETAW_SUM","_EA_N","_NEA_N","_BETA2W_SUM","_W_SUM","EAF","N","DIRECTION","BETA","SE","DOF"]
+    results_df = results_df.set_index("SNPID")
+    results_df["N"] = 0
+    results_df["_BETAW_SUM"] = 0.0
+    results_df["_BETA2W_SUM"] = 0.0
+    results_df["_W_SUM"] = 0.0
+    results_df["_W2_SUM"] = 0.0
+    results_df["_EA_N"] = 0.0
+    results_df["_NEA_N"] = 0.0
+    results_df["N"] = 0
+    results_df["DIRECTION"] = ""
+    results_df["BETA"] = 0.0
+    results_df["SE"] = 0.0
+    results_df["DOF"] = -1
+
+    dtype_dict ={
+        "_BETAW_SUM":"float64",
+        "_EA_N":"float64",
+        "_NEA_N":"float64",
+        "_BETA2W_SUM":"float64",
+        "_W_SUM":"float64",
+        "BETA":"float64",
+        "SE":"float64",
+        "N":"Int64",
+        "DOF":"Int64"
+    }
+    results_df=results_df.astype(dtype_dict)
+    ###########################################################################
+
+    log.write(" -Iterating through {} datasets to compute statistics for fixed-effect model...".format(len(sumstats_list)))
+    for index,sumstats_path in enumerate(sumstats_list):
+        to_use_sumstats = process_sumstats(sumstats_path,
+                                           results_df[["EA","NEA"]],
+                                           index=index,
+                                           match_allele=match_allele,)
+        sumstats_index = to_use_sumstats.index
+        results_df_not_in_sumstat_index = results_df.index[~results_df.index.isin(to_use_sumstats.index)]
+
+        # N and DOF
+        results_df.loc[sumstats_index, "N"] += to_use_sumstats["N"]
+        results_df.loc[sumstats_index, "DOF"] += 1
+
+        # BEAT and SE
+        results_df.loc[sumstats_index,"_BETA2W_SUM"] += to_use_sumstats["BETA"]**2 *(1/(to_use_sumstats["SE"]**2))
+        results_df.loc[sumstats_index,"_BETAW_SUM"] += to_use_sumstats["BETA"]*(1/(to_use_sumstats["SE"]**2))
+        results_df.loc[sumstats_index,"_W_SUM"] += 1/(to_use_sumstats["SE"]**2)
+        results_df.loc[sumstats_index,"_W2_SUM"] += results_df.loc[sumstats_index,"_W_SUM"]**2
+
+        # EAF
+        results_df.loc[sumstats_index,"_EA_N"] += to_use_sumstats["N"]*to_use_sumstats["EAF"]
+        results_df.loc[sumstats_index,"_NEA_N"] += to_use_sumstats["N"]*(1 - to_use_sumstats["EAF"])
+
+        # DIRECTION
+        beta_index = to_use_sumstats[to_use_sumstats["BETA"]>0].index
+        results_df.loc[beta_index, "DIRECTION"] += "+"
+        beta_index = to_use_sumstats[to_use_sumstats["BETA"]==0].index
+        results_df.loc[beta_index, "DIRECTION"] += "0"
+        beta_index = to_use_sumstats[to_use_sumstats["BETA"]<0].index
+        results_df.loc[beta_index, "DIRECTION"] += "-"
+        results_df.loc[results_df_not_in_sumstat_index, "DIRECTION"] += "?"
+
+        del to_use_sumstats
+        gc.collect()
+
+    ##############################################################################
+    # fixed - effect statistics
+    results_df["BETA"] = results_df["_BETAW_SUM"] / results_df["_W_SUM"]
+    results_df["EAF"] = results_df["_EA_N"] / (results_df["_EA_N"] + results_df["_NEA_N"])
+    results_df["SE"] = np.sqrt(1/results_df["_W_SUM"])
+    results_df["Z"] = results_df["BETA"] / results_df["SE"]
+    results_df["P"] = norm.sf(abs(results_df["Z"]))*2
+    results_df["Q"] = results_df["_BETA2W_SUM"] - (results_df["_BETAW_SUM"]**2 / results_df["_W_SUM"])
+
+    for dof in results_df["DOF"].unique():
+        results_df_dof_index = results_df["DOF"] == dof
+        results_df.loc[results_df_dof_index,"P_HET"] = chi2.sf(results_df.loc[results_df_dof_index, "Q"].values,dof)
+        gc.collect()
+
+    results_df["I2_HET"] = (results_df["Q"] - results_df["DOF"])/results_df["Q"]
+    results_df.loc[results_df["I2_HET"]<0, "I2_HET"] = 0
+
+    results_df=results_df.drop(columns=["_EA_N","_NEA_N"])
+    gc.collect()
+
+    ###########################################################################
+    if random_effects==True:
+        log.write(" -Iterating through {} datasets to compute statistics for random-effects model...".format(len(sumstats_list)))
+        results_df["_R2"] = (results_df["Q"] - results_df["DOF"])/(results_df["_W_SUM"] - (results_df["_W2_SUM"]/results_df["_W_SUM"]))
+        results_df.loc[results_df["_R2"]<0, "_R2"] = 0
+        variant_index_random = results_df[results_df["_R2"]>0].index
+
+        results_df["_BETAW_SUM_R"] = 0.0
+        results_df["_W_SUM_R"] = 0.0
+        results_df["BETA_RANDOM"] = results_df["BETA"]
+        results_df["SE_RANDOM"] = results_df["SE"]
+
+        for index,sumstats_path in enumerate(sumstats_list):
+            to_use_sumstats = process_sumstats(sumstats_path,
+                                               results_df.loc[variant_index_random, ["EA","NEA"]],
+                                               index=index,
+                                               match_allele=match_allele,
+                                               extract_index=variant_index_random)
+
+            sumstats_index = to_use_sumstats.index
+
+            # BEAT and SE
+            results_df.loc[sumstats_index,"_BETAW_SUM_R"] += to_use_sumstats["BETA"]*(1/(to_use_sumstats["SE"]**2 + results_df.loc[sumstats_index,"_R2"]))
+            results_df.loc[sumstats_index,"_W_SUM_R"] += 1/(to_use_sumstats["SE"]**2 + results_df.loc[sumstats_index,"_R2"])
+
+            del to_use_sumstats
+            del sumstats_index
+            gc.collect()
+
+        results_df.loc[variant_index_random,"BETA_RANDOM"] = results_df.loc[variant_index_random,"_BETAW_SUM_R"] / results_df.loc[variant_index_random,"_W_SUM_R"]
+        results_df.loc[variant_index_random,"SE_RANDOM"] = np.sqrt(1/results_df.loc[variant_index_random,"_W_SUM_R"])
+        results_df["Z_RANDOM"] = results_df["BETA_RANDOM"] / results_df["SE_RANDOM"]
+        results_df["P_RANDOM"] = norm.sf(abs(results_df["Z_RANDOM"]))*2
+        results_df = results_df.drop(columns=["_BETAW_SUM_R","_W_SUM_R"])
+
+    gc.collect()
+    ###########################################################################
+    results_df = results_df.drop(columns=["_BETAW_SUM","_BETA2W_SUM","_W_SUM","_R2","_W2_SUM"]).sort_values(by=["CHR","POS"])
+    gc.collect()
+    log.write("Finished meta-analysis successfully!")
+
+    return results_df
+
+def process_sumstats(sumstats_path, results_df, index, extract_index=None, match_allele=True, log=Log()):
+
+    if extract_index is None:
+        extract_index = results_df.index
+
+    sumstats = get_sumstats(sumstats_path)
+
+    to_use_sumstats = sumstats.loc[sumstats["SNPID"].isin(extract_index.values),["SNPID","EA","NEA","BETA","N","SE","EAF"]]
+
+    if len(to_use_sumstats)>0:
+        n_pre_dup = len(to_use_sumstats)
+        log.write(" -Processing {} variants from sumstats #{}".format(len(to_use_sumstats), index))
+
+        to_use_sumstats = to_use_sumstats.drop_duplicates(subset="SNPID").set_index("SNPID")
+        n_post_dup = len(to_use_sumstats)
+
+        if n_pre_dup - n_post_dup>0:
+            log.write(" -Dropping {} duplicated variants from sumstats #{}".format(n_pre_dup - n_post_dup, index))
+
+    if match_allele==True:
+        sumstats_index = to_use_sumstats.index
+        # drop not matched
+        is_match = (to_use_sumstats.loc[sumstats_index,"EA"] == results_df.loc[sumstats_index, "EA"] )&(to_use_sumstats.loc[sumstats_index,"NEA"] == results_df.loc[sumstats_index, "NEA"])
+        is_flip = (to_use_sumstats.loc[sumstats_index,"EA"] == results_df.loc[sumstats_index, "NEA"])&( to_use_sumstats.loc[sumstats_index,"NEA"] == results_df.loc[sumstats_index, "EA"])
+        is_flip = is_flip | ((to_use_sumstats.loc[sumstats_index,"NEA"] == results_df.loc[sumstats_index, "EA"])&( to_use_sumstats.loc[sumstats_index,"EA"] == results_df.loc[sumstats_index, "NEA"]))
+        is_to_use = is_match|is_flip
+
+        if sum(~is_to_use)>0:
+            log.write(" -Dropping {} variants with unmatched alleles from sumstats #{}".format(sum(~is_to_use), index))
+
+        to_use_sumstats.loc[is_flip[is_flip].index, "BETA"] = -to_use_sumstats.loc[is_flip[is_flip].index, "BETA"]
+        to_use_sumstats.loc[is_flip[is_flip].index, "EAF"] = 1-to_use_sumstats.loc[is_flip[is_flip].index, "EAF"]
+        to_use_sumstats = to_use_sumstats.loc[is_to_use[is_to_use].index,:]
+
+    gc.collect()
+
+    return to_use_sumstats
+
+def get_sumstats(input_path,usekeys=None):
+    if isinstance(input_path, tuple):
+        path = input_path[0]
+        path_args = input_path[1]
+    else:
+        path = input_path
+        path_args={}
+
+    if isinstance(path, pd.DataFrame):
+        sumstats = Sumstats(path,fmt="auto",verbose=False,usekeys=usekeys,**path_args).data
+    elif isinstance(path, Sumstats):
+        sumstats = path.data
+        if usekeys is not None:
+            sumstats = sumstats[usekeys]
+    elif path[-6:] == "pickle":
+        sumstats = load_data_from_pickle(path)
+        if usekeys is not None:
+            sumstats = sumstats[usekeys]
+    else:
+        sumstats = Sumstats(path,fmt="auto",verbose=False,usekeys=usekeys,**path_args).data
+    return sumstats
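For readers of the new module: the accumulators above follow standard inverse-variance-weighted (IVW) fixed-effect meta-analysis with Cochran's Q and I-squared heterogeneity statistics; the optional random-effects step follows the DerSimonian-Laird form. In the code's notation (w_i = 1/SE_i^2, DOF = k - 1 studies per variant, _R2 approximately tau^2):

    \beta_{FE} = \frac{\sum_i w_i \beta_i}{\sum_i w_i}, \qquad
    SE_{FE} = \sqrt{\frac{1}{\sum_i w_i}}, \qquad
    Z = \frac{\beta_{FE}}{SE_{FE}}, \qquad
    P = 2\,\Phi(-|Z|)

    Q = \sum_i w_i \beta_i^2 - \frac{\left(\sum_i w_i \beta_i\right)^2}{\sum_i w_i}, \qquad
    I^2 = \max\!\left(0,\ \frac{Q - \mathrm{DOF}}{Q}\right), \qquad
    \tau^2 = \max\!\left(0,\ \frac{Q - \mathrm{DOF}}{\sum_i w_i - \sum_i w_i^2 / \sum_i w_i}\right)

    \beta_{RE} = \frac{\sum_i \beta_i / (SE_i^2 + \tau^2)}{\sum_i 1 / (SE_i^2 + \tau^2)}, \qquad
    SE_{RE} = \sqrt{\frac{1}{\sum_i 1 / (SE_i^2 + \tau^2)}}

A hedged usage sketch (paths are placeholders; elements of the list may also be pandas DataFrames or gl.Sumstats objects, per get_sumstats() above):

    import gwaslab as gl

    meta = gl.meta_analyze(["study1_sumstats.tsv.gz", "study2_sumstats.tsv.gz"],
                           random_effects=True,
                           match_allele=True)
    print(meta[["CHR", "POS", "EA", "NEA", "BETA", "SE", "P", "P_HET", "I2_HET"]].head())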
gwaslab/util_in_snphwe.py
ADDED
@@ -0,0 +1,58 @@
+
+import numpy as np
+import pandas as pd
+
+def snphwe(obs_hets, obs_hom1, obs_hom2):
+    # Convert cpp code from (Jeremy McRae) to python
+    # https://github.com/jeremymcrae/snphwe/blob/master/src/snp_hwe.cpp
+    #/* (original comments)
+    #// This code implements an exact SNP test of Hardy-Weinberg Equilibrium as
+    #// described in Wigginton, JE, Cutler, DJ, and Abecasis, GR (2005) A Note on
+    #// Exact Tests of Hardy-Weinberg Equilibrium. AJHG 76: 887-893
+    #//
+    #// Written by Jan Wigginton
+    #*/
+
+    obs_homr = min(obs_hom1, obs_hom2)
+    obs_homc = max(obs_hom1, obs_hom2)
+
+    rare = 2 * obs_homr + obs_hets
+    genotypes = obs_hets + obs_homc + obs_homr
+
+    probs = np.array([0.0 for i in range(rare +1)])
+
+    mid = rare * (2 * genotypes - rare) // (2 * genotypes)
+
+    if mid % 2 != rare%2:
+        mid += 1
+
+    probs[mid] = 1.0
+
+    sum_p = 1 #probs[mid]
+    curr_homr = (rare - mid) // 2
+    curr_homc = genotypes - mid - curr_homr
+
+
+    for curr_hets in range(mid, 1, -2):
+        probs[curr_hets - 2] = probs[curr_hets] * curr_hets * (curr_hets - 1.0)/ (4.0 * (curr_homr + 1.0) * (curr_homc + 1.0))
+        sum_p+= probs[curr_hets - 2]
+        curr_homr += 1
+        curr_homc += 1
+
+    curr_homr = (rare - mid) // 2
+    curr_homc = genotypes - mid - curr_homr
+
+    for curr_hets in range(mid, rare-1, 2):
+        probs[curr_hets + 2] = probs[curr_hets] * 4.0 * curr_homr * curr_homc/ ((curr_hets + 2.0) * (curr_hets + 1.0))
+        sum_p += probs[curr_hets + 2]
+        curr_homr -= 1
+        curr_homc -= 1
+
+    target = probs[obs_hets]
+    p_hwe = 0.0
+
+    for p in probs:
+        if p <= target :
+            p_hwe += p / sum_p
+
+    return min(p_hwe,1)
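A hedged usage sketch of the new exact test (the counts below are arbitrary toy numbers): snphwe() returns the two-sided exact Hardy-Weinberg equilibrium p-value of Wigginton, Cutler & Abecasis (2005) given observed genotype counts for one biallelic SNP.

    from gwaslab.util_in_snphwe import snphwe

    # Toy counts: 57 heterozygotes, 14 minor-allele homozygotes, 50 major-allele homozygotes.
    p_hwe = snphwe(obs_hets=57, obs_hom1=14, obs_hom2=50)
    print(p_hwe)   # exact two-sided HWE p-value in (0, 1]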
gwaslab/viz_aux_chromatin.py
ADDED
@@ -0,0 +1,111 @@
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+from gwaslab.g_Log import Log
+
+#STATE NO. MNEMONIC DESCRIPTION COLOR NAME COLOR CODE
+#1 TssA Active TSS Red 255,0,0
+#2 TssAFlnk Flanking Active TSS Orange Red 255,69,0
+#3 TxFlnk Transcr. at gene 5' and 3' LimeGreen 50,205,50
+#4 Tx Strong transcription Green 0,128,0
+#5 TxWk Weak transcription DarkGreen 0,100,0
+#6 EnhG Genic enhancers GreenYellow 194,225,5
+#7 Enh Enhancers Yellow 255,255,0
+#8 ZNF/Rpts ZNF genes & repeats Medium Aquamarine 102,205,170
+#9 Het Heterochromatin PaleTurquoise 138,145,208
+#10 TssBiv Bivalent/Poised TSS IndianRed 205,92,92
+#11 BivFlnk Flanking Bivalent TSS/Enh DarkSalmon 233,150,122
+#12 EnhBiv Bivalent Enhancer DarkKhaki 189,183,107
+#13 ReprPC Repressed PolyComb Silver 128,128,128
+#14 ReprPCWk Weak Repressed PolyComb Gainsboro 192,192,192
+#15 Quies Quiescent/Low White 255,255,255
+
+color_dict={
+    "E1": np.array([255,0,0]),
+    "E2": np.array([255,69,0]),
+    "E3": np.array([50,205,50]),
+    "E4": np.array([0,128,0]),
+    "E5": np.array([0,100,0]),
+    "E6": np.array([194,225,5]),
+    "E7": np.array([255,255,0]),
+    "E8": np.array([102,205,170]),
+    "E9": np.array([138,145,208]),
+    "E10":np.array([205,92,92]),
+    "E11":np.array([233,150,122]),
+    "E12":np.array([189,183,107]),
+    "E13":np.array([128,128,128]),
+    "E14":np.array([192,192,192]),
+    "E15":np.array([255,255,255])
+}
+
+color_dict_i={
+    1: np.array([255,0,0]),
+    2: np.array([255,69,0]),
+    3: np.array([50,205,50]),
+    4: np.array([0,128,0]),
+    5: np.array([0,100,0]),
+    6: np.array([194,225,5]),
+    7: np.array([255,255,0]),
+    8: np.array([102,205,170]),
+    9: np.array([138,145,208]),
+    10:np.array([205,92,92]),
+    11:np.array([233,150,122]),
+    12:np.array([189,183,107]),
+    13:np.array([128,128,128]),
+    14:np.array([192,192,192]),
+    15:np.array([255,255,255])
+}
+
+
+def _plot_chromatin_state(region_chromatin_files,
+                          region_chromatin_labels,
+                          region,
+                          fig,
+                          ax,
+                          xlim_i,
+                          log=Log(),
+                          verbose=True):
+    '''
+    files : a list of numbers
+    '''
+    target_chr = region[0]
+    target_start = region[1]
+    target_end = region[2]
+
+    offset_i = xlim_i[0] - region[1]
+
+    ax.set_ylim([-0.05,0.1*len(region_chromatin_files)-0.05])
+    ax.set_xlim([offset_i+target_start,offset_i+target_end])
+
+    px_for_01 = ax.transData.transform([0,0])[1] - ax.transData.transform([0,0.1])[1]
+
+    point=fig.dpi/72
+    points_for_01 = px_for_01*72 / fig.dpi
+
+    # each tissue
+    for i,file in enumerate(region_chromatin_files):
+        log.write(" -Loading : {}".format(file), verbose=verbose)
+        enh = pd.read_csv(file,sep="\t",header=None)
+        enh.columns=["ID","START","END","STATE"]
+        enh["CHR"] = enh["ID"].str.extract(r"chr([0-9]+)").astype("float").astype("Int64")
+        enh["STATE_i"] = enh["STATE"].str.extract(r"([0-9]+)_*").astype("float").astype("Int64")
+        enh_in_region = (enh["CHR"] == target_chr) & ((enh["END"] > target_start) & (enh["START"]<target_end))
+        df =enh.loc[enh_in_region,["STATE_i","START","END"]].sort_values("STATE_i",ascending=False)
+        log.write(" -Number of records in specified region: {}".format(len(df)), verbose=verbose)
+        # each block
+        for index, row in df.iterrows():
+            color=color_dict_i[row["STATE_i"]]
+            ax.plot([offset_i + row["START"] ,offset_i + row["END"]],
+                    [i*0.1,i*0.1],
+                    c=color/255,linewidth=points_for_01,solid_capstyle="butt")
+
+    ## add stripe label
+    if len(region_chromatin_labels) == len(region_chromatin_files):
+        ax.set_yticks([i*0.1 for i in range(len(region_chromatin_labels))],
+                      region_chromatin_labels)
+    else:
+        ax.set_yticks(ticks=[])
+
+    ax.set_xticks(ticks=[])
+    ax.invert_yaxis()
+    return fig
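Judging from the loader above, each chromatin file is expected to be tab-separated with four unnamed columns (ID, START, END, STATE), where ID carries the chromosome label (e.g. chr6) and STATE is a Roadmap 15-state chromHMM mnemonic such as 7_Enh. A toy file could look like the sketch below (coordinates are hypothetical):

    chr6	29500000	29502400	1_TssA
    chr6	29502400	29510000	7_Enh
    chr6	29510000	29535800	15_Quies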
gwaslab/viz_plot_compare_effect.py
CHANGED
@@ -75,7 +75,10 @@ def compare_effect(path1,
     if scaled == True:
         scaled1 = True
         scaled2 = True
-
+    if is_q_mc=="fdr" or is_q_mc=="bon":
+        is_q = True
+    else:
+        raise ValueError("Please select either fdr or bon for is_q_mc.")
     if save_args is None:
         save_args = {"dpi":300,"facecolor":"white"}
     if reg_box is None:
gwaslab/viz_plot_mqqplot.py
CHANGED
@@ -133,6 +133,7 @@ def mqqplot(insumstats,
             anno_style="right",
             anno_fixed_arm_length=None,
             anno_source = "ensembl",
+            anno_gtf_path=None,
             anno_adjust=False,
             anno_max_iter=100,
             arm_offset=50,
@@ -816,6 +817,7 @@ def mqqplot(insumstats,
                               log=log,
                               build=build,
                               source=anno_source,
+                              gtf_path=anno_gtf_path,
                               verbose=verbose).rename(columns={"GENE":"Annotation"})
         log.write("Finished extracting variants for annotation...",verbose=verbose)
 
gwaslab/viz_plot_regionalplot.py
CHANGED
@@ -365,6 +365,10 @@ def _plot_recombination_rate(sumstats,pos, region, ax1, rr_path, rr_chr_dict, r
 
         rc = rc.loc[(rc["Position(bp)"]<region[2]) & (rc["Position(bp)"]>region[1]),:]
         ax4.plot(rc_track_offset+rc["Position(bp)"],rc["Rate(cM/Mb)"],color="#5858FF",zorder=1)
+
+        ax1.set_zorder(ax4.get_zorder()+1)
+        ax1.patch.set_visible(False)
+
     if rr_ylabel:
         ax4.set_ylabel("Recombination rate(cM/Mb)")
     if rr_lim!="max":
gwaslab/viz_plot_stackedregional.py
CHANGED
@@ -22,6 +22,7 @@ from gwaslab.bd_common_data import get_number_to_chr
 from gwaslab.bd_common_data import get_recombination_rate
 from gwaslab.bd_common_data import get_gtf
 from gwaslab.viz_aux_reposition_text import adjust_text_position
+from gwaslab.viz_aux_chromatin import _plot_chromatin_state
 from gwaslab.viz_aux_quickfix import _quick_fix
 from gwaslab.viz_aux_quickfix import _get_largenumber
 from gwaslab.viz_aux_quickfix import _quick_add_tchrpos
@@ -37,15 +38,20 @@ from gwaslab.io_to_pickle import load_data_from_pickle
 from gwaslab.g_Sumstats import Sumstats
 from gwaslab.viz_aux_save_figure import save_figure
 from gwaslab.viz_plot_mqqplot import mqqplot
+import matplotlib.patches as patches
 
 def plot_stacked_mqq(objects,
                      vcfs=None,
                      mode="r",
                      mqqratio=3,
                      region=None,
+                     region_chromatin_height=0.1,
+                     region_chromatin_files = None,
+                     region_chromatin_labels= None,
                      titles= None,
                      title_pos=None,
                      title_args=None,
+                     #title_box = None,
                      gtf=None,
                      gene_track_height=0.5,
                      fig_args=None,
@@ -72,11 +78,11 @@ def plot_stacked_mqq(objects,
         fig_args = {"dpi":200}
     if region_lead_grid_line is None:
         region_lead_grid_line = {"alpha":0.5,"linewidth" : 2,"linestyle":"--","color":"#FF0000"}
-    if
-
-
-
-
+    if region_chromatin_files is None:
+        region_chromatin_files = []
+    region_chromatin_height = len(region_chromatin_files) * region_chromatin_height
+    if region_chromatin_labels is None:
+        region_chromatin_labels = []
     # create figure and axes ##################################################################################################################
     if mode=="r":
         if len(vcfs)==1:
@@ -84,9 +90,15 @@ def plot_stacked_mqq(objects,
         n_plot = len(sumstats_list)
         n_plot_plus_gene_track = n_plot + 1
 
+        if len(region_chromatin_files)>0 and mode=="r":
+            height_ratios = [1 for i in range(n_plot_plus_gene_track-1)]+[region_chromatin_height]+[gene_track_height]
+            n_plot_plus_gene_track +=1
+        else:
+            height_ratios = [1 for i in range(n_plot_plus_gene_track-1)]+[gene_track_height]
+
         fig_args["figsize"] = [16,subplot_height*n_plot_plus_gene_track]
         fig, axes = plt.subplots(n_plot_plus_gene_track, 1, sharex=True,
-                                 gridspec_kw={'height_ratios':
+                                 gridspec_kw={'height_ratios': height_ratios},
                                  **fig_args)
         plt.subplots_adjust(hspace=region_hspace)
    elif mode=="m":
@@ -179,18 +191,58 @@ def plot_stacked_mqq(objects,
                                             **mqq_args_for_each_plot[index]
                                             )
         lead_variants_is[index] = (lead_i,lead_i2)
-
+
+    if len(region_chromatin_files)>0 and mode=="r":
+        xlim_i = axes[-1].get_xlim()
+        fig = _plot_chromatin_state( region_chromatin_files = region_chromatin_files,
+                                     region_chromatin_labels = region_chromatin_labels,
+                                     region = region,
+                                     fig = fig,
+                                     ax = axes[-2],
+                                     xlim_i=xlim_i,
+                                     log=log,
+                                     verbose=verbose)
     # adjust labels
     # drop labels for each plot
     # set a common laebl for all plots
 
-
-
+    #if title_box is None:
+    #    title_box = dict(boxstyle='square', facecolor='white', alpha=1.0, edgecolor="black")
+    #    title_box = {}
+
+    if title_args is None:
+        title_args = {}
+    if titles is not None and mode=="r":
+        if title_pos is None:
+            title_pos = [0.01,0.01]
        for index,title in enumerate(titles):
-
+
+            current_text = axes[index].text(title_pos[0], title_pos[1] , title, transform=axes[index].transAxes,ha="left", va='bottom',zorder=999999, **title_args)
+            r = fig.canvas.get_renderer()
+            bb = current_text.get_window_extent(renderer=r).transformed(axes[index].transAxes.inverted())
+            width = bb.width
+            height = bb.height
+
+            rect = patches.Rectangle((0.0,0.0),
+                                     height=height + 0.02*2,
+                                     width=width + 0.01*2,
+                                     transform=axes[index].transAxes,
+                                     linewidth=1,
+                                     edgecolor='black',
+                                     facecolor='white',
+                                     alpha=1.0,
+                                     zorder=99998)
+            axes[index].add_patch(rect)
+            rect.set(zorder=99998)
+    else:
+        if title_pos is None:
+            title_pos = [0.01,0.97]
+        for index,title in enumerate(titles):
+            axes[index].text(title_pos[0], title_pos[1] , title, transform=axes[index].transAxes,ha="left", va='top',zorder=999999, **title_args)
+
     ##########################################################################################################################################
     # draw the line for lead variants
-    _draw_grid_line_for_lead_variants(mode, lead_variants_is, n_plot, axes, region_lead_grid_line)
+    _draw_grid_line_for_lead_variants(mode, lead_variants_is, n_plot, axes, region_lead_grid_line,region_chromatin_files)
 
     ##########################################################################################################################################
     _drop_old_y_labels(axes, n_plot)
@@ -208,12 +260,16 @@ def _drop_old_y_labels(axes, n_plot):
     for index in range(n_plot):
         axes[index].set_ylabel("")
 
-def _draw_grid_line_for_lead_variants(mode, lead_variants_is, n_plot, axes, region_lead_grid_line):
+def _draw_grid_line_for_lead_variants(mode, lead_variants_is, n_plot, axes, region_lead_grid_line,region_chromatin_files):
+    if len(region_chromatin_files)>0:
+        n_plot_and_track = n_plot+2
+    else:
+        n_plot_and_track = n_plot+1
     if mode=="r":
         for index, sig_is in lead_variants_is.items():
             for sig_i in sig_is:
                 if sig_i is not None:
-                    for each_axis_index in range(
+                    for each_axis_index in range(n_plot_and_track):
                         axes[each_axis_index].axvline(x=sig_i, zorder=2,**region_lead_grid_line)
 
 def _add_new_y_label(mode, fig, gene_track_height,n_plot,subplot_height ):
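A hedged sketch of how the new chromatin-state track options appear to be wired into plot_stacked_mqq (assuming plot_stacked_mqq is exposed at package level as in the gwaslab docs; otherwise import it from gwaslab.viz_plot_stackedregional). File paths, labels, Sumstats objects and the region are placeholders, and only parameters visible in this diff are used; the return value is not shown in the diff, so it is not captured here.

    import gwaslab as gl

    # sumstats1 / sumstats2 are placeholder gl.Sumstats objects;
    # the VCF paths are placeholder LD reference files for mode="r".
    gl.plot_stacked_mqq(objects=[sumstats1, sumstats2],
                        vcfs=["ref.vcf.gz", "ref.vcf.gz"],
                        mode="r",
                        region=(6, 29500000, 33500000),
                        region_chromatin_files=["E062_15_coreMarks_mnemonics.bed.gz"],  # placeholder chromHMM file
                        region_chromatin_labels=["PBMC"],
                        region_chromatin_height=0.1)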
{gwaslab-3.4.44.dist-info → gwaslab-3.4.46.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: gwaslab
-Version: 3.4.
+Version: 3.4.46
 Summary: A collection of handy tools for GWAS SumStats
 Author-email: Yunye <yunye@gwaslab.com>
 Project-URL: Homepage, https://cloufield.github.io/gwaslab/
@@ -8,16 +8,16 @@ Project-URL: Github, https://github.com/Cloufield/gwaslab
 Classifier: Programming Language :: Python :: 3
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Operating System :: OS Independent
-Requires-Python:
+Requires-Python: <3.11,>=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
 License-File: LICENSE_before_v3.4.39
 Requires-Dist: pandas !=1.5,>=1.3
-Requires-Dist: numpy
-Requires-Dist: matplotlib !=3.7.2,>=3.5
+Requires-Dist: numpy <2,>=1.21.2
+Requires-Dist: matplotlib !=3.7.2,<3.9,>=3.5
 Requires-Dist: seaborn >=0.12
 Requires-Dist: scipy >=1.12
-Requires-Dist: pySAM
+Requires-Dist: pySAM ==0.22.1
 Requires-Dist: Biopython >=1.79
 Requires-Dist: adjustText <=0.8,>=0.7.3
 Requires-Dist: liftover >=1.1.13
@@ -51,7 +51,7 @@ Warning: Known issues of GWASLab are summarized in [https://cloufield.github.io/
 ### install via pip
 
 ```
-pip install gwaslab==3.4.
+pip install gwaslab==3.4.45
 ```
 
 ```python
@@ -90,7 +90,7 @@ Create a Python 3.9 environment and install gwaslab using pip:
 ```
 conda env create -n gwaslab_test -c conda-forge python=3.9
 conda activate gwaslab
-pip install gwaslab==3.4.
+pip install gwaslab==3.4.45
 ```
 
 or create a new environment using yml file [environment_3.4.40.yml](https://github.com/Cloufield/gwaslab/blob/main/environment_3.4.40.yml)
{gwaslab-3.4.44.dist-info → gwaslab-3.4.46.dist-info}/RECORD
CHANGED
@@ -1,22 +1,22 @@
-gwaslab/__init__.py,sha256=
-gwaslab/bd_common_data.py,sha256=
+gwaslab/__init__.py,sha256=7TKJaODdpeuQKibL7gIEa4MtyQ0pmrU-vIHQ-Et27lQ,2433
+gwaslab/bd_common_data.py,sha256=qr6OMbBaTH2Smfu8347SO9NmF410tn8dq8pRGF5-OpY,13751
 gwaslab/bd_config.py,sha256=TP-r-DPhJD3XnRYZbw9bQHXaDIkiRgK8bG9HCt-UaLc,580
 gwaslab/bd_download.py,sha256=cDDk2C5IvjeAzvPvVYGTkI4Ss33DUtEDjGo8eAbQRvY,15663
 gwaslab/bd_get_hapmap3.py,sha256=asNjQYeGfQi8u3jnfenRvDdKMs5ptql5wpcUzqMlwUI,3937
 gwaslab/cache_manager.py,sha256=HOTnSkCOyGEPLRl90WT8D_6pAdI8d8AzenMIDGuCeWc,28113
 gwaslab/g_Log.py,sha256=C3Zv-_6c3C9ms8bgQ-ytplz22sjk7euqXYkWr9zNeAs,1573
 gwaslab/g_Phenotypes.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-gwaslab/g_Sumstats.py,sha256=
-gwaslab/g_SumstatsPair.py,sha256=
+gwaslab/g_Sumstats.py,sha256=TUcFQFyODS_-FYMdXDvrBijG4Qtfi1igIWM-eEgb0nc,35352
+gwaslab/g_SumstatsPair.py,sha256=20snPb4SlI6ftMGVjgxAuyxsxYRQF-GzzlBSnoB-3Lo,8851
 gwaslab/g_SumstatsT.py,sha256=u_DighLMnMxwTLnqm-B58pA0G6WXRj6pudPyKMVKjSU,2133
 gwaslab/g_Sumstats_summary.py,sha256=FECvvFXJVKaCX5dggBvvk9YvJ6AbdbcLfjltysX7wEE,6380
 gwaslab/g_meta.py,sha256=htWlgURWclm9R6UqFcX1a93WN27xny7lGUeyJZOtszQ,2583
-gwaslab/g_vchange_status.py,sha256=
-gwaslab/g_version.py,sha256=
+gwaslab/g_vchange_status.py,sha256=jLoVzMJFhB5k_cJKzHuBNc2HZGBWydAunCNa0n_d54g,1923
+gwaslab/g_version.py,sha256=g2bR-qFeFvLADj57VXMT5dufwba2YGD86hfDHRYURfU,1818
 gwaslab/hm_casting.py,sha256=FqP4EQl83Q2OKLw004OgLIvUH795TVCGwziLk5jsHqY,11368
-gwaslab/hm_harmonize_sumstats.py,sha256=
+gwaslab/hm_harmonize_sumstats.py,sha256=1hjUdle2DSKHGBp2BktfFqf-QHU_q2xWl_mPhiYc_ZA,78616
 gwaslab/hm_rsid_to_chrpos.py,sha256=ODWREO0jPN0RAfNzL5fRzSRANfhiksOvUVPuEsFZQqA,6552
-gwaslab/io_preformat_input.py,sha256=
+gwaslab/io_preformat_input.py,sha256=AZ43WGqVTzbo3XtClWhjRjsj6pBR9stw6JBL_TZ461U,20673
 gwaslab/io_read_ldsc.py,sha256=8S9n4imgl4d0WPms_GYld-6uUM5z7iWGiCA-M814kzY,12123
 gwaslab/io_read_tabular.py,sha256=EG-C6KhCutt4J4LlOMgXnqzJvU-EZXzVhMvaDFnHrMM,2380
 gwaslab/io_to_formats.py,sha256=QuGWdvnAamaZAuhymj-0SuNBaKz1maTTyH396gvVaO8,29229
@@ -28,7 +28,7 @@ gwaslab/ldsc_parse.py,sha256=MBnfgcWlV4oHp9MoDRh1mpilaHhAR15Af77hMFn4-5k,10564
 gwaslab/ldsc_regressions.py,sha256=yzbGjgNV7u-SWXNPsh9S8y9mK97Bim_Nmad9G9V18ZU,30078
 gwaslab/ldsc_sumstats.py,sha256=O0olsDxKlh1MJ1gAuEN1t40rxhajOEwOQ20ak7xoDrI,26245
 gwaslab/qc_check_datatype.py,sha256=kW68uk4dTLOU2b1dHoVat6n0loundDysAjIqxsXW28Q,3379
-gwaslab/qc_fix_sumstats.py,sha256
+gwaslab/qc_fix_sumstats.py,sha256=-DQz5dPW6YXXVP-LV2Txa4lJrpZHhqAoKNny6IYAW18,93100
 gwaslab/run_script.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 gwaslab/util_ex_calculate_ldmatrix.py,sha256=LpE__LoYRHLgVKlCHo6lYWlz9LEUVUDqYPEAP-Svbm0,14598
 gwaslab/util_ex_calculate_prs.py,sha256=5l1eiZs8YwIpEgp7i3IurP8n5KwQM5awbG9fWSm4iT4,9053
@@ -47,23 +47,26 @@ gwaslab/util_in_calculate_power.py,sha256=JfHJFg3tNF0f4NHgWlzVW2mSxCiP07mAHIyEfV
 gwaslab/util_in_convert_h2.py,sha256=a8Cbudt3xn9WP2bPc-7ysuowB-LYub8j8GeDXl7Lk7Q,6483
 gwaslab/util_in_correct_winnerscurse.py,sha256=Gp--yAQ8MMzdkWIvXP9C1BHVjZc-YzqHfYWhAj19w9w,2110
 gwaslab/util_in_fill_data.py,sha256=gdTwYA6FvBMnrtxAeL0lEj_Z0aGIoRNPScWDlJvZWeQ,14021
-gwaslab/util_in_filter_value.py,sha256=
+gwaslab/util_in_filter_value.py,sha256=dY4X66N9A4MHCRHjPqLYFufMM91ggLRwUBf_nJYh8Lg,23605
 gwaslab/util_in_get_density.py,sha256=kpKXH69acMkeYVG5vs-VbJC3COhmuLBfYco-wuOxgjc,3934
-gwaslab/util_in_get_sig.py,sha256=
+gwaslab/util_in_get_sig.py,sha256=9kq1GXacknO2YnVmsTli1GlPA728ASweTZ3UKm3Wszo,38783
+gwaslab/util_in_meta.py,sha256=5K9lIZcIgUy0AERqHy1GvMN2X6dp45JUUgopuDLgt4o,11284
+gwaslab/util_in_snphwe.py,sha256=-KpIDx6vn_nah6H55IkV2OyjXQVXV13XyBL069WE1wM,1751
 gwaslab/viz_aux_annotate_plot.py,sha256=R-1GT89E4NEBAMNTYzNawdi9rjQV5LCnODgnYOOKsys,32184
+gwaslab/viz_aux_chromatin.py,sha256=NzbFFpbwAMH-39F8z0qJaExw-JcKYcAlzyzbMkvFo5M,4002
 gwaslab/viz_aux_quickfix.py,sha256=Z6ZNEAUFuWVDTzH-qGreNGxPxJLCmqhXtBrvDOgo4g8,18308
 gwaslab/viz_aux_reposition_text.py,sha256=iRIP-Rkltlei068HekJcVubiqPrunBqvAoSQ1eHk04M,4304
 gwaslab/viz_aux_save_figure.py,sha256=nL-aoE8Kg06h7FgleGRBIZjhI-6w5gpn3E1HWMwBig8,2664
 gwaslab/viz_plot_compare_af.py,sha256=qtXW45-Sq_ugK8ZfqBYMpmf58SKi3lB3YyHnzn_akcE,5344
-gwaslab/viz_plot_compare_effect.py,sha256=
+gwaslab/viz_plot_compare_effect.py,sha256=iA74jMzh-G65U6BeXyQro08tPlJWpNyvtrjFsYHLvFM,49505
 gwaslab/viz_plot_forestplot.py,sha256=xgOnefh737CgdQxu5naVyRNBX1NQXPFKzf51fbh6afs,6771
 gwaslab/viz_plot_miamiplot.py,sha256=rCFEp7VNuVqeBBG3WRkmFAtFklbF79BvIQQYiSY70VY,31238
 gwaslab/viz_plot_miamiplot2.py,sha256=SWv82D8UBbREKsk8EoKth-2w68l6FbXyVLsb_E1hh8o,15882
-gwaslab/viz_plot_mqqplot.py,sha256=
+gwaslab/viz_plot_mqqplot.py,sha256=oVFiLe6Xv_-ryY8I36tGjU9StjM_ust72YSSfnZgaUg,61828
 gwaslab/viz_plot_qqplot.py,sha256=psQgVpP29686CEZkzQz0iRbApzqy7aE3GGiBcazVvNw,7247
-gwaslab/viz_plot_regionalplot.py,sha256=
+gwaslab/viz_plot_regionalplot.py,sha256=8u-5-yfy-UaXhaxVVz3Y5k2kBAoqzczUw1hyyD450iI,37983
 gwaslab/viz_plot_rg_heatmap.py,sha256=PidUsgOiEVt6MfBPCF3_yDhOEytZ-I1q-ZD6_0pFrV4,13713
-gwaslab/viz_plot_stackedregional.py,sha256=
+gwaslab/viz_plot_stackedregional.py,sha256=yWxAJyqA3Kv9PUFTLzgbAu_hP7wi3_3bVVQlJECm8Gc,14687
 gwaslab/viz_plot_trumpetplot.py,sha256=ZHdc6WcVx0-oKoj88yglRkmB4bS9pOiEMcuwKW35Yvo,42672
 gwaslab/data/formatbook.json,sha256=N2nJs80HH98Rsu9FxaSvIQO9J5yIV97WEtAKjRqYwiY,38207
 gwaslab/data/reference.json,sha256=k8AvvgDsuLxzv-NCJHWvTUZ5q_DLAFxs1Th3jtL313k,11441
@@ -73,9 +76,9 @@ gwaslab/data/hapmap3_SNPs/hapmap3_db150_hg19.snplist.gz,sha256=qD9RsC5S2h6l-OdpW
 gwaslab/data/hapmap3_SNPs/hapmap3_db151_hg38.snplist.gz,sha256=Y8ZT2FIAhbhlgCJdE9qQVAiwnV_fcsPt72usBa7RSBM,10225828
 gwaslab/data/high_ld/high_ld_hla_hg19.bed.gz,sha256=R7IkssKu0L4WwkU9SrS84xCMdrkkKL0gnTNO_OKbG0Y,219
 gwaslab/data/high_ld/high_ld_hla_hg38.bed.gz,sha256=76CIU0pibDJ72Y6UY-TbIKE9gEPwTELAaIbCXyjm80Q,470
-gwaslab-3.4.
-gwaslab-3.4.
-gwaslab-3.4.
-gwaslab-3.4.
-gwaslab-3.4.
-gwaslab-3.4.
+gwaslab-3.4.46.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+gwaslab-3.4.46.dist-info/LICENSE_before_v3.4.39,sha256=GhLOU_1UDEKeOacYhsRN_m9u-eIuVTazSndZPeNcTZA,1066
+gwaslab-3.4.46.dist-info/METADATA,sha256=aw5qahaxh7TAYCLNPdBO1FmHCWQk3mQcOlZohaGqorw,7765
+gwaslab-3.4.46.dist-info/WHEEL,sha256=cpQTJ5IWu9CdaPViMhC9YzF8gZuS5-vlfoFihTBC86A,91
+gwaslab-3.4.46.dist-info/top_level.txt,sha256=PyY6hWtrALpv2MAN3kjkIAzJNmmBTH5a2risz9KwH08,8
+gwaslab-3.4.46.dist-info/RECORD,,
{gwaslab-3.4.44.dist-info → gwaslab-3.4.46.dist-info}/LICENSE
File without changes
{gwaslab-3.4.44.dist-info → gwaslab-3.4.46.dist-info}/LICENSE_before_v3.4.39
File without changes
{gwaslab-3.4.44.dist-info → gwaslab-3.4.46.dist-info}/top_level.txt
File without changes