gwaslab 3.4.23__py3-none-any.whl → 3.4.25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gwaslab might be problematic. Click here for more details.
- gwaslab/Sumstats.py +2 -0
- gwaslab/compare_effect.py +2 -0
- gwaslab/data/reference.json +26 -0
- gwaslab/download.py +12 -2
- gwaslab/fixdata.py +1 -1
- gwaslab/mqqplot.py +86 -32
- gwaslab/read_ldsc.py +46 -42
- gwaslab/to_formats.py +7 -2
- gwaslab/trumpetplot.py +9 -3
- gwaslab/version.py +2 -2
- {gwaslab-3.4.23.dist-info → gwaslab-3.4.25.dist-info}/METADATA +4 -4
- {gwaslab-3.4.23.dist-info → gwaslab-3.4.25.dist-info}/RECORD +15 -15
- {gwaslab-3.4.23.dist-info → gwaslab-3.4.25.dist-info}/WHEEL +1 -1
- {gwaslab-3.4.23.dist-info → gwaslab-3.4.25.dist-info}/LICENSE +0 -0
- {gwaslab-3.4.23.dist-info → gwaslab-3.4.25.dist-info}/top_level.txt +0 -0
gwaslab/Sumstats.py
CHANGED
|
@@ -605,6 +605,7 @@ class Sumstats():
|
|
|
605
605
|
build="19",
|
|
606
606
|
n=None,
|
|
607
607
|
verbose=True,
|
|
608
|
+
no_status=False,
|
|
608
609
|
output_log=True,
|
|
609
610
|
to_csvargs=None,
|
|
610
611
|
float_formats=None,
|
|
@@ -713,6 +714,7 @@ class Sumstats():
|
|
|
713
714
|
suffix=suffix,
|
|
714
715
|
build=build,
|
|
715
716
|
verbose=True,
|
|
717
|
+
no_status=no_status,
|
|
716
718
|
log=onetime_log,
|
|
717
719
|
to_csvargs=to_csvargs,
|
|
718
720
|
chr_prefix=chr_prefix,
|
gwaslab/compare_effect.py
CHANGED
|
@@ -728,6 +728,8 @@ def compare_effect(path1,
|
|
|
728
728
|
if verbose:log.write(" -Beta_se = ", reg[4])
|
|
729
729
|
#if verbose:log.write(" -H0 beta = ", null_beta, ", recalculated p = ", "{:.2e}".format(p))
|
|
730
730
|
if verbose:log.write(" -H0 beta = 0",", default p = ", "{:.2e}".format(reg[3]))
|
|
731
|
+
if verbose:log.write(" -Peason correlation coefficient = ", "{:.2f}".format(reg[2]))
|
|
732
|
+
if verbose:log.write(" -r2 = ", "{:.2f}".format(reg[2]**2))
|
|
731
733
|
if r_se==True:
|
|
732
734
|
if verbose:log.write(" -R se (jackknife) = {:.2e}".format(r_se_jackknife))
|
|
733
735
|
|
gwaslab/data/reference.json
CHANGED
|
@@ -10,6 +10,30 @@
|
|
|
10
10
|
"1kg_eur_hg38":"https://www.dropbox.com/s/z0mkehg17lryapv/EUR.ALL.split_norm_af.1kg_30x.hg38.vcf.gz?dl=1",
|
|
11
11
|
"1kg_eur_hg38_md5":"228d3285fa99132cc6321e2925e0768d",
|
|
12
12
|
"1kg_eur_hg38_tbi":"https://www.dropbox.com/s/ze8g58x75x9qbf0/EUR.ALL.split_norm_af.1kg_30x.hg38.vcf.gz.tbi?dl=1",
|
|
13
|
+
"1kg_sas_hg19":"https://www.dropbox.com/scl/fi/fubqvuj3p4ii4y35zknv8/SAS.ALL.split_norm_af.1kgp3v5.hg19.vcf.gz?rlkey=5z50f66iltjchcaszznq5bczt&dl=1",
|
|
14
|
+
"1kg_sas_hg19_md5":"e2d3f9e2e6580d05e877e9effd435c4e",
|
|
15
|
+
"1kg_sas_hg19_tbi":"https://www.dropbox.com/scl/fi/icnmrnzee7ofdpx5l96tg/SAS.ALL.split_norm_af.1kgp3v5.hg19.vcf.gz.tbi?rlkey=st8t88snby26q37rqi6zh5zck&dl=1",
|
|
16
|
+
"1kg_amr_hg19":"https://www.dropbox.com/scl/fi/bxa4zfngsxsc38rhtiv8c/AMR.ALL.split_norm_af.1kgp3v5.hg19.vcf.gz?rlkey=ibcn8hb1n8n36j3u0jfzci267&dl=1",
|
|
17
|
+
"1kg_amr_hg19_md5":"68d3cdf01cbabdae6e74a07795fa881c",
|
|
18
|
+
"1kg_amr_hg19_tbi":"https://www.dropbox.com/scl/fi/1zk16x7h4r89jurzwu05u/AMR.ALL.split_norm_af.1kgp3v5.hg19.vcf.gz.tbi?rlkey=b4cere4w38zvzyfitfge3r8n0&dl=1",
|
|
19
|
+
"1kg_sas_hg38":"https://www.dropbox.com/scl/fi/jr3l5zz42py3kny2bccmj/SAS.ALL.split_norm_af.1kg_30x.hg38.vcf.gz?rlkey=x0t6tsy71jxzf021wfqdn8k5q&dl=1",
|
|
20
|
+
"1kg_sas_hg38_md5":"e5d79bea1958aa50c23f618d342ccc83",
|
|
21
|
+
"1kg_sas_hg38_tbi":"https://www.dropbox.com/scl/fi/02oia4ur5r7w9qgiuf6i9/SAS.ALL.split_norm_af.1kg_30x.hg38.vcf.gz.tbi?rlkey=00p9rxe0xzfs6hr1rg4d8oadm&dl=1",
|
|
22
|
+
"1kg_amr_hg38":"https://www.dropbox.com/scl/fi/4t4tyuhzp78uyb6tgkroq/AMR.ALL.split_norm_af.1kg_30x.hg38.vcf.gz?rlkey=p96gbs1tcdia31jnjv1b82kuz&dl=1",
|
|
23
|
+
"1kg_amr_hg38_md5":"229fbd610001cf6f137b7f738352a44a",
|
|
24
|
+
"1kg_amr_hg38_tbi":"https://www.dropbox.com/scl/fi/x0dby543tr9xpaqj2i0ba/AMR.ALL.split_norm_af.1kg_30x.hg38.vcf.gz.tbi?rlkey=uj8o7j0cy0spipe174jn54sqs&dl=1",
|
|
25
|
+
"1kg_afr_hg19":"https://www.dropbox.com/scl/fi/tq4w9lyt5z47ym7grtrxg/AFR.ALL.split_norm_af.1kgp3v5.hg19.vcf.gz?rlkey=k3bimeu3yr5loq8hohba5mr6k&dl=1",
|
|
26
|
+
"1kg_afr_hg19_md5":"f7b4425f39e8292dce6f13711e7f6c50",
|
|
27
|
+
"1kg_afr_hg19_tbi":"https://www.dropbox.com/scl/fi/0giiptu0btwj1kfm6jdzr/AFR.ALL.split_norm_af.1kgp3v5.hg19.vcf.gz.tbi?rlkey=ucb5weprsc5prcg8hvtgmruxx&dl=1",
|
|
28
|
+
"1kg_pan_hg19":"https://www.dropbox.com/scl/fi/6b4j9z9knmllfnbx86aw6/PAN.ALL.split_norm_af.1kgp3v5.hg19.vcf.gz?rlkey=eento8vg06zyrkvooc9wd4cvu&dl=1",
|
|
29
|
+
"1kg_pan_hg19_md5":"fed846482204487b60d33b21ddb18106",
|
|
30
|
+
"1kg_pan_hg19_tbi":"https://www.dropbox.com/scl/fi/stco946scio5tvto0ln4j/PAN.ALL.split_norm_af.1kgp3v5.hg19.vcf.gz.tbi?rlkey=hfh53beb627lmqwv3d8mzqy0c&dl=1",
|
|
31
|
+
"1kg_afr_hg38":"https://www.dropbox.com/scl/fi/239xmm7qijtnsks97chc9/AFR.ALL.split_norm_af.1kg_30x.hg38.vcf.gz?rlkey=47en5fk1icbekpg7we3uot9g8&dl=1",
|
|
32
|
+
"1kg_afr_hg38_md5":"3bb7923be0809a324d7b7633b8d58a3b",
|
|
33
|
+
"1kg_afr_hg38_tbi":"https://www.dropbox.com/scl/fi/3y3pg4yqwo2jaaamx1c8f/AFR.ALL.split_norm_af.1kg_30x.hg38.vcf.gz.tbi?rlkey=say0ihfwa51z3otgn4bjtze8p&dl=1",
|
|
34
|
+
"1kg_pan_hg38":"https://www.dropbox.com/scl/fi/nf01487smtmeq243ihfwm/PAN.ALL.split_norm_af.1kg_30x.hg38.vcf.gz?rlkey=3pefbkzxwcnejx4inynifpft7&dl=1",
|
|
35
|
+
"1kg_pan_hg38_md5":"23bb86d748c4a66e85e087f647e8b60e",
|
|
36
|
+
"1kg_pan_hg38_tbi":"https://www.dropbox.com/scl/fi/hu7cttr4cenw5yjsm2775/PAN.ALL.split_norm_af.1kg_30x.hg38.vcf.gz.tbi?rlkey=568u7bkvkybm4wt2q9284o198&dl=1",
|
|
13
37
|
"dbsnp_v151_hg19": "https://ftp.ncbi.nih.gov/snp/organisms/human_9606_b151_GRCh37p13/VCF/00-All.vcf.gz",
|
|
14
38
|
"dbsnp_v151_hg19_tbi": "https://ftp.ncbi.nih.gov/snp/organisms/human_9606_b151_GRCh37p13/VCF/00-All.vcf.gz.tbi",
|
|
15
39
|
"dbsnp_v151_hg38": "https://ftp.ncbi.nih.gov/snp/organisms/human_9606_b151_GRCh38p7/VCF/00-All.vcf.gz",
|
|
@@ -33,3 +57,5 @@
|
|
|
33
57
|
"testlink":"https://www.dropbox.com/s/8u7capwge0ihshu/EAS.chr22.split_norm_af.1kgp3v5.vcf.gz?dl=1",
|
|
34
58
|
"testlink_tbi":"https://www.dropbox.com/s/hdneg53t6u1j6ib/EAS.chr22.split_norm_af.1kgp3v5.vcf.gz.tbi?dl=1"
|
|
35
59
|
}
|
|
60
|
+
|
|
61
|
+
|
gwaslab/download.py
CHANGED
|
@@ -7,6 +7,7 @@ import shutil
|
|
|
7
7
|
import hashlib
|
|
8
8
|
from gwaslab.Log import Log
|
|
9
9
|
from gwaslab.config import options
|
|
10
|
+
import re
|
|
10
11
|
|
|
11
12
|
#### config ##############################################################################################
|
|
12
13
|
# config.json
|
|
@@ -184,6 +185,7 @@ def get_path(name,log=Log(),verbose=True):
|
|
|
184
185
|
def download_ref(name,
|
|
185
186
|
directory=None,
|
|
186
187
|
local_filename=None,
|
|
188
|
+
overwrite=False,
|
|
187
189
|
log=Log()):
|
|
188
190
|
'''
|
|
189
191
|
Download the reference file for a given keyword. URLs are retrieved from the reference.json file.
|
|
@@ -213,6 +215,9 @@ def download_ref(name,
|
|
|
213
215
|
# if existing in default path
|
|
214
216
|
if search_local(local_path) == True:
|
|
215
217
|
log.write(" -File {} exists.".format(local_path))
|
|
218
|
+
if overwrite == True:
|
|
219
|
+
log.write(" -Overwriting the existing file.")
|
|
220
|
+
download_file(url,local_path)
|
|
216
221
|
else:
|
|
217
222
|
download_file(url,local_path)
|
|
218
223
|
|
|
@@ -233,6 +238,9 @@ def download_ref(name,
|
|
|
233
238
|
try:
|
|
234
239
|
if search_local(local_path+".tbi") == True:
|
|
235
240
|
log.write(" -File {} exists.".format(local_path+".tbi"))
|
|
241
|
+
if overwrite == True:
|
|
242
|
+
log.write(" -Overwriting the existing file.")
|
|
243
|
+
download_file(tbi_url,local_path+".tbi")
|
|
236
244
|
else:
|
|
237
245
|
download_file(tbi_url,local_path+".tbi")
|
|
238
246
|
#download_file(tbi_url, local_path+".tbi")
|
|
@@ -327,12 +335,14 @@ def url_to_local_file_name(local_filename, url, from_dropbox):
|
|
|
327
335
|
# if local name not provided, grab it from url
|
|
328
336
|
local_filename = url.split('/')[-1]
|
|
329
337
|
|
|
330
|
-
if local_filename.endswith("
|
|
338
|
+
if local_filename.endswith("dl=1"):
|
|
331
339
|
# if the file is downloaded from dropbox
|
|
332
340
|
# set from_dropbox indicator to 1
|
|
333
341
|
from_dropbox=1
|
|
334
342
|
# remove the "?dl=1" (or "?rlkey=...&dl=1") suffix
|
|
335
|
-
local_filename = local_filename[:-5]
|
|
343
|
+
#local_filename = local_filename[:-5]
|
|
344
|
+
local_filename = re.match(r'([^\?]+)(\?rlkey=[\w]+)?[&\?]dl=1$', local_filename)
|
|
345
|
+
local_filename = local_filename.group(1)
|
|
336
346
|
return local_filename, from_dropbox
|
|
337
347
|
|
|
338
348
|
##########################################################################################################
|
gwaslab/fixdata.py
CHANGED
gwaslab/mqqplot.py
CHANGED
|
@@ -229,7 +229,7 @@ def mqqplot(insumstats,
|
|
|
229
229
|
if highlight_anno_args is None:
|
|
230
230
|
highlight_anno_args = {}
|
|
231
231
|
if pinpoint is None:
|
|
232
|
-
pinpoint = list()
|
|
232
|
+
pinpoint = list()
|
|
233
233
|
if build is None:
|
|
234
234
|
build = "19"
|
|
235
235
|
if scatter_kwargs is None:
|
|
@@ -255,10 +255,25 @@ def mqqplot(insumstats,
|
|
|
255
255
|
if len(anno_set)>0 and ("m" in mode):
|
|
256
256
|
if verbose: log.write(" -Variants to annotate : "+",".join(anno_set))
|
|
257
257
|
if len(highlight)>0 and ("m" in mode):
|
|
258
|
-
if
|
|
259
|
-
|
|
258
|
+
if pd.api.types.is_list_like(highlight[0]):
|
|
259
|
+
if len(highlight[0]) == len(highlight_color):
|
|
260
|
+
log.write(" -WARNING: number of locus list does not match number of colors !!!")
|
|
261
|
+
for i, highlight_set in enumerate(highlight):
|
|
262
|
+
if verbose: log.write(" -Set {} loci to highlight ({}) : ".format(i+1, highlight_color[i%len(highlight_color)])+",".join(highlight_set))
|
|
263
|
+
if verbose: log.write(" -Highlight_window is set to: ", highlight_windowkb, " kb")
|
|
264
|
+
else:
|
|
265
|
+
if verbose: log.write(" -Loci to highlight ({}): ".format(highlight_color)+",".join(highlight))
|
|
266
|
+
if verbose: log.write(" -Highlight_window is set to: ", highlight_windowkb, " kb")
|
|
267
|
+
|
|
260
268
|
if len(pinpoint)>0 :
|
|
261
|
-
if
|
|
269
|
+
if pd.api.types.is_list_like(pinpoint[0]):
|
|
270
|
+
if len(pinpoint[0]) == len(pinpoint_color):
|
|
271
|
+
log.write(" -WARNING: number of variant list does not match number of colors !!!")
|
|
272
|
+
for i, pinpoint_set in enumerate(pinpoint):
|
|
273
|
+
if verbose: log.write(" -Set {} variants to pinpoint ({}) : ".format(i+1,pinpoint_color[i%len(pinpoint_color)])+",".join(pinpoint_set))
|
|
274
|
+
else:
|
|
275
|
+
if verbose: log.write(" -Variants to pinpoint ({}) : ".format(pinpoint_color)+",".join(pinpoint))
|
|
276
|
+
|
|
262
277
|
if region is not None:
|
|
263
278
|
if verbose: log.write(" -Region to plot : chr"+str(region[0])+":"+str(region[1])+"-"+str(region[2])+".")
|
|
264
279
|
|
|
@@ -425,8 +440,9 @@ def mqqplot(insumstats,
|
|
|
425
440
|
eaf_raw = sumstats["MAF"].copy()
|
|
426
441
|
|
|
427
442
|
if len(highlight)>0 and ("m" in mode):
|
|
428
|
-
sumstats["HUE"] =
|
|
429
|
-
|
|
443
|
+
sumstats["HUE"] = pd.NA
|
|
444
|
+
sumstats["HUE"] = sumstats["HUE"].astype("Int64")
|
|
445
|
+
|
|
430
446
|
if verbose: log.write("Finished loading specified columns from the sumstats.")
|
|
431
447
|
|
|
432
448
|
|
|
@@ -451,15 +467,27 @@ def mqqplot(insumstats,
|
|
|
451
467
|
|
|
452
468
|
## Highlight
|
|
453
469
|
if len(highlight)>0 and ("m" in mode or "r" in mode):
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
470
|
+
if pd.api.types.is_list_like(highlight[0]):
|
|
471
|
+
for i, highlight_set in enumerate(highlight):
|
|
472
|
+
to_highlight = sumstats.loc[sumstats[snpid].isin(highlight_set),:]
|
|
473
|
+
#assign colors: HUE is set to the index i of the highlight set
|
|
474
|
+
for index,row in to_highlight.iterrows():
|
|
475
|
+
target_chr = int(row[chrom])
|
|
476
|
+
target_pos = int(row[pos])
|
|
477
|
+
right_chr=sumstats[chrom]==target_chr
|
|
478
|
+
up_pos=sumstats[pos]>target_pos-highlight_windowkb*1000
|
|
479
|
+
low_pos=sumstats[pos]<target_pos+highlight_windowkb*1000
|
|
480
|
+
sumstats.loc[right_chr&up_pos&low_pos,"HUE"]=i
|
|
481
|
+
else:
|
|
482
|
+
to_highlight = sumstats.loc[sumstats[snpid].isin(highlight),:]
|
|
483
|
+
#assign colors: 0 is highlight color
|
|
484
|
+
for index,row in to_highlight.iterrows():
|
|
485
|
+
target_chr = int(row[chrom])
|
|
486
|
+
target_pos = int(row[pos])
|
|
487
|
+
right_chr=sumstats[chrom]==target_chr
|
|
488
|
+
up_pos=sumstats[pos]>target_pos-highlight_windowkb*1000
|
|
489
|
+
low_pos=sumstats[pos]<target_pos+highlight_windowkb*1000
|
|
490
|
+
sumstats.loc[right_chr&up_pos&low_pos,"HUE"]=0
|
|
463
491
|
|
|
464
492
|
# Density #####################################################################################################
|
|
465
493
|
if "b" in mode and "DENSITY" not in sumstats.columns:
|
|
@@ -612,18 +640,33 @@ def mqqplot(insumstats,
|
|
|
612
640
|
sizes=marker_size,
|
|
613
641
|
linewidth=linewidth,
|
|
614
642
|
zorder=2,ax=ax1,edgecolor=edgecolor, **scatter_kwargs)
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
643
|
+
if pd.api.types.is_list_like(highlight[0]):
|
|
644
|
+
for i, highlight_set in enumerate(highlight):
|
|
645
|
+
if verbose: log.write(" -Highlighting set {} target loci...".format(i+1))
|
|
646
|
+
print(sumstats["HUE"].dtype)
|
|
647
|
+
sns.scatterplot(data=sumstats.loc[sumstats["HUE"]==i], x='i', y='scaled_P',
|
|
648
|
+
hue="HUE",
|
|
649
|
+
palette={i:highlight_color[i%len(highlight_color)]},
|
|
650
|
+
legend=legend,
|
|
651
|
+
style=style,
|
|
652
|
+
size="s",
|
|
653
|
+
sizes=(marker_size[0]+1,marker_size[1]+1),
|
|
654
|
+
linewidth=linewidth,
|
|
655
|
+
zorder=3+i,ax=ax1,edgecolor=edgecolor,**scatter_kwargs)
|
|
656
|
+
highlight_i = sumstats.loc[~sumstats["HUE"].isna(),"i"].values
|
|
657
|
+
else:
|
|
658
|
+
if verbose: log.write(" -Highlighting target loci...")
|
|
659
|
+
sns.scatterplot(data=sumstats.loc[sumstats["HUE"]==0], x='i', y='scaled_P',
|
|
660
|
+
hue="HUE",
|
|
661
|
+
palette={0:highlight_color},
|
|
662
|
+
legend=legend,
|
|
663
|
+
style=style,
|
|
664
|
+
size="s",
|
|
665
|
+
sizes=(marker_size[0]+1,marker_size[1]+1),
|
|
666
|
+
linewidth=linewidth,
|
|
667
|
+
zorder=3,ax=ax1,edgecolor=edgecolor,**scatter_kwargs)
|
|
668
|
+
# for annotate
|
|
669
|
+
highlight_i = sumstats.loc[sumstats[snpid].isin(highlight),"i"].values
|
|
627
670
|
|
|
628
671
|
## if not highlight
|
|
629
672
|
else:
|
|
@@ -673,12 +716,23 @@ def mqqplot(insumstats,
|
|
|
673
716
|
|
|
674
717
|
## if pinpoint variants
|
|
675
718
|
if (len(pinpoint)>0):
|
|
676
|
-
if
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
719
|
+
if pd.api.types.is_list_like(pinpoint[0]):
|
|
720
|
+
for i, pinpoint_set in enumerate(pinpoint):
|
|
721
|
+
if sum(sumstats[snpid].isin(pinpoint_set))>0:
|
|
722
|
+
to_pinpoint = sumstats.loc[sumstats[snpid].isin(pinpoint_set),:]
|
|
723
|
+
if verbose: log.write(" -Pinpointing set {} target vairants...".format(i+1))
|
|
724
|
+
ax1.scatter(to_pinpoint["i"],to_pinpoint["scaled_P"],color=pinpoint_color[i%len(pinpoint_color)],zorder=100,s=marker_size[1]+1)
|
|
725
|
+
else:
|
|
726
|
+
if verbose: log.write(" -Target vairants to pinpoint were not found. Skip pinpointing process...")
|
|
680
727
|
else:
|
|
681
|
-
if
|
|
728
|
+
if sum(sumstats[snpid].isin(pinpoint))>0:
|
|
729
|
+
to_pinpoint = sumstats.loc[sumstats[snpid].isin(pinpoint),:]
|
|
730
|
+
if verbose: log.write(" -Pinpointing target vairants...")
|
|
731
|
+
ax1.scatter(to_pinpoint["i"],to_pinpoint["scaled_P"],color=pinpoint_color,zorder=100,s=marker_size[1]+1)
|
|
732
|
+
else:
|
|
733
|
+
if verbose: log.write(" -Target vairants to pinpoint were not found. Skip pinpointing process...")
|
|
734
|
+
|
|
735
|
+
|
|
682
736
|
|
|
683
737
|
#ax1.set_xticks(chrom_df.astype("float64"))
|
|
684
738
|
#ax1.set_xticklabels(chrom_df.index.astype("Int64").map(xtick_chr_dict),fontsize=fontsize,family=font_family)
|
gwaslab/read_ldsc.py
CHANGED
|
@@ -105,52 +105,56 @@ def read_popcorn(filelist=[]):
|
|
|
105
105
|
#h2 mode
|
|
106
106
|
#####################################################################
|
|
107
107
|
summary = pd.DataFrame(columns = ["Filename", 'sfile1', 'sfile2', 'mode', 'pg', 'pg_se','pg_z','pg_p', 'h1^2', 'h1^2_se','h1^2_z','h1^2_p', 'h2^2', 'h2^2_se','h2^2_z','h2^2_p'])
|
|
108
|
-
|
|
108
|
+
|
|
109
109
|
for index, ldscfile in enumerate(filelist):
|
|
110
110
|
print("Loading file "+str(index+1)+" :" + ldscfile +" ...")
|
|
111
|
-
|
|
111
|
+
|
|
112
112
|
row={}
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
line = file.readline()
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
113
|
+
try:
|
|
114
|
+
with open(ldscfile,"r") as file:
|
|
115
|
+
row["Filename"]=ldscfile.split("/")[-1]
|
|
116
|
+
line=""
|
|
117
|
+
while not re.compile('^Invoking command').match(line):
|
|
118
|
+
line = file.readline()
|
|
119
|
+
if not line: break
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
## first line h2 se
|
|
123
|
+
objects = re.compile('--sfile1 ([^\s]+) --sfile2 ([^\s]+)[ /n]').findall(line)
|
|
124
|
+
row["sfile1"]=objects[0][0]
|
|
125
|
+
row["sfile2"]=objects[0][1]
|
|
126
|
+
|
|
127
|
+
#while not re.compile(r'^Jackknife iter:').match(line):
|
|
128
|
+
# line = file.readline()
|
|
129
|
+
# print(line)
|
|
130
|
+
# if not line: break
|
|
131
|
+
while not re.compile(r'P \(Z\)').findall(line.strip()):
|
|
132
|
+
line = file.readline()
|
|
133
|
+
if not line: break
|
|
134
|
+
|
|
135
|
+
#objects = re.compile('[a-zA-Z\s\d]+:|[-0-9.]+[e]?[-0-9.]+|NA').findall(file.readline())
|
|
136
|
+
objects = file.readline().split()
|
|
137
|
+
row["h1^2"] = objects[1]
|
|
138
|
+
row["h1^2_se"] = objects[2]
|
|
139
|
+
row["h1^2_z"] = objects[3]
|
|
140
|
+
row["h1^2_p"] = objects[4]
|
|
141
|
+
|
|
142
|
+
objects = file.readline().split()
|
|
143
|
+
row["h2^2"] = objects[1]
|
|
144
|
+
row["h2^2_se"] = objects[2]
|
|
145
|
+
row["h2^2_z"] = objects[3]
|
|
146
|
+
row["h2^2_p"] = objects[4]
|
|
147
|
+
|
|
148
|
+
objects = file.readline().split()
|
|
149
|
+
row["mode"] = objects[0]
|
|
150
|
+
row["pg"] = objects[1]
|
|
151
|
+
row["pg_se"] = objects[2]
|
|
152
|
+
row["pg_z"] = objects[3]
|
|
153
|
+
row["pg_p"] = objects[4]
|
|
154
|
+
except:
|
|
155
|
+
continue
|
|
152
156
|
|
|
153
157
|
#summary = summary.append(row,ignore_index=True)
|
|
154
158
|
row = pd.DataFrame([row], columns = summary.columns)
|
|
155
159
|
summary = pd.concat([summary, row], ignore_index=True)
|
|
156
|
-
return summary
|
|
160
|
+
return summary
|
gwaslab/to_formats.py
CHANGED
|
@@ -36,6 +36,7 @@ def tofmt(sumstats,
|
|
|
36
36
|
bgzip=False,
|
|
37
37
|
tabix=False,
|
|
38
38
|
verbose=True,
|
|
39
|
+
no_status=False,
|
|
39
40
|
log=Log(),
|
|
40
41
|
to_csvargs=None):
|
|
41
42
|
|
|
@@ -50,7 +51,7 @@ def tofmt(sumstats,
|
|
|
50
51
|
if verbose: log.write(" - Start outputting sumstats in "+fmt+" format...")
|
|
51
52
|
|
|
52
53
|
if "CHR" in sumstats.columns:
|
|
53
|
-
if xymt_number is False and sumstats["CHR"]
|
|
54
|
+
if xymt_number is False and pd.api.types.is_integer_dtype(sumstats["CHR"]):
|
|
54
55
|
sumstats["CHR"]= sumstats["CHR"].map(get_number_to_chr(xymt=xymt,prefix=chr_prefix))
|
|
55
56
|
elif chr_prefix is not None:
|
|
56
57
|
sumstats["CHR"]= chr_prefix + sumstats["CHR"].astype("string")
|
|
@@ -364,7 +365,11 @@ def tofmt(sumstats,
|
|
|
364
365
|
ouput_cols.append(i)
|
|
365
366
|
# + additional cols
|
|
366
367
|
ouput_cols = ouput_cols + cols
|
|
367
|
-
|
|
368
|
+
try:
|
|
369
|
+
if no_status == True:
|
|
370
|
+
ouput_cols.remove("STATUS")
|
|
371
|
+
except:
|
|
372
|
+
pass
|
|
368
373
|
sumstats = sumstats.loc[:,ouput_cols]
|
|
369
374
|
sumstats = sumstats.rename(columns=rename_dictionary)
|
|
370
375
|
|
gwaslab/trumpetplot.py
CHANGED
|
@@ -46,13 +46,15 @@ def plottrumpet(mysumstats,
|
|
|
46
46
|
yscale_factor=1,
|
|
47
47
|
cmap="cool",
|
|
48
48
|
ylim=None,
|
|
49
|
+
xlim=None,
|
|
49
50
|
markercolor="#597FBD",
|
|
50
51
|
fontsize=15,
|
|
51
52
|
font_family="Arial",
|
|
52
|
-
size= "
|
|
53
|
+
size= "ABS_BETA",
|
|
53
54
|
sizes=None,
|
|
54
55
|
save=False,
|
|
55
56
|
saveargs=None,
|
|
57
|
+
figargs=None,
|
|
56
58
|
build="99",
|
|
57
59
|
anno_set=None,
|
|
58
60
|
anno_alias=None,
|
|
@@ -94,7 +96,8 @@ def plottrumpet(mysumstats,
|
|
|
94
96
|
else:
|
|
95
97
|
xticks = [0,0.01,0.05,0.1,0.2,0.5]
|
|
96
98
|
xticklabels = xticks
|
|
97
|
-
|
|
99
|
+
if figargs is None:
|
|
100
|
+
figargs={"figsize":(10,15)}
|
|
98
101
|
#Checking columns#################################################################################################################
|
|
99
102
|
if verbose: log.write("Start to create trumpet plot...")
|
|
100
103
|
|
|
@@ -201,7 +204,7 @@ def plottrumpet(mysumstats,
|
|
|
201
204
|
output_hex_colors
|
|
202
205
|
|
|
203
206
|
##################################################################################################
|
|
204
|
-
fig, ax = plt.subplots(
|
|
207
|
+
fig, ax = plt.subplots(**figargs)
|
|
205
208
|
|
|
206
209
|
##creating power line############################################################################################
|
|
207
210
|
if mode=="q":
|
|
@@ -284,6 +287,9 @@ def plottrumpet(mysumstats,
|
|
|
284
287
|
rotation=90
|
|
285
288
|
ax.set_xticks(xticks,xticklabels,fontsize=fontsize,rotation=rotation)
|
|
286
289
|
ax.set_xlim(-0.02,0.52)
|
|
290
|
+
|
|
291
|
+
if xlim is not None:
|
|
292
|
+
ax.set_xlim(xlim)
|
|
287
293
|
|
|
288
294
|
if ylim is not None:
|
|
289
295
|
ax.set_ylim(ylim)
|
gwaslab/version.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: gwaslab
|
|
3
|
-
Version: 3.4.
|
|
3
|
+
Version: 3.4.25
|
|
4
4
|
Summary: A collection of handy tools for GWAS SumStats
|
|
5
5
|
Author-email: Yunye <yunye@gwaslab.com>
|
|
6
6
|
Project-URL: Homepage, https://cloufield.github.io/gwaslab/
|
|
@@ -8,12 +8,12 @@ Project-URL: Github, https://github.com/Cloufield/gwaslab
|
|
|
8
8
|
Classifier: Programming Language :: Python :: 3
|
|
9
9
|
Classifier: License :: OSI Approved :: MIT License
|
|
10
10
|
Classifier: Operating System :: OS Independent
|
|
11
|
-
Requires-Python:
|
|
11
|
+
Requires-Python: <=3.9,>=3.8
|
|
12
12
|
Description-Content-Type: text/markdown
|
|
13
13
|
License-File: LICENSE
|
|
14
14
|
Requires-Dist: pandas <1.5,>=1.3
|
|
15
15
|
Requires-Dist: numpy >=1.21.2
|
|
16
|
-
Requires-Dist: matplotlib
|
|
16
|
+
Requires-Dist: matplotlib <=3.7.1,>=3.5
|
|
17
17
|
Requires-Dist: seaborn >=0.11.1
|
|
18
18
|
Requires-Dist: scipy >=1.6.2
|
|
19
19
|
Requires-Dist: pySAM <0.20,>=0.18.1
|
|
@@ -45,7 +45,7 @@ Note: GWASLab is being updated very frequently for now. I will release the first
|
|
|
45
45
|
## Install
|
|
46
46
|
|
|
47
47
|
```
|
|
48
|
-
pip install gwaslab==3.4.
|
|
48
|
+
pip install gwaslab==3.4.24
|
|
49
49
|
```
|
|
50
50
|
|
|
51
51
|
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
gwaslab/CommonData.py,sha256=tcT6g2NCJfYMoNK4OGe9MYedO374DqHSY0-rBg32L8o,11660
|
|
2
2
|
gwaslab/Log.py,sha256=FPUOdEtPkHvKEqTezWI-QDHWHZvqpveWnuG136eNV1U,783
|
|
3
3
|
gwaslab/Phenotypes.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
-
gwaslab/Sumstats.py,sha256=
|
|
4
|
+
gwaslab/Sumstats.py,sha256=IqpZUUbXPTHwaX7DoxR8KP087rFqrq71Gd6PYZC4dCI,28951
|
|
5
5
|
gwaslab/SumstatsMT.py,sha256=jqG45y1DfKEzrOE9Yl8C7SzJTYveo6by34rK64VxuQ0,67
|
|
6
6
|
gwaslab/Sumstatssummary.py,sha256=Q69702tABxTVk6QIbqQ4k6eSujhXr_MVVQ5saOdEhlk,6367
|
|
7
7
|
gwaslab/__init__.py,sha256=gQXNQFdjmfyTnV-h9IO7ovJjTZxu-0V_cNCwZ6CfKVE,1800
|
|
@@ -10,14 +10,14 @@ gwaslab/annotateplot.py,sha256=jarkA00LabrRc194YVOMuHkThOye5KHehgQFM8f2T5k,30805
|
|
|
10
10
|
gwaslab/calculate_gc.py,sha256=ntglVBVU6NBmQRdqmq0FwQeEhW8-N4IszL314T4zt-w,2174
|
|
11
11
|
gwaslab/calculate_power.py,sha256=xZeIrlX_dX7Ghpc1mor8nkHcHBH6zR70C0nPycwoIuc,10110
|
|
12
12
|
gwaslab/compare_af.py,sha256=rLYWQMGCDxdffMvTdNdgYfQl8yUgpk1jIyGHnTyQ06g,5637
|
|
13
|
-
gwaslab/compare_effect.py,sha256=
|
|
13
|
+
gwaslab/compare_effect.py,sha256=hTF2-ogn9pTZcHLq6BbOAMoUVk9PZ1yf51jB7eGLpg8,48328
|
|
14
14
|
gwaslab/config.py,sha256=TP-r-DPhJD3XnRYZbw9bQHXaDIkiRgK8bG9HCt-UaLc,580
|
|
15
15
|
gwaslab/datatype_check.py,sha256=X19qdqkiMUyNAUO2ew-Y2kNca_64vP9EaKAYDRHCSVg,696
|
|
16
|
-
gwaslab/download.py,sha256=
|
|
16
|
+
gwaslab/download.py,sha256=KkocZcIf87RIEzVZplmzcje81sWJi6aQwqnwZ6TgGvo,15589
|
|
17
17
|
gwaslab/figuresave.py,sha256=Hb6_FmXzwLKXBol8EzUzDufjh1q1sweD2rjbbKVCsPw,2019
|
|
18
18
|
gwaslab/fill.py,sha256=IF3-4l36RvPXjDDJDv2f-D6EGtSDOl9FykX2N5TfzOI,13854
|
|
19
19
|
gwaslab/filtervalue.py,sha256=pU-m_UcYJL1sE4b3rRjip97b7IsCtQRm1yR4oY5opFY,13151
|
|
20
|
-
gwaslab/fixdata.py,sha256=
|
|
20
|
+
gwaslab/fixdata.py,sha256=Ls2fbB_V2M5FvCkY3FBB0spwJydWmt4SqiKXnhH6mVs,75537
|
|
21
21
|
gwaslab/get_hapmap3.py,sha256=vU_5aRqWpIgaK4luddBCziGkIZBnUeO5-GWmm0SdLiU,2032
|
|
22
22
|
gwaslab/getdensity.py,sha256=voxVYa2T90JErX9_8yHI8vQGaM9IdUHo87_jSjfdrnQ,3903
|
|
23
23
|
gwaslab/getsig.py,sha256=AtPtgcVw3brjJKC6x4lPYSqV-w2QgOWBiehQYD1kX3c,19886
|
|
@@ -26,33 +26,33 @@ gwaslab/h2_conversion.py,sha256=z_iJ-Pa_DuBnANAn2hm3rN7DjnlUAvLFnltieA0APQg,6466
|
|
|
26
26
|
gwaslab/meta.py,sha256=kIDfLSC2TFidCX_yJdNk2ITfBhWtfrd2J-Q2a_FODns,2191
|
|
27
27
|
gwaslab/metaanalysis.py,sha256=MuFaxzj9vnebut4pWUg29pGxMcPzWcqbEm3q7t-BUpQ,6770
|
|
28
28
|
gwaslab/miamiplot.py,sha256=oXDcE7f3UDRoyU3XFzoi3dNBP9ixREvKvFJer8hAmsY,30696
|
|
29
|
-
gwaslab/mqqplot.py,sha256=
|
|
29
|
+
gwaslab/mqqplot.py,sha256=wOs1sr8_8f1JosWmmPqCk-7SOyP3qwRKrB0gc8--Da0,45365
|
|
30
30
|
gwaslab/plotrg.py,sha256=EKl4c07nMHvRyReeq9zMnTlqT4dnkOrwl9VHP5JTgK4,13851
|
|
31
31
|
gwaslab/preformat_input.py,sha256=T1RdGbGs5L3NaRes1UN72U9i6y8kvWww02OdVVO3bIk,19211
|
|
32
32
|
gwaslab/processdata.py,sha256=hJQNkrR4IOLw1pZK_OA1v1iNwOmhvlCW9meGnpP5vfU,573
|
|
33
33
|
gwaslab/qqplot.py,sha256=jttKROhsBMnLFZWW2iDqxB6uI_96ASUdKTCAbPqL7yo,7000
|
|
34
34
|
gwaslab/quickfix.py,sha256=ga_ZQYm1mR32oQ2jwEZbq0iX72Qo1Z8UR_aESZs5uvE,17703
|
|
35
|
-
gwaslab/read_ldsc.py,sha256=
|
|
35
|
+
gwaslab/read_ldsc.py,sha256=29RjmBUD_NILcyEl3eFPIHZXFiHHDzbPslg2MoKGekU,7075
|
|
36
36
|
gwaslab/regionalplot.py,sha256=RKg2p9xuxpq5kXvJQLmxBayrMT-BE2DrG4lLiAz3iSg,35442
|
|
37
37
|
gwaslab/retrievedata.py,sha256=Njvi0zGvM3Tjw7nH0scgzgNh5E9mTWI5L9Kb977aRIA,35461
|
|
38
38
|
gwaslab/rsID2chrpos.py,sha256=-pKhY654zS4uULW7FP4yGHNy3e9Wc2ujc1VLBaoUO5A,6564
|
|
39
39
|
gwaslab/textreposition.py,sha256=vNJM7ybnHKMWkqSJ3l9lb5rSDy_UMBuvTd0gPCjbr2w,4231
|
|
40
|
-
gwaslab/to_formats.py,sha256=
|
|
40
|
+
gwaslab/to_formats.py,sha256=S09v6A_SF4i9Ci7N4pfGuZmyR-Bkg3mpMQM6wNTsR4A,20505
|
|
41
41
|
gwaslab/to_pickle.py,sha256=OOoLXiC0Wemyb7jwyeUNU7xMJTAOUT2XDXrd-sxUFFg,1175
|
|
42
|
-
gwaslab/trumpetplot.py,sha256=
|
|
42
|
+
gwaslab/trumpetplot.py,sha256=am_JoHvgTpbDTGwC4uEGNK6BO-mlQKp_vBQGzwARx30,36413
|
|
43
43
|
gwaslab/vchangestatus.py,sha256=id3oUsnMrZth7E-jTQHUfNx2UE0b1ABLO9ajbuTMcjY,1527
|
|
44
|
-
gwaslab/version.py,sha256=
|
|
44
|
+
gwaslab/version.py,sha256=kJICGeHVUv_NSWGbf3qpyVayD_Q-6b0upXRW-LuxbOk,409
|
|
45
45
|
gwaslab/winnerscurse.py,sha256=o7CjUwLp4B_60yrntV76ESNaoPcl-HPOzSw9cYWzpW4,2050
|
|
46
46
|
gwaslab/data/formatbook.json,sha256=DEw31JnEIRhtubwpMdRox1_0g5NinYazFlaz5_E9ulE,26631
|
|
47
|
-
gwaslab/data/reference.json,sha256=
|
|
47
|
+
gwaslab/data/reference.json,sha256=c5Gcv0o5xCm36CCH8dB_TwfXzq2xtOr--Qix4mWAjWY,6481
|
|
48
48
|
gwaslab/data/chrx_par/chrx_par_hg19.bed.gz,sha256=LocZg_ozhZjQiIpgWCO4EYCW9xgkEKpRy1m-YdIpzQs,83
|
|
49
49
|
gwaslab/data/chrx_par/chrx_par_hg38.bed.gz,sha256=VFW11MnQVC-Iu-ZGvUDcEhVpb-HVRsVTg-W-GNJyxP4,82
|
|
50
50
|
gwaslab/data/hapmap3_SNPs/hapmap3_db150_hg19.snplist.gz,sha256=qD9RsC5S2h6l-OdpW09DWXHIi2kcPebctMnhxt8mzfU,10282886
|
|
51
51
|
gwaslab/data/hapmap3_SNPs/hapmap3_db151_hg38.snplist.gz,sha256=Y8ZT2FIAhbhlgCJdE9qQVAiwnV_fcsPt72usBa7RSBM,10225828
|
|
52
52
|
gwaslab/data/high_ld/high_ld_hla_hg19.bed.gz,sha256=R7IkssKu0L4WwkU9SrS84xCMdrkkKL0gnTNO_OKbG0Y,219
|
|
53
53
|
gwaslab/data/high_ld/high_ld_hla_hg38.bed.gz,sha256=76CIU0pibDJ72Y6UY-TbIKE9gEPwTELAaIbCXyjm80Q,470
|
|
54
|
-
gwaslab-3.4.
|
|
55
|
-
gwaslab-3.4.
|
|
56
|
-
gwaslab-3.4.
|
|
57
|
-
gwaslab-3.4.
|
|
58
|
-
gwaslab-3.4.
|
|
54
|
+
gwaslab-3.4.25.dist-info/LICENSE,sha256=GhLOU_1UDEKeOacYhsRN_m9u-eIuVTazSndZPeNcTZA,1066
|
|
55
|
+
gwaslab-3.4.25.dist-info/METADATA,sha256=V4LSU4Eh5j34DLCrqJaXuwTmrg75q8qX8HABcPCHvOs,6839
|
|
56
|
+
gwaslab-3.4.25.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
|
|
57
|
+
gwaslab-3.4.25.dist-info/top_level.txt,sha256=PyY6hWtrALpv2MAN3kjkIAzJNmmBTH5a2risz9KwH08,8
|
|
58
|
+
gwaslab-3.4.25.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|