gwaslab 3.5.1__tar.gz → 3.5.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gwaslab might be problematic. Click here for more details.
- {gwaslab-3.5.1/src/gwaslab.egg-info → gwaslab-3.5.3}/PKG-INFO +1 -1
- {gwaslab-3.5.1 → gwaslab-3.5.3}/pyproject.toml +1 -1
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/__init__.py +1 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/g_Sumstats.py +7 -1
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/g_version.py +2 -2
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/io_process_args.py +25 -8
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/io_to_formats.py +90 -23
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/viz_aux_annotate_plot.py +3 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/viz_aux_chromatin.py +1 -1
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/viz_aux_quickfix.py +8 -6
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/viz_aux_save_figure.py +2 -1
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/viz_plot_mqqplot.py +43 -17
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/viz_plot_regional2.py +69 -13
- gwaslab-3.5.3/src/gwaslab/viz_plot_scatter_with_reg.py +229 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/viz_plot_stackedregional.py +20 -3
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/viz_plot_trumpetplot.py +5 -5
- {gwaslab-3.5.1 → gwaslab-3.5.3/src/gwaslab.egg-info}/PKG-INFO +1 -1
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab.egg-info/SOURCES.txt +1 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/LICENSE +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/LICENSE_before_v3.4.39 +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/README.md +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/setup.cfg +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/bd_common_data.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/bd_config.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/bd_download.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/bd_get_hapmap3.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/cache_manager.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/data/chrx_par/chrx_par_hg19.bed.gz +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/data/chrx_par/chrx_par_hg38.bed.gz +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/data/formatbook.json +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/data/hapmap3_SNPs/hapmap3_db150_hg19.snplist.gz +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/data/hapmap3_SNPs/hapmap3_db151_hg38.snplist.gz +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/data/high_ld/high_ld_hla_hg19.bed.gz +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/data/high_ld/high_ld_hla_hg38.bed.gz +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/data/reference.json +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/g_Log.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/g_Phenotypes.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/g_SumstatsPair.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/g_SumstatsT.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/g_Sumstats_summary.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/g_meta.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/g_vchange_status.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/hm_casting.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/hm_harmonize_sumstats.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/hm_rsid_to_chrpos.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/io_preformat_input.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/io_read_ldsc.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/io_read_tabular.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/io_to_pickle.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/ldsc_irwls.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/ldsc_jackknife.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/ldsc_ldscore.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/ldsc_parse.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/ldsc_regressions.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/ldsc_sumstats.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/qc_check_datatype.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/qc_fix_sumstats.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/run_script.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_abf_finemapping.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_ex_calculate_ldmatrix.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_ex_calculate_prs.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_ex_gwascatalog.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_ex_ldproxyfinder.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_ex_ldsc.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_ex_plink_filter.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_ex_process_h5.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_ex_process_ref.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_ex_run_2samplemr.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_ex_run_clumping.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_ex_run_coloc.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_ex_run_susie.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_in_calculate_gc.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_in_calculate_power.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_in_convert_h2.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_in_correct_winnerscurse.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_in_fill_data.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_in_filter_value.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_in_get_density.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_in_get_sig.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_in_meta.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_in_snphwe.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/vis_plot_credible sets.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/viz_aux_property.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/viz_aux_reposition_text.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/viz_plot_compare_af.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/viz_plot_compare_effect.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/viz_plot_forestplot.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/viz_plot_miamiplot.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/viz_plot_miamiplot2.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/viz_plot_phe_heatmap.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/viz_plot_qqplot.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/viz_plot_regionalplot.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/viz_plot_rg_heatmap.py +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab.egg-info/dependency_links.txt +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab.egg-info/requires.txt +0 -0
- {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab.egg-info/top_level.txt +0 -0
|
@@ -46,3 +46,4 @@ from gwaslab.util_ex_process_h5 import process_vcf_to_hfd5
|
|
|
46
46
|
from gwaslab.util_ex_run_susie import _run_susie_rss as run_susie_rss
|
|
47
47
|
from gwaslab.io_read_tabular import _read_tabular as read_tabular
|
|
48
48
|
from gwaslab.util_in_meta import meta_analyze
|
|
49
|
+
from gwaslab.viz_plot_scatter_with_reg import scatter
|
|
@@ -389,6 +389,12 @@ class Sumstats():
|
|
|
389
389
|
self.data =flipallelestats(self.data,log=self.log,**flipallelestats_args)
|
|
390
390
|
|
|
391
391
|
gc.collect()
|
|
392
|
+
|
|
393
|
+
if (ref_seq is not None or ref_infer is not None) and (ref_rsid_tsv is not None or ref_rsid_vcf is not None):
|
|
394
|
+
|
|
395
|
+
self.data = fixID(self.data, log=self.log, **{"fixid":True, "fixsep":True, "overwrite":True})
|
|
396
|
+
|
|
397
|
+
gc.collect()
|
|
392
398
|
|
|
393
399
|
#####################################################
|
|
394
400
|
if ref_rsid_tsv is not None:
|
|
@@ -833,4 +839,4 @@ class Sumstats():
|
|
|
833
839
|
def to_format(self, path, build=None, verbose=True, **kwargs):
|
|
834
840
|
if build is None:
|
|
835
841
|
build = self.meta["gwaslab"]["genome_build"]
|
|
836
|
-
_to_format(self.data, path, log=self.log, verbose=verbose, meta=self.meta, build=build, **kwargs)
|
|
842
|
+
_to_format(self.data, path, log=self.log, verbose=verbose, meta=self.meta, build=build, **kwargs)
|
|
@@ -1,23 +1,40 @@
|
|
|
1
1
|
import copy
|
|
2
|
-
def _merge_and_sync_dic(list_of_dics:list, default:dict) -> dict:
|
|
3
|
-
temp = copy.copy(default)
|
|
4
|
-
for dic in list_of_dics:
|
|
5
|
-
if isinstance(dic, dict):
|
|
6
|
-
temp.update(dic)
|
|
7
|
-
return temp
|
|
8
2
|
|
|
9
3
|
def _list_func_args(func):
    """Return the variable names stored on the code object of ``func``.

    This is ``func.__code__.co_varnames`` unchanged: the parameter names
    come first, followed by the names of the function's other local
    variables, so the result is a superset of the accepted arguments.
    """
    return func.__code__.co_varnames


def _extract_kwargs(prefix:str, default:dict, kwargs:dict) -> dict:
    """Collect the options addressed to ``prefix`` and merge them over ``default``.

    Parameters
    ----------
    prefix : str
        Keyword identifying the target, e.g. ``"scatter"``.
    default : dict
        Default options; it is copied, never modified in place.
    kwargs : dict
        Typically ``locals()`` of the caller: its own keyword parameters plus
        the special entries ``"kwargs"`` / ``"args"`` holding pass-through
        arguments.

    Returns
    -------
    dict
        ``default`` updated with every matching option dictionary, then with
        the individually supplied options (which therefore take precedence).

    Notes
    -----
    A key matches when it contains both ``prefix`` and ``"arg"``.  Inside the
    nested ``"kwargs"`` / ``"args"`` mapping, a key with fewer than three
    ``_``-separated parts (``prefix_args``) carries a whole option dict, while
    a longer key (``prefix_arg_fontsize``) carries one option named after its
    last part.  The parts are counted on the whole key, so a prefix that
    itself contains ``_`` always falls into the single-option case.
    """
    extracted = []
    extracted_single = {}

    for key, value in kwargs.items():
        if key in ("kwargs", "args"):
            # ``*args`` captured through locals() is a tuple, not a mapping;
            # only a dict can carry named options, so skip anything else.
            if not isinstance(value, dict):
                continue
            for key_nested, value_nested in value.items():
                if prefix not in key_nested or "arg" not in key_nested:
                    continue
                parts = key_nested.split("_")
                if len(parts) < 3:
                    # prefix_args -> a complete option dictionary
                    extracted.append(value_nested)
                else:
                    # prefix_arg_fontsize -> a single option ("fontsize")
                    extracted_single[parts[-1]] = value_nested
        elif prefix in key and "arg" in key:
            # local keyword parameter of the caller, e.g. scatter_args={...}
            extracted.append(value)

    # Single options are merged last so they override the option dictionaries.
    if extracted_single:
        extracted.append(extracted_single)

    return _merge_and_sync_dic(extracted, default)


def _merge_and_sync_dic(list_of_dics:list, default:dict) -> dict:
    """Return a shallow copy of ``default`` updated with each dict in order.

    Entries of ``list_of_dics`` that are not dictionaries (for example
    ``None`` for an option that was not supplied) are ignored.
    """
    merged = copy.copy(default)
    for candidate in list_of_dics:
        if isinstance(candidate, dict):
            merged.update(candidate)
    return merged
|
|
@@ -28,6 +28,7 @@ from gwaslab.util_in_filter_value import _extract
|
|
|
28
28
|
def _to_format(sumstats,
|
|
29
29
|
path="./sumstats",
|
|
30
30
|
fmt="gwaslab",
|
|
31
|
+
tab_fmt="tsv",
|
|
31
32
|
extract=None,
|
|
32
33
|
exclude=None,
|
|
33
34
|
cols=None,
|
|
@@ -39,7 +40,6 @@ def _to_format(sumstats,
|
|
|
39
40
|
n=None,
|
|
40
41
|
no_status=False,
|
|
41
42
|
output_log=True,
|
|
42
|
-
to_csvargs=None,
|
|
43
43
|
float_formats=None,
|
|
44
44
|
xymt_number=False,
|
|
45
45
|
xymt=None,
|
|
@@ -47,20 +47,30 @@ def _to_format(sumstats,
|
|
|
47
47
|
meta=None,
|
|
48
48
|
ssfmeta=False,
|
|
49
49
|
md5sum=False,
|
|
50
|
+
gzip=True,
|
|
50
51
|
bgzip=False,
|
|
51
52
|
tabix=False,
|
|
52
53
|
tabix_indexargs={},
|
|
54
|
+
to_csvargs=None,
|
|
55
|
+
to_tabular_kwargs=None,
|
|
53
56
|
log=Log(),
|
|
54
57
|
verbose=True):
|
|
55
58
|
|
|
56
|
-
if
|
|
57
|
-
to_csvargs
|
|
59
|
+
if to_csvargs is None:
|
|
60
|
+
to_csvargs=dict()
|
|
61
|
+
if tabix_indexargs is None:
|
|
62
|
+
tabix_indexargs=dict()
|
|
63
|
+
if to_tabular_kwargs is None:
|
|
64
|
+
to_tabular_kwargs=dict()
|
|
58
65
|
if float_formats is None:
|
|
59
|
-
float_formats=
|
|
66
|
+
float_formats=dict()
|
|
60
67
|
if cols is None:
|
|
61
68
|
cols=[]
|
|
62
69
|
if xymt is None:
|
|
63
70
|
xymt = ["X","Y","MT"]
|
|
71
|
+
non_gzip_tab_fmt = ["parquet"]
|
|
72
|
+
non_md5sum_tab_fmt = ["parquet"]
|
|
73
|
+
|
|
64
74
|
onetime_log = copy.deepcopy(log)
|
|
65
75
|
|
|
66
76
|
#######################################################################################################
|
|
@@ -154,6 +164,7 @@ def _to_format(sumstats,
|
|
|
154
164
|
tofmt(output,
|
|
155
165
|
path=path,
|
|
156
166
|
fmt=fmt,
|
|
167
|
+
tab_fmt=tab_fmt,
|
|
157
168
|
cols=cols,
|
|
158
169
|
suffix=suffix,
|
|
159
170
|
build=build,
|
|
@@ -164,9 +175,13 @@ def _to_format(sumstats,
|
|
|
164
175
|
chr_prefix=chr_prefix,
|
|
165
176
|
meta=meta,
|
|
166
177
|
ssfmeta=ssfmeta,
|
|
178
|
+
gzip=gzip,
|
|
167
179
|
bgzip=bgzip,
|
|
180
|
+
non_gzip_tab_fmt=non_gzip_tab_fmt,
|
|
181
|
+
non_md5sum_tab_fmt=non_md5sum_tab_fmt,
|
|
168
182
|
tabix=tabix,
|
|
169
183
|
tabix_indexargs=tabix_indexargs,
|
|
184
|
+
to_tabular_kwargs=to_tabular_kwargs,
|
|
170
185
|
md5sum=md5sum,
|
|
171
186
|
xymt_number=xymt_number,
|
|
172
187
|
xymt=xymt)
|
|
@@ -186,6 +201,7 @@ def tofmt(sumstats,
|
|
|
186
201
|
path=None,
|
|
187
202
|
suffix=None,
|
|
188
203
|
fmt=None,
|
|
204
|
+
tab_fmt="csv",
|
|
189
205
|
cols=[],
|
|
190
206
|
xymt_number=False,
|
|
191
207
|
xymt=["X","Y","MT"],
|
|
@@ -194,15 +210,16 @@ def tofmt(sumstats,
|
|
|
194
210
|
ssfmeta=False,
|
|
195
211
|
md5sum=False,
|
|
196
212
|
bgzip=False,
|
|
213
|
+
gzip=True,
|
|
214
|
+
non_gzip_tab_fmt=None,
|
|
215
|
+
non_md5sum_tab_fmt=None,
|
|
197
216
|
tabix=False,
|
|
198
|
-
tabix_indexargs=
|
|
217
|
+
tabix_indexargs=None,
|
|
199
218
|
verbose=True,
|
|
200
219
|
no_status=False,
|
|
201
220
|
log=Log(),
|
|
202
|
-
to_csvargs=None
|
|
203
|
-
|
|
204
|
-
if to_csvargs is None:
|
|
205
|
-
to_csvargs=dict()
|
|
221
|
+
to_csvargs=None,
|
|
222
|
+
to_tabular_kwargs=None):
|
|
206
223
|
|
|
207
224
|
if fmt in ["ssf"]:
|
|
208
225
|
xymt_number=True
|
|
@@ -336,36 +353,86 @@ def tofmt(sumstats,
|
|
|
336
353
|
_bgzip_tabix_md5sum(path, fmt, bgzip, md5sum, tabix, tabix_indexargs, log, verbose)
|
|
337
354
|
|
|
338
355
|
####################################################################################################################
|
|
339
|
-
elif fmt in get_formats_list():
|
|
356
|
+
elif fmt in get_formats_list() :
|
|
340
357
|
# tabular
|
|
341
358
|
log.write(" -"+fmt+" format will be loaded...",verbose=verbose)
|
|
342
359
|
meta_data,rename_dictionary = get_format_dict(fmt,inverse=True)
|
|
343
360
|
print_format_info(fmt=fmt, meta_data=meta_data,rename_dictionary=rename_dictionary,verbose=verbose, log=log, output=True)
|
|
344
361
|
|
|
345
|
-
|
|
346
|
-
|
|
362
|
+
# determine if gzip or not / create path for output
|
|
363
|
+
if gzip ==True and tab_fmt not in non_gzip_tab_fmt:
|
|
364
|
+
path = path + "."+suffix+".{}.gz".format(tab_fmt)
|
|
365
|
+
else:
|
|
366
|
+
path = path + "."+suffix+".{}".format(tab_fmt)
|
|
367
|
+
|
|
368
|
+
yaml_path = path + "."+suffix+".{}-meta.yaml".format(tab_fmt)
|
|
347
369
|
log.write(" -Output path:",path, verbose=verbose)
|
|
348
|
-
|
|
370
|
+
|
|
349
371
|
sumstats,to_csvargs = _configure_output_cols_and_args(sumstats, rename_dictionary, cols, no_status, path, meta_data, to_csvargs, log, verbose)
|
|
350
372
|
|
|
351
373
|
log.write(" -Writing sumstats to: {}...".format(path),verbose=verbose)
|
|
352
|
-
try:
|
|
353
|
-
fast_to_csv(sumstats, path, to_csvargs=to_csvargs, compress=True, write_in_chunks=True)
|
|
354
|
-
except:
|
|
355
|
-
log.write(f"Error in using fast_to_csv. Falling back to original implementation.",verbose=verbose)
|
|
356
|
-
sumstats.to_csv(path, index=None, **to_csvargs)
|
|
357
|
-
|
|
358
|
-
if md5sum == True:
|
|
359
|
-
md5_value = md5sum_file(path,log,verbose)
|
|
360
|
-
else:
|
|
361
|
-
md5_value = calculate_md5sum_file(path)
|
|
362
374
|
|
|
375
|
+
#if tab_fmt=="tsv" or tab_fmt=="csv":
|
|
376
|
+
# try:
|
|
377
|
+
# log.write(f" -Fast to csv mode...",verbose=verbose)
|
|
378
|
+
# fast_to_csv(sumstats, path, to_csvargs=to_csvargs, compress=True, write_in_chunks=True)
|
|
379
|
+
# except:
|
|
380
|
+
# log.write(f"Error in using fast_to_csv. Falling back to original implementation.",verbose=verbose)
|
|
381
|
+
# sumstats.to_csv(path, index=None, **to_csvargs)
|
|
382
|
+
#
|
|
383
|
+
#elif tab_fmt=="parquet":
|
|
384
|
+
# sumstats.to_parquet(path, index=None, **to_tabular_kwargs)
|
|
385
|
+
_write_tabular(sumstats,rename_dictionary, path, tab_fmt, to_csvargs, to_tabular_kwargs, log, verbose)
|
|
386
|
+
|
|
387
|
+
if tab_fmt not in non_md5sum_tab_fmt and "@" not in path:
|
|
388
|
+
if md5sum == True:
|
|
389
|
+
# write a md5sum file
|
|
390
|
+
md5_value = md5sum_file(path,log,verbose)
|
|
391
|
+
else:
|
|
392
|
+
# calculate md5sum without saveing a file
|
|
393
|
+
md5_value = calculate_md5sum_file(path)
|
|
394
|
+
else:
|
|
395
|
+
md5_value = "NA"
|
|
396
|
+
|
|
363
397
|
## update ssf-style meta data and export to yaml file
|
|
364
398
|
_configure_ssf_meta(sumstats, fmt, ssfmeta, meta, meta_data, path, md5_value, yaml_path, log, verbose)
|
|
365
399
|
|
|
366
400
|
return sumstats
|
|
367
401
|
|
|
368
402
|
####################################################################################################################
|
|
403
|
+
def _write_tabular(sumstats,rename_dictionary, path, tab_fmt, to_csvargs, to_tabular_kwargs, log, verbose):
    """Write ``sumstats`` to ``path`` in the tabular format ``tab_fmt``.

    Parameters
    ----------
    sumstats : pandas.DataFrame
        Table to write.
    rename_dictionary : dict
        Header mapping of the output format; its ``"CHR"`` entry names the
        chromosome column used when splitting the output.
    path : str
        Output path. If it contains ``"@"``, delimited output is split into
        one file per chromosome, with ``"@"`` replaced by the chromosome.
    tab_fmt : str
        ``"tsv"`` or ``"csv"`` (delimited text) or ``"parquet"``.
    to_csvargs : dict or None
        Keyword arguments for the delimited writers.
    to_tabular_kwargs : dict or None
        Keyword arguments for ``DataFrame.to_parquet``.
    log : object
        Logger exposing ``write(message, verbose=...)``.
    verbose : bool
        Passed through to ``log.write``.

    Raises
    ------
    ValueError
        If ``tab_fmt`` is not a supported format (previously nothing was
        written and no error was reported).
    """
    to_csvargs = {} if to_csvargs is None else to_csvargs
    to_tabular_kwargs = {} if to_tabular_kwargs is None else to_tabular_kwargs

    if tab_fmt=="tsv" or tab_fmt=="csv":
        log.write(" -Fast to csv mode...",verbose=verbose)
        if "@" in path:
            # Resolve the chromosome column only when a split is requested.
            # Use the output header when the table carries it; otherwise fall
            # back to the internal "CHR" name.
            chr_header = rename_dictionary.get("CHR", "CHR")
            if chr_header not in sumstats.columns:
                chr_header = "CHR"
            chromosomes = list(sumstats[chr_header].unique())

            log.write(" -@ detected: writing each chromosome to a single file...",verbose=verbose)
            log.write(" -Chromosomes:{}...".format(chromosomes),verbose=verbose)
            for single_chr in chromosomes:
                # Chromosome codes may be integers; str.replace needs a string.
                single_path = path.replace("@", str(single_chr))
                _write_delimited(sumstats.loc[sumstats[chr_header]==single_chr,:],
                                 single_path, to_csvargs, log, verbose)
        else:
            _write_delimited(sumstats, path, to_csvargs, log, verbose)

    elif tab_fmt=="parquet":
        sumstats.to_parquet(path, index=None, **to_tabular_kwargs)

    else:
        raise ValueError("Unsupported tab_fmt: {}. Supported formats: tsv, csv, parquet.".format(tab_fmt))


def _write_delimited(dataframe, path, to_csvargs, log, verbose):
    """Write one delimited file with fast_to_csv, falling back to DataFrame.to_csv."""
    try:
        fast_to_csv(dataframe, path, to_csvargs=to_csvargs, compress=True, write_in_chunks=True)
    except Exception as error:
        # Deliberate best effort: any failure of the fast writer falls back to
        # pandas, but the cause is recorded instead of being discarded.
        log.write("Error in using fast_to_csv. Falling back to original implementation.",verbose=verbose)
        log.write(" -fast_to_csv error: {}: {}".format(type(error).__name__, error),verbose=verbose)
        dataframe.to_csv(path, index=None, **to_csvargs)
|
|
433
|
+
|
|
434
|
+
|
|
435
|
+
|
|
369
436
|
def fast_to_csv(dataframe, path, to_csvargs=None, compress=True, write_in_chunks=True):
|
|
370
437
|
df_numpy = dataframe.to_numpy()
|
|
371
438
|
|
|
@@ -31,6 +31,7 @@ def annotate_single(
|
|
|
31
31
|
arm_scale_d,
|
|
32
32
|
arm_offset,
|
|
33
33
|
anno_adjust,
|
|
34
|
+
anno_xshift,
|
|
34
35
|
anno_fixed_arm_length,
|
|
35
36
|
maxy,
|
|
36
37
|
anno_fontsize,
|
|
@@ -158,6 +159,8 @@ def annotate_single(
|
|
|
158
159
|
if anno_fixed_arm_length is not None:
|
|
159
160
|
xytext=(row["i"],row["scaled_P"] + 0.2 + anno_fixed_arm_length)
|
|
160
161
|
|
|
162
|
+
if anno_xshift is not None:
|
|
163
|
+
xytext = (xytext[0] +(anno_xshift*y_span), xytext[1])
|
|
161
164
|
################################################################################################################################
|
|
162
165
|
# if not changing the directions of some annotation arror arms
|
|
163
166
|
if anno_count not in anno_d.keys():
|
|
@@ -99,7 +99,7 @@ def _plot_chromatin_state(region_chromatin_files,
|
|
|
99
99
|
color=color_dict_i[row["STATE_i"]]
|
|
100
100
|
ax.plot([offset_i + row["START"] ,offset_i + row["END"]],
|
|
101
101
|
[i*0.1,i*0.1],
|
|
102
|
-
c=color/255,linewidth=points_for_01,solid_capstyle="butt")
|
|
102
|
+
c=color/255,linewidth=points_for_01,solid_capstyle="butt",rasterized=True)
|
|
103
103
|
|
|
104
104
|
## add stripe label
|
|
105
105
|
if len(region_chromatin_labels) == len(region_chromatin_files):
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import pandas as pd
|
|
2
2
|
import numpy as np
|
|
3
3
|
from gwaslab.g_Log import Log
|
|
4
|
+
from matplotlib import ticker
|
|
5
|
+
import matplotlib.pyplot as plt
|
|
4
6
|
from gwaslab.bd_common_data import get_chr_to_number
|
|
5
7
|
from gwaslab.bd_common_data import get_number_to_chr
|
|
6
8
|
from math import ceil
|
|
@@ -350,7 +352,7 @@ def _cut(series, mode,cutfactor,cut,skip, ylabels, cut_log, verbose, lines_to_pl
|
|
|
350
352
|
#sumstats.loc[sumstats["scaled_P"]>cut,"scaled_P"] = (sumstats.loc[sumstats["scaled_P"]>cut,"scaled_P"]-cut)/cutfactor + cut
|
|
351
353
|
|
|
352
354
|
maxy = (maxticker-cut)/cutfactor + cut
|
|
353
|
-
|
|
355
|
+
|
|
354
356
|
return series, maxy, maxticker, cut, cutfactor,ylabels,lines_to_plot
|
|
355
357
|
|
|
356
358
|
#def _cut_line(level, mode,cutfactor,cut,skip, ylabels, cut_log, verbose, log):
|
|
@@ -379,10 +381,9 @@ def _set_yticklabels(cut,
|
|
|
379
381
|
log.write(" -Processing Y tick lables...",verbose=verbose)
|
|
380
382
|
# if no cut
|
|
381
383
|
if cut == 0:
|
|
382
|
-
ax1.set_ylim(skip, ceil(maxy*1.2) )
|
|
383
|
-
|
|
384
|
+
ax1.set_ylim((skip, ceil(maxy*1.2)) )
|
|
384
385
|
# if cut
|
|
385
|
-
if cut:
|
|
386
|
+
if cut!=0:
|
|
386
387
|
# add cut line
|
|
387
388
|
|
|
388
389
|
cutline = ax1.axhline(y=cut, linewidth = sc_linewidth,linestyle="--",color=cut_line_color,zorder=1)
|
|
@@ -432,14 +433,15 @@ def _set_yticklabels(cut,
|
|
|
432
433
|
else:
|
|
433
434
|
ax1.set_yticks(ticks1+ticks2)
|
|
434
435
|
ax1.set_yticklabels(tickslabel1+tickslabel2,fontsize=fontsize,family=font_family)
|
|
435
|
-
ax1.set_ylim(bottom = skip)
|
|
436
436
|
|
|
437
437
|
if ylabels is not None:
|
|
438
438
|
ax1.set_yticks(ylabels_converted)
|
|
439
439
|
ax1.set_yticklabels(ylabels,fontsize=fontsize,family=font_family)
|
|
440
440
|
|
|
441
|
+
ylim_top = ax1.get_ylim()[1]
|
|
442
|
+
ax1.set_ybound(lower=skip,upper=ylim_top)
|
|
441
443
|
ax1.tick_params(axis='y', labelsize=fontsize)
|
|
442
|
-
|
|
444
|
+
|
|
443
445
|
return ax1
|
|
444
446
|
|
|
445
447
|
def _jagged_y(cut,skip,ax1,mode,mqqratio,jagged_len,jagged_wid, log=Log(), verbose=True):
|
|
@@ -51,7 +51,8 @@ def get_default_path(keyword,fmt="png"):
|
|
|
51
51
|
"miami":"miami",
|
|
52
52
|
"esc":"effect_size_comparision",
|
|
53
53
|
"afc":"allele_frequency_comparision",
|
|
54
|
-
"gwheatmap":"genome_wide_heatmap"
|
|
54
|
+
"gwheatmap":"genome_wide_heatmap",
|
|
55
|
+
"scatter":"scatter"
|
|
55
56
|
}
|
|
56
57
|
prefix = path_dictionary[keyword]
|
|
57
58
|
count = 1
|
|
@@ -25,6 +25,7 @@ from gwaslab.viz_plot_qqplot import _plot_qq
|
|
|
25
25
|
from gwaslab.hm_harmonize_sumstats import auto_check_vcf_chr_dict
|
|
26
26
|
from gwaslab.viz_plot_regional2 import _plot_regional
|
|
27
27
|
from gwaslab.viz_plot_regional2 import process_vcf
|
|
28
|
+
from gwaslab.viz_plot_regional2 import _get_lead_id
|
|
28
29
|
from gwaslab.viz_aux_quickfix import _get_largenumber
|
|
29
30
|
from gwaslab.viz_aux_quickfix import _quick_fix_p_value
|
|
30
31
|
from gwaslab.viz_aux_quickfix import _quick_fix_pos
|
|
@@ -107,6 +108,7 @@ def mqqplot(insumstats,
|
|
|
107
108
|
region_flank_factor = 0.05,
|
|
108
109
|
region_anno_bbox_args = None,
|
|
109
110
|
region_marker_shapes=None,
|
|
111
|
+
region_legend_marker=True,
|
|
110
112
|
cbar_title='LD $r^{2}$ with variant',
|
|
111
113
|
cbar_fontsize = None,
|
|
112
114
|
cbar_font_family = None,
|
|
@@ -138,6 +140,7 @@ def mqqplot(insumstats,
|
|
|
138
140
|
anno_source = "ensembl",
|
|
139
141
|
anno_gtf_path=None,
|
|
140
142
|
anno_adjust=False,
|
|
143
|
+
anno_xshift=None,
|
|
141
144
|
anno_max_iter=100,
|
|
142
145
|
arrow_kwargs=None,
|
|
143
146
|
arm_offset=None,
|
|
@@ -233,7 +236,7 @@ def mqqplot(insumstats,
|
|
|
233
236
|
if "dpi" not in fig_args.keys():
|
|
234
237
|
fig_args["dpi"] = dpi
|
|
235
238
|
if region_anno_bbox_args is None:
|
|
236
|
-
region_anno_bbox_args =
|
|
239
|
+
region_anno_bbox_args = {"ec":"None","fc":"None"}
|
|
237
240
|
if anno_set is None:
|
|
238
241
|
anno_set=list()
|
|
239
242
|
if anno_alias is None:
|
|
@@ -264,7 +267,7 @@ def mqqplot(insumstats,
|
|
|
264
267
|
|
|
265
268
|
if region_marker_shapes is None:
|
|
266
269
|
# 9 shapes
|
|
267
|
-
region_marker_shapes = ['o', '
|
|
270
|
+
region_marker_shapes = ['o', '^','s','D','*','P','X','h','8']
|
|
268
271
|
if region_grid_line is None:
|
|
269
272
|
region_grid_line = {"linewidth": 2,"linestyle":"--"}
|
|
270
273
|
if region_lead_grid_line is None:
|
|
@@ -626,7 +629,8 @@ def mqqplot(insumstats,
|
|
|
626
629
|
sumstats["chr_hue"]=sumstats["LD"]
|
|
627
630
|
|
|
628
631
|
## default seetings
|
|
629
|
-
|
|
632
|
+
# assign to_plot for scatter plot
|
|
633
|
+
to_plot = None
|
|
630
634
|
palette = sns.color_palette(colors,n_colors=sumstats[chrom].nunique())
|
|
631
635
|
|
|
632
636
|
legend = None
|
|
@@ -639,7 +643,18 @@ def mqqplot(insumstats,
|
|
|
639
643
|
legend=None
|
|
640
644
|
linewidth=1
|
|
641
645
|
if len(region_ref) == 1:
|
|
646
|
+
# hide lead variants -> add back in region plot
|
|
642
647
|
palette = {100+i:region_ld_colors[i] for i in range(len(region_ld_colors))}
|
|
648
|
+
scatter_args["markers"]= {(i+1):m for i,m in enumerate(region_marker_shapes[:2])}
|
|
649
|
+
if region_ref[0] is None:
|
|
650
|
+
id_to_hide = sumstats["scaled_P"].idxmax()
|
|
651
|
+
to_plot = sumstats.drop(id_to_hide, axis=0)
|
|
652
|
+
else:
|
|
653
|
+
#id_to_hide = sumstats[sumstats["SNPID"]==region_ref[0],"scaled_P"].idxmax()
|
|
654
|
+
id_to_hide = _get_lead_id(sumstats, region_ref, log=log, verbose=verbose)
|
|
655
|
+
if id_to_hide is not None:
|
|
656
|
+
to_plot = sumstats.drop(id_to_hide, axis=0)
|
|
657
|
+
style="SHAPE"
|
|
643
658
|
else:
|
|
644
659
|
palette = {}
|
|
645
660
|
region_color_maps = []
|
|
@@ -652,21 +667,24 @@ def mqqplot(insumstats,
|
|
|
652
667
|
# 1 + 5 + 1
|
|
653
668
|
region_ld_colors_single = [region_ld_colors[0]] + output_hex_colors + [output_hex_colors[-1]]
|
|
654
669
|
region_color_maps.append(region_ld_colors_single)
|
|
655
|
-
|
|
670
|
+
|
|
671
|
+
# gradient color dict
|
|
656
672
|
for i, hex_colors in enumerate(region_color_maps):
|
|
657
673
|
for j, hex_color in enumerate(hex_colors):
|
|
658
674
|
palette[(i+1)*100 + j ] = hex_color
|
|
659
675
|
|
|
660
676
|
edgecolor="none"
|
|
677
|
+
# create a marker shape dict
|
|
661
678
|
scatter_args["markers"]= {(i+1):m for i,m in enumerate(region_marker_shapes[:len(region_ref)])}
|
|
662
679
|
style="SHAPE"
|
|
663
680
|
|
|
664
|
-
|
|
681
|
+
|
|
665
682
|
## if highlight
|
|
666
683
|
highlight_i = pd.DataFrame()
|
|
667
684
|
if len(highlight) >0:
|
|
685
|
+
to_plot = sumstats
|
|
668
686
|
log.write(" -Creating background plot...",verbose=verbose)
|
|
669
|
-
plot = sns.scatterplot(data=
|
|
687
|
+
plot = sns.scatterplot(data=to_plot, x='i', y='scaled_P',
|
|
670
688
|
hue='chr_hue',
|
|
671
689
|
palette=palette,
|
|
672
690
|
legend=legend,
|
|
@@ -678,7 +696,7 @@ def mqqplot(insumstats,
|
|
|
678
696
|
if pd.api.types.is_list_like(highlight[0]) and highlight_chrpos==False:
|
|
679
697
|
for i, highlight_set in enumerate(highlight):
|
|
680
698
|
log.write(" -Highlighting set {} target loci...".format(i+1),verbose=verbose)
|
|
681
|
-
sns.scatterplot(data=
|
|
699
|
+
sns.scatterplot(data=to_plot.loc[to_plot["HUE"]==i], x='i', y='scaled_P',
|
|
682
700
|
hue="HUE",
|
|
683
701
|
palette={i:highlight_color[i%len(highlight_color)]},
|
|
684
702
|
legend=legend,
|
|
@@ -687,10 +705,10 @@ def mqqplot(insumstats,
|
|
|
687
705
|
sizes=(marker_size[0]+1,marker_size[1]+1),
|
|
688
706
|
linewidth=linewidth,
|
|
689
707
|
zorder=3+i,ax=ax1,edgecolor=edgecolor,**scatter_args)
|
|
690
|
-
highlight_i =
|
|
708
|
+
highlight_i = to_plot.loc[~to_plot["HUE"].isna(),"i"].values
|
|
691
709
|
else:
|
|
692
710
|
log.write(" -Highlighting target loci...",verbose=verbose)
|
|
693
|
-
sns.scatterplot(data=
|
|
711
|
+
sns.scatterplot(data=to_plot.loc[to_plot["HUE"]==0], x='i', y='scaled_P',
|
|
694
712
|
hue="HUE",
|
|
695
713
|
palette={0:highlight_color},
|
|
696
714
|
legend=legend,
|
|
@@ -701,7 +719,7 @@ def mqqplot(insumstats,
|
|
|
701
719
|
zorder=3,ax=ax1,edgecolor=edgecolor,**scatter_args)
|
|
702
720
|
# for annotate
|
|
703
721
|
if highlight_chrpos==False:
|
|
704
|
-
highlight_i =
|
|
722
|
+
highlight_i = to_plot.loc[to_plot[snpid].isin(highlight),"i"].values
|
|
705
723
|
else:
|
|
706
724
|
highlight_i = []
|
|
707
725
|
|
|
@@ -739,7 +757,8 @@ def mqqplot(insumstats,
|
|
|
739
757
|
s = "s"
|
|
740
758
|
hue = 'chr_hue'
|
|
741
759
|
hue_norm=None
|
|
742
|
-
to_plot
|
|
760
|
+
if to_plot is None:
|
|
761
|
+
to_plot = sumstats
|
|
743
762
|
log.write(" -Creating background plot...",verbose=verbose)
|
|
744
763
|
plot = sns.scatterplot(data=to_plot, x='i', y='scaled_P',
|
|
745
764
|
hue=hue,
|
|
@@ -814,6 +833,7 @@ def mqqplot(insumstats,
|
|
|
814
833
|
region_title=region_title,
|
|
815
834
|
region_title_args=region_title_args,
|
|
816
835
|
region_ld_legend = region_ld_legend,
|
|
836
|
+
region_legend_marker=region_legend_marker,
|
|
817
837
|
region_ld_threshold = region_ld_threshold,
|
|
818
838
|
region_ld_colors = region_ld_colors,
|
|
819
839
|
palette = palette,
|
|
@@ -996,6 +1016,7 @@ def mqqplot(insumstats,
|
|
|
996
1016
|
arm_scale_d=arm_scale_d,
|
|
997
1017
|
arm_offset=arm_offset,
|
|
998
1018
|
anno_adjust=anno_adjust,
|
|
1019
|
+
anno_xshift=anno_xshift,
|
|
999
1020
|
anno_fixed_arm_length=anno_fixed_arm_length,
|
|
1000
1021
|
maxy=maxy,
|
|
1001
1022
|
anno_fontsize= anno_fontsize,
|
|
@@ -1396,14 +1417,19 @@ def _process_xtick(ax1, chrom_df, xtick_chr_dict, fontsize, font_family, log=Log
|
|
|
1396
1417
|
|
|
1397
1418
|
def _process_ytick(ax1, fontsize, font_family, ax4, log=Log(),verbose=True):
    """Set the font size and font family of the y tick labels.

    The same tick parameters are applied to ``ax1`` and, when it is not
    ``None``, to ``ax4``.  Both axes are returned as ``(ax1, ax4)``.
    """
    log.write(" -Processing Y labels...",verbose=verbose)

    # One shared style so both axes are guaranteed to receive identical settings.
    y_tick_style = dict(axis='y', labelsize=fontsize, labelfontfamily=font_family)

    ax1.tick_params(**y_tick_style)
    if ax4 is not None:
        ax4.tick_params(**y_tick_style)

    return ax1, ax4
|
|
1408
1434
|
|
|
1409
1435
|
def _process_xlabel(region, xlabel, ax1, gtf_path, mode, fontsize, font_family, ax3=None , log=Log(),verbose=True):
|