gwaslab 3.5.1__tar.gz → 3.5.3__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gwaslab might be problematic. Click here for more details.

Files changed (96)
  1. {gwaslab-3.5.1/src/gwaslab.egg-info → gwaslab-3.5.3}/PKG-INFO +1 -1
  2. {gwaslab-3.5.1 → gwaslab-3.5.3}/pyproject.toml +1 -1
  3. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/__init__.py +1 -0
  4. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/g_Sumstats.py +7 -1
  5. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/g_version.py +2 -2
  6. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/io_process_args.py +25 -8
  7. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/io_to_formats.py +90 -23
  8. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/viz_aux_annotate_plot.py +3 -0
  9. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/viz_aux_chromatin.py +1 -1
  10. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/viz_aux_quickfix.py +8 -6
  11. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/viz_aux_save_figure.py +2 -1
  12. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/viz_plot_mqqplot.py +43 -17
  13. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/viz_plot_regional2.py +69 -13
  14. gwaslab-3.5.3/src/gwaslab/viz_plot_scatter_with_reg.py +229 -0
  15. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/viz_plot_stackedregional.py +20 -3
  16. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/viz_plot_trumpetplot.py +5 -5
  17. {gwaslab-3.5.1 → gwaslab-3.5.3/src/gwaslab.egg-info}/PKG-INFO +1 -1
  18. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab.egg-info/SOURCES.txt +1 -0
  19. {gwaslab-3.5.1 → gwaslab-3.5.3}/LICENSE +0 -0
  20. {gwaslab-3.5.1 → gwaslab-3.5.3}/LICENSE_before_v3.4.39 +0 -0
  21. {gwaslab-3.5.1 → gwaslab-3.5.3}/README.md +0 -0
  22. {gwaslab-3.5.1 → gwaslab-3.5.3}/setup.cfg +0 -0
  23. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/bd_common_data.py +0 -0
  24. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/bd_config.py +0 -0
  25. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/bd_download.py +0 -0
  26. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/bd_get_hapmap3.py +0 -0
  27. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/cache_manager.py +0 -0
  28. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/data/chrx_par/chrx_par_hg19.bed.gz +0 -0
  29. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/data/chrx_par/chrx_par_hg38.bed.gz +0 -0
  30. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/data/formatbook.json +0 -0
  31. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/data/hapmap3_SNPs/hapmap3_db150_hg19.snplist.gz +0 -0
  32. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/data/hapmap3_SNPs/hapmap3_db151_hg38.snplist.gz +0 -0
  33. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/data/high_ld/high_ld_hla_hg19.bed.gz +0 -0
  34. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/data/high_ld/high_ld_hla_hg38.bed.gz +0 -0
  35. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/data/reference.json +0 -0
  36. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/g_Log.py +0 -0
  37. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/g_Phenotypes.py +0 -0
  38. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/g_SumstatsPair.py +0 -0
  39. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/g_SumstatsT.py +0 -0
  40. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/g_Sumstats_summary.py +0 -0
  41. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/g_meta.py +0 -0
  42. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/g_vchange_status.py +0 -0
  43. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/hm_casting.py +0 -0
  44. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/hm_harmonize_sumstats.py +0 -0
  45. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/hm_rsid_to_chrpos.py +0 -0
  46. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/io_preformat_input.py +0 -0
  47. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/io_read_ldsc.py +0 -0
  48. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/io_read_tabular.py +0 -0
  49. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/io_to_pickle.py +0 -0
  50. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/ldsc_irwls.py +0 -0
  51. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/ldsc_jackknife.py +0 -0
  52. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/ldsc_ldscore.py +0 -0
  53. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/ldsc_parse.py +0 -0
  54. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/ldsc_regressions.py +0 -0
  55. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/ldsc_sumstats.py +0 -0
  56. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/qc_check_datatype.py +0 -0
  57. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/qc_fix_sumstats.py +0 -0
  58. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/run_script.py +0 -0
  59. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_abf_finemapping.py +0 -0
  60. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_ex_calculate_ldmatrix.py +0 -0
  61. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_ex_calculate_prs.py +0 -0
  62. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_ex_gwascatalog.py +0 -0
  63. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_ex_ldproxyfinder.py +0 -0
  64. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_ex_ldsc.py +0 -0
  65. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_ex_plink_filter.py +0 -0
  66. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_ex_process_h5.py +0 -0
  67. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_ex_process_ref.py +0 -0
  68. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_ex_run_2samplemr.py +0 -0
  69. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_ex_run_clumping.py +0 -0
  70. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_ex_run_coloc.py +0 -0
  71. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_ex_run_susie.py +0 -0
  72. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_in_calculate_gc.py +0 -0
  73. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_in_calculate_power.py +0 -0
  74. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_in_convert_h2.py +0 -0
  75. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_in_correct_winnerscurse.py +0 -0
  76. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_in_fill_data.py +0 -0
  77. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_in_filter_value.py +0 -0
  78. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_in_get_density.py +0 -0
  79. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_in_get_sig.py +0 -0
  80. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_in_meta.py +0 -0
  81. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/util_in_snphwe.py +0 -0
  82. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/vis_plot_credible sets.py +0 -0
  83. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/viz_aux_property.py +0 -0
  84. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/viz_aux_reposition_text.py +0 -0
  85. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/viz_plot_compare_af.py +0 -0
  86. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/viz_plot_compare_effect.py +0 -0
  87. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/viz_plot_forestplot.py +0 -0
  88. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/viz_plot_miamiplot.py +0 -0
  89. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/viz_plot_miamiplot2.py +0 -0
  90. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/viz_plot_phe_heatmap.py +0 -0
  91. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/viz_plot_qqplot.py +0 -0
  92. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/viz_plot_regionalplot.py +0 -0
  93. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab/viz_plot_rg_heatmap.py +0 -0
  94. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab.egg-info/dependency_links.txt +0 -0
  95. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab.egg-info/requires.txt +0 -0
  96. {gwaslab-3.5.1 → gwaslab-3.5.3}/src/gwaslab.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: gwaslab
3
- Version: 3.5.1
3
+ Version: 3.5.3
4
4
  Summary: A collection of handy tools for GWAS SumStats
5
5
  Author-email: Yunye <yunye@gwaslab.com>
6
6
  Project-URL: Homepage, https://cloufield.github.io/gwaslab/
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
7
7
 
8
8
  [project]
9
9
  name = "gwaslab"
10
- version = "3.5.1"
10
+ version = "3.5.3"
11
11
  authors = [
12
12
  { name="Yunye", email="yunye@gwaslab.com" },
13
13
  ]
@@ -46,3 +46,4 @@ from gwaslab.util_ex_process_h5 import process_vcf_to_hfd5
46
46
  from gwaslab.util_ex_run_susie import _run_susie_rss as run_susie_rss
47
47
  from gwaslab.io_read_tabular import _read_tabular as read_tabular
48
48
  from gwaslab.util_in_meta import meta_analyze
49
+ from gwaslab.viz_plot_scatter_with_reg import scatter
@@ -389,6 +389,12 @@ class Sumstats():
389
389
  self.data =flipallelestats(self.data,log=self.log,**flipallelestats_args)
390
390
 
391
391
  gc.collect()
392
+
393
+ if (ref_seq is not None or ref_infer is not None) and (ref_rsid_tsv is not None or ref_rsid_vcf is not None):
394
+
395
+ self.data = fixID(self.data, log=self.log, **{"fixid":True, "fixsep":True, "overwrite":True})
396
+
397
+ gc.collect()
392
398
 
393
399
  #####################################################
394
400
  if ref_rsid_tsv is not None:
@@ -833,4 +839,4 @@ class Sumstats():
833
839
  def to_format(self, path, build=None, verbose=True, **kwargs):
834
840
  if build is None:
835
841
  build = self.meta["gwaslab"]["genome_build"]
836
- _to_format(self.data, path, log=self.log, verbose=verbose, meta=self.meta, build=build, **kwargs)
842
+ _to_format(self.data, path, log=self.log, verbose=verbose, meta=self.meta, build=build, **kwargs)
@@ -15,8 +15,8 @@ def _get_version():
15
15
  def gwaslab_info():
16
16
  # version meta information
17
17
  dic={
18
- "version":"3.5.1",
19
- "release_date":"20241120"
18
+ "version":"3.5.3",
19
+ "release_date":"20241217"
20
20
  }
21
21
  return dic
22
22
 
@@ -1,23 +1,40 @@
1
1
  import copy
2
- def _merge_and_sync_dic(list_of_dics:list, default:dict) -> dict:
3
- temp = copy.copy(default)
4
- for dic in list_of_dics:
5
- if isinstance(dic, dict):
6
- temp.update(dic)
7
- return temp
8
2
 
9
3
  def _list_func_args(func):
10
4
  return func.__code__.co_varnames
11
5
 
12
6
  def _extract_kwargs(prefix:str, default:dict, kwargs:dict) -> dict:
7
+ # prefix: keyword
8
+ # default: default dict
9
+ # kwargs: all local kwargs + args + kwargs
10
+
13
11
  extracted = []
12
+ extracted_single=dict()
14
13
  for key,value in kwargs.items():
14
+ # kwargs or args
15
15
  if key=="kwargs" or key=="args":
16
16
  for key_nested,value_nested in kwargs[key].items():
17
17
  if prefix in key_nested and "arg" in key_nested:
18
- extracted.append(value_nested)
18
+
19
+ if len(key_nested.split("_"))<3:
20
+ extracted.append(value_nested)
21
+ ##
22
+ ## prefix_arg_fontsize
23
+ else:
24
+ print(key_nested.split("_")[-1], value)
25
+ extracted_single[key_nested.split("_")[-1]] = value_nested
19
26
  else:
27
+ # local kwargs
20
28
  if prefix in key and "arg" in key:
21
29
  extracted.append(value)
30
+ if len(extracted_single.keys()) >0:
31
+ extracted.append(extracted_single)
22
32
  merged_arg = _merge_and_sync_dic(extracted, default)
23
- return merged_arg
33
+ return merged_arg
34
+
35
+ def _merge_and_sync_dic(list_of_dics:list, default:dict) -> dict:
36
+ temp = copy.copy(default)
37
+ for dic in list_of_dics:
38
+ if isinstance(dic, dict):
39
+ temp.update(dic)
40
+ return temp
@@ -28,6 +28,7 @@ from gwaslab.util_in_filter_value import _extract
28
28
  def _to_format(sumstats,
29
29
  path="./sumstats",
30
30
  fmt="gwaslab",
31
+ tab_fmt="tsv",
31
32
  extract=None,
32
33
  exclude=None,
33
34
  cols=None,
@@ -39,7 +40,6 @@ def _to_format(sumstats,
39
40
  n=None,
40
41
  no_status=False,
41
42
  output_log=True,
42
- to_csvargs=None,
43
43
  float_formats=None,
44
44
  xymt_number=False,
45
45
  xymt=None,
@@ -47,20 +47,30 @@ def _to_format(sumstats,
47
47
  meta=None,
48
48
  ssfmeta=False,
49
49
  md5sum=False,
50
+ gzip=True,
50
51
  bgzip=False,
51
52
  tabix=False,
52
53
  tabix_indexargs={},
54
+ to_csvargs=None,
55
+ to_tabular_kwargs=None,
53
56
  log=Log(),
54
57
  verbose=True):
55
58
 
56
- if to_csvargs is None:
57
- to_csvargs = {}
59
+ if to_csvargs is None:
60
+ to_csvargs=dict()
61
+ if tabix_indexargs is None:
62
+ tabix_indexargs=dict()
63
+ if to_tabular_kwargs is None:
64
+ to_tabular_kwargs=dict()
58
65
  if float_formats is None:
59
- float_formats={}
66
+ float_formats=dict()
60
67
  if cols is None:
61
68
  cols=[]
62
69
  if xymt is None:
63
70
  xymt = ["X","Y","MT"]
71
+ non_gzip_tab_fmt = ["parquet"]
72
+ non_md5sum_tab_fmt = ["parquet"]
73
+
64
74
  onetime_log = copy.deepcopy(log)
65
75
 
66
76
  #######################################################################################################
@@ -154,6 +164,7 @@ def _to_format(sumstats,
154
164
  tofmt(output,
155
165
  path=path,
156
166
  fmt=fmt,
167
+ tab_fmt=tab_fmt,
157
168
  cols=cols,
158
169
  suffix=suffix,
159
170
  build=build,
@@ -164,9 +175,13 @@ def _to_format(sumstats,
164
175
  chr_prefix=chr_prefix,
165
176
  meta=meta,
166
177
  ssfmeta=ssfmeta,
178
+ gzip=gzip,
167
179
  bgzip=bgzip,
180
+ non_gzip_tab_fmt=non_gzip_tab_fmt,
181
+ non_md5sum_tab_fmt=non_md5sum_tab_fmt,
168
182
  tabix=tabix,
169
183
  tabix_indexargs=tabix_indexargs,
184
+ to_tabular_kwargs=to_tabular_kwargs,
170
185
  md5sum=md5sum,
171
186
  xymt_number=xymt_number,
172
187
  xymt=xymt)
@@ -186,6 +201,7 @@ def tofmt(sumstats,
186
201
  path=None,
187
202
  suffix=None,
188
203
  fmt=None,
204
+ tab_fmt="csv",
189
205
  cols=[],
190
206
  xymt_number=False,
191
207
  xymt=["X","Y","MT"],
@@ -194,15 +210,16 @@ def tofmt(sumstats,
194
210
  ssfmeta=False,
195
211
  md5sum=False,
196
212
  bgzip=False,
213
+ gzip=True,
214
+ non_gzip_tab_fmt=None,
215
+ non_md5sum_tab_fmt=None,
197
216
  tabix=False,
198
- tabix_indexargs={},
217
+ tabix_indexargs=None,
199
218
  verbose=True,
200
219
  no_status=False,
201
220
  log=Log(),
202
- to_csvargs=None):
203
-
204
- if to_csvargs is None:
205
- to_csvargs=dict()
221
+ to_csvargs=None,
222
+ to_tabular_kwargs=None):
206
223
 
207
224
  if fmt in ["ssf"]:
208
225
  xymt_number=True
@@ -336,36 +353,86 @@ def tofmt(sumstats,
336
353
  _bgzip_tabix_md5sum(path, fmt, bgzip, md5sum, tabix, tabix_indexargs, log, verbose)
337
354
 
338
355
  ####################################################################################################################
339
- elif fmt in get_formats_list():
356
+ elif fmt in get_formats_list() :
340
357
  # tabular
341
358
  log.write(" -"+fmt+" format will be loaded...",verbose=verbose)
342
359
  meta_data,rename_dictionary = get_format_dict(fmt,inverse=True)
343
360
  print_format_info(fmt=fmt, meta_data=meta_data,rename_dictionary=rename_dictionary,verbose=verbose, log=log, output=True)
344
361
 
345
- yaml_path = path + "."+suffix+".tsv-meta.yaml"
346
- path = path + "."+suffix+".tsv.gz"
362
+ # determine if gzip or not / create path for output
363
+ if gzip ==True and tab_fmt not in non_gzip_tab_fmt:
364
+ path = path + "."+suffix+".{}.gz".format(tab_fmt)
365
+ else:
366
+ path = path + "."+suffix+".{}".format(tab_fmt)
367
+
368
+ yaml_path = path + "."+suffix+".{}-meta.yaml".format(tab_fmt)
347
369
  log.write(" -Output path:",path, verbose=verbose)
348
-
370
+
349
371
  sumstats,to_csvargs = _configure_output_cols_and_args(sumstats, rename_dictionary, cols, no_status, path, meta_data, to_csvargs, log, verbose)
350
372
 
351
373
  log.write(" -Writing sumstats to: {}...".format(path),verbose=verbose)
352
- try:
353
- fast_to_csv(sumstats, path, to_csvargs=to_csvargs, compress=True, write_in_chunks=True)
354
- except:
355
- log.write(f"Error in using fast_to_csv. Falling back to original implementation.",verbose=verbose)
356
- sumstats.to_csv(path, index=None, **to_csvargs)
357
-
358
- if md5sum == True:
359
- md5_value = md5sum_file(path,log,verbose)
360
- else:
361
- md5_value = calculate_md5sum_file(path)
362
374
 
375
+ #if tab_fmt=="tsv" or tab_fmt=="csv":
376
+ # try:
377
+ # log.write(f" -Fast to csv mode...",verbose=verbose)
378
+ # fast_to_csv(sumstats, path, to_csvargs=to_csvargs, compress=True, write_in_chunks=True)
379
+ # except:
380
+ # log.write(f"Error in using fast_to_csv. Falling back to original implementation.",verbose=verbose)
381
+ # sumstats.to_csv(path, index=None, **to_csvargs)
382
+ #
383
+ #elif tab_fmt=="parquet":
384
+ # sumstats.to_parquet(path, index=None, **to_tabular_kwargs)
385
+ _write_tabular(sumstats,rename_dictionary, path, tab_fmt, to_csvargs, to_tabular_kwargs, log, verbose)
386
+
387
+ if tab_fmt not in non_md5sum_tab_fmt and "@" not in path:
388
+ if md5sum == True:
389
+ # write a md5sum file
390
+ md5_value = md5sum_file(path,log,verbose)
391
+ else:
392
+ # calculate md5sum without saveing a file
393
+ md5_value = calculate_md5sum_file(path)
394
+ else:
395
+ md5_value = "NA"
396
+
363
397
  ## update ssf-style meta data and export to yaml file
364
398
  _configure_ssf_meta(sumstats, fmt, ssfmeta, meta, meta_data, path, md5_value, yaml_path, log, verbose)
365
399
 
366
400
  return sumstats
367
401
 
368
402
  ####################################################################################################################
403
+ def _write_tabular(sumstats,rename_dictionary, path, tab_fmt, to_csvargs, to_tabular_kwargs, log, verbose):
404
+ chr_header = rename_dictionary["CHR"]
405
+ if tab_fmt=="tsv" or tab_fmt=="csv":
406
+ try:
407
+ log.write(f" -Fast to csv mode...",verbose=verbose)
408
+ if "@" in path:
409
+ log.write(f" -@ detected: writing each chromosome to a single file...",verbose=verbose)
410
+ log.write(" -Chromosomes:{}...".format(list(sumstats["CHR"].unique())),verbose=verbose)
411
+ for single_chr in list(sumstats["CHR"].unique()):
412
+ single_path = path.replace("@",single_chr)
413
+
414
+ fast_to_csv(sumstats.loc[sumstats[chr_header]==single_chr,:],
415
+ single_path,
416
+ to_csvargs=to_csvargs, compress=True, write_in_chunks=True)
417
+ else:
418
+ fast_to_csv(sumstats, path, to_csvargs=to_csvargs, compress=True, write_in_chunks=True)
419
+ except:
420
+ log.write(f"Error in using fast_to_csv. Falling back to original implementation.",verbose=verbose)
421
+ if "@" in path:
422
+ log.write(f" -@ detected: writing each chromosome to a single file...",verbose=verbose)
423
+ log.write(" -Chromosomes:{}...".format(list(sumstats["CHR"].unique())),verbose=verbose)
424
+ for single_chr in list(sumstats["CHR"].unique()):
425
+ single_path = path.replace("@",single_chr)
426
+
427
+ sumstats.loc[sumstats[chr_header]==single_chr,:].to_csv(path, index=None, **to_csvargs)
428
+ else:
429
+ sumstats.to_csv(path, index=None, **to_csvargs)
430
+
431
+ elif tab_fmt=="parquet":
432
+ sumstats.to_parquet(path, index=None, **to_tabular_kwargs)
433
+
434
+
435
+
369
436
  def fast_to_csv(dataframe, path, to_csvargs=None, compress=True, write_in_chunks=True):
370
437
  df_numpy = dataframe.to_numpy()
371
438
 
@@ -31,6 +31,7 @@ def annotate_single(
31
31
  arm_scale_d,
32
32
  arm_offset,
33
33
  anno_adjust,
34
+ anno_xshift,
34
35
  anno_fixed_arm_length,
35
36
  maxy,
36
37
  anno_fontsize,
@@ -158,6 +159,8 @@ def annotate_single(
158
159
  if anno_fixed_arm_length is not None:
159
160
  xytext=(row["i"],row["scaled_P"] + 0.2 + anno_fixed_arm_length)
160
161
 
162
+ if anno_xshift is not None:
163
+ xytext = (xytext[0] +(anno_xshift*y_span), xytext[1])
161
164
  ################################################################################################################################
162
165
  # if not changing the directions of some annotation arror arms
163
166
  if anno_count not in anno_d.keys():
@@ -99,7 +99,7 @@ def _plot_chromatin_state(region_chromatin_files,
99
99
  color=color_dict_i[row["STATE_i"]]
100
100
  ax.plot([offset_i + row["START"] ,offset_i + row["END"]],
101
101
  [i*0.1,i*0.1],
102
- c=color/255,linewidth=points_for_01,solid_capstyle="butt")
102
+ c=color/255,linewidth=points_for_01,solid_capstyle="butt",rasterized=True)
103
103
 
104
104
  ## add stripe label
105
105
  if len(region_chromatin_labels) == len(region_chromatin_files):
@@ -1,6 +1,8 @@
1
1
  import pandas as pd
2
2
  import numpy as np
3
3
  from gwaslab.g_Log import Log
4
+ from matplotlib import ticker
5
+ import matplotlib.pyplot as plt
4
6
  from gwaslab.bd_common_data import get_chr_to_number
5
7
  from gwaslab.bd_common_data import get_number_to_chr
6
8
  from math import ceil
@@ -350,7 +352,7 @@ def _cut(series, mode,cutfactor,cut,skip, ylabels, cut_log, verbose, lines_to_pl
350
352
  #sumstats.loc[sumstats["scaled_P"]>cut,"scaled_P"] = (sumstats.loc[sumstats["scaled_P"]>cut,"scaled_P"]-cut)/cutfactor + cut
351
353
 
352
354
  maxy = (maxticker-cut)/cutfactor + cut
353
-
355
+
354
356
  return series, maxy, maxticker, cut, cutfactor,ylabels,lines_to_plot
355
357
 
356
358
  #def _cut_line(level, mode,cutfactor,cut,skip, ylabels, cut_log, verbose, log):
@@ -379,10 +381,9 @@ def _set_yticklabels(cut,
379
381
  log.write(" -Processing Y tick lables...",verbose=verbose)
380
382
  # if no cut
381
383
  if cut == 0:
382
- ax1.set_ylim(skip, ceil(maxy*1.2) )
383
-
384
+ ax1.set_ylim((skip, ceil(maxy*1.2)) )
384
385
  # if cut
385
- if cut:
386
+ if cut!=0:
386
387
  # add cut line
387
388
 
388
389
  cutline = ax1.axhline(y=cut, linewidth = sc_linewidth,linestyle="--",color=cut_line_color,zorder=1)
@@ -432,14 +433,15 @@ def _set_yticklabels(cut,
432
433
  else:
433
434
  ax1.set_yticks(ticks1+ticks2)
434
435
  ax1.set_yticklabels(tickslabel1+tickslabel2,fontsize=fontsize,family=font_family)
435
- ax1.set_ylim(bottom = skip)
436
436
 
437
437
  if ylabels is not None:
438
438
  ax1.set_yticks(ylabels_converted)
439
439
  ax1.set_yticklabels(ylabels,fontsize=fontsize,family=font_family)
440
440
 
441
+ ylim_top = ax1.get_ylim()[1]
442
+ ax1.set_ybound(lower=skip,upper=ylim_top)
441
443
  ax1.tick_params(axis='y', labelsize=fontsize)
442
-
444
+
443
445
  return ax1
444
446
 
445
447
  def _jagged_y(cut,skip,ax1,mode,mqqratio,jagged_len,jagged_wid, log=Log(), verbose=True):
@@ -51,7 +51,8 @@ def get_default_path(keyword,fmt="png"):
51
51
  "miami":"miami",
52
52
  "esc":"effect_size_comparision",
53
53
  "afc":"allele_frequency_comparision",
54
- "gwheatmap":"genome_wide_heatmap"
54
+ "gwheatmap":"genome_wide_heatmap",
55
+ "scatter":"scatter"
55
56
  }
56
57
  prefix = path_dictionary[keyword]
57
58
  count = 1
@@ -25,6 +25,7 @@ from gwaslab.viz_plot_qqplot import _plot_qq
25
25
  from gwaslab.hm_harmonize_sumstats import auto_check_vcf_chr_dict
26
26
  from gwaslab.viz_plot_regional2 import _plot_regional
27
27
  from gwaslab.viz_plot_regional2 import process_vcf
28
+ from gwaslab.viz_plot_regional2 import _get_lead_id
28
29
  from gwaslab.viz_aux_quickfix import _get_largenumber
29
30
  from gwaslab.viz_aux_quickfix import _quick_fix_p_value
30
31
  from gwaslab.viz_aux_quickfix import _quick_fix_pos
@@ -107,6 +108,7 @@ def mqqplot(insumstats,
107
108
  region_flank_factor = 0.05,
108
109
  region_anno_bbox_args = None,
109
110
  region_marker_shapes=None,
111
+ region_legend_marker=True,
110
112
  cbar_title='LD $r^{2}$ with variant',
111
113
  cbar_fontsize = None,
112
114
  cbar_font_family = None,
@@ -138,6 +140,7 @@ def mqqplot(insumstats,
138
140
  anno_source = "ensembl",
139
141
  anno_gtf_path=None,
140
142
  anno_adjust=False,
143
+ anno_xshift=None,
141
144
  anno_max_iter=100,
142
145
  arrow_kwargs=None,
143
146
  arm_offset=None,
@@ -233,7 +236,7 @@ def mqqplot(insumstats,
233
236
  if "dpi" not in fig_args.keys():
234
237
  fig_args["dpi"] = dpi
235
238
  if region_anno_bbox_args is None:
236
- region_anno_bbox_args = dict()
239
+ region_anno_bbox_args = {"ec":"None","fc":"None"}
237
240
  if anno_set is None:
238
241
  anno_set=list()
239
242
  if anno_alias is None:
@@ -264,7 +267,7 @@ def mqqplot(insumstats,
264
267
 
265
268
  if region_marker_shapes is None:
266
269
  # 9 shapes
267
- region_marker_shapes = ['o', 's','^','D','*','P','X','h','8']
270
+ region_marker_shapes = ['o', '^','s','D','*','P','X','h','8']
268
271
  if region_grid_line is None:
269
272
  region_grid_line = {"linewidth": 2,"linestyle":"--"}
270
273
  if region_lead_grid_line is None:
@@ -626,7 +629,8 @@ def mqqplot(insumstats,
626
629
  sumstats["chr_hue"]=sumstats["LD"]
627
630
 
628
631
  ## default seetings
629
-
632
+ # assign to_plot for scatter plot
633
+ to_plot = None
630
634
  palette = sns.color_palette(colors,n_colors=sumstats[chrom].nunique())
631
635
 
632
636
  legend = None
@@ -639,7 +643,18 @@ def mqqplot(insumstats,
639
643
  legend=None
640
644
  linewidth=1
641
645
  if len(region_ref) == 1:
646
+ # hide lead variants -> add back in region plot
642
647
  palette = {100+i:region_ld_colors[i] for i in range(len(region_ld_colors))}
648
+ scatter_args["markers"]= {(i+1):m for i,m in enumerate(region_marker_shapes[:2])}
649
+ if region_ref[0] is None:
650
+ id_to_hide = sumstats["scaled_P"].idxmax()
651
+ to_plot = sumstats.drop(id_to_hide, axis=0)
652
+ else:
653
+ #id_to_hide = sumstats[sumstats["SNPID"]==region_ref[0],"scaled_P"].idxmax()
654
+ id_to_hide = _get_lead_id(sumstats, region_ref, log=log, verbose=verbose)
655
+ if id_to_hide is not None:
656
+ to_plot = sumstats.drop(id_to_hide, axis=0)
657
+ style="SHAPE"
643
658
  else:
644
659
  palette = {}
645
660
  region_color_maps = []
@@ -652,21 +667,24 @@ def mqqplot(insumstats,
652
667
  # 1 + 5 + 1
653
668
  region_ld_colors_single = [region_ld_colors[0]] + output_hex_colors + [output_hex_colors[-1]]
654
669
  region_color_maps.append(region_ld_colors_single)
655
- # gradient colors
670
+
671
+ # gradient color dict
656
672
  for i, hex_colors in enumerate(region_color_maps):
657
673
  for j, hex_color in enumerate(hex_colors):
658
674
  palette[(i+1)*100 + j ] = hex_color
659
675
 
660
676
  edgecolor="none"
677
+ # create a marker shape dict
661
678
  scatter_args["markers"]= {(i+1):m for i,m in enumerate(region_marker_shapes[:len(region_ref)])}
662
679
  style="SHAPE"
663
680
 
664
-
681
+
665
682
  ## if highlight
666
683
  highlight_i = pd.DataFrame()
667
684
  if len(highlight) >0:
685
+ to_plot = sumstats
668
686
  log.write(" -Creating background plot...",verbose=verbose)
669
- plot = sns.scatterplot(data=sumstats, x='i', y='scaled_P',
687
+ plot = sns.scatterplot(data=to_plot, x='i', y='scaled_P',
670
688
  hue='chr_hue',
671
689
  palette=palette,
672
690
  legend=legend,
@@ -678,7 +696,7 @@ def mqqplot(insumstats,
678
696
  if pd.api.types.is_list_like(highlight[0]) and highlight_chrpos==False:
679
697
  for i, highlight_set in enumerate(highlight):
680
698
  log.write(" -Highlighting set {} target loci...".format(i+1),verbose=verbose)
681
- sns.scatterplot(data=sumstats.loc[sumstats["HUE"]==i], x='i', y='scaled_P',
699
+ sns.scatterplot(data=to_plot.loc[to_plot["HUE"]==i], x='i', y='scaled_P',
682
700
  hue="HUE",
683
701
  palette={i:highlight_color[i%len(highlight_color)]},
684
702
  legend=legend,
@@ -687,10 +705,10 @@ def mqqplot(insumstats,
687
705
  sizes=(marker_size[0]+1,marker_size[1]+1),
688
706
  linewidth=linewidth,
689
707
  zorder=3+i,ax=ax1,edgecolor=edgecolor,**scatter_args)
690
- highlight_i = sumstats.loc[~sumstats["HUE"].isna(),"i"].values
708
+ highlight_i = to_plot.loc[~to_plot["HUE"].isna(),"i"].values
691
709
  else:
692
710
  log.write(" -Highlighting target loci...",verbose=verbose)
693
- sns.scatterplot(data=sumstats.loc[sumstats["HUE"]==0], x='i', y='scaled_P',
711
+ sns.scatterplot(data=to_plot.loc[to_plot["HUE"]==0], x='i', y='scaled_P',
694
712
  hue="HUE",
695
713
  palette={0:highlight_color},
696
714
  legend=legend,
@@ -701,7 +719,7 @@ def mqqplot(insumstats,
701
719
  zorder=3,ax=ax1,edgecolor=edgecolor,**scatter_args)
702
720
  # for annotate
703
721
  if highlight_chrpos==False:
704
- highlight_i = sumstats.loc[sumstats[snpid].isin(highlight),"i"].values
722
+ highlight_i = to_plot.loc[to_plot[snpid].isin(highlight),"i"].values
705
723
  else:
706
724
  highlight_i = []
707
725
 
@@ -739,7 +757,8 @@ def mqqplot(insumstats,
739
757
  s = "s"
740
758
  hue = 'chr_hue'
741
759
  hue_norm=None
742
- to_plot = sumstats
760
+ if to_plot is None:
761
+ to_plot = sumstats
743
762
  log.write(" -Creating background plot...",verbose=verbose)
744
763
  plot = sns.scatterplot(data=to_plot, x='i', y='scaled_P',
745
764
  hue=hue,
@@ -814,6 +833,7 @@ def mqqplot(insumstats,
814
833
  region_title=region_title,
815
834
  region_title_args=region_title_args,
816
835
  region_ld_legend = region_ld_legend,
836
+ region_legend_marker=region_legend_marker,
817
837
  region_ld_threshold = region_ld_threshold,
818
838
  region_ld_colors = region_ld_colors,
819
839
  palette = palette,
@@ -996,6 +1016,7 @@ def mqqplot(insumstats,
996
1016
  arm_scale_d=arm_scale_d,
997
1017
  arm_offset=arm_offset,
998
1018
  anno_adjust=anno_adjust,
1019
+ anno_xshift=anno_xshift,
999
1020
  anno_fixed_arm_length=anno_fixed_arm_length,
1000
1021
  maxy=maxy,
1001
1022
  anno_fontsize= anno_fontsize,
@@ -1396,14 +1417,19 @@ def _process_xtick(ax1, chrom_df, xtick_chr_dict, fontsize, font_family, log=Log
1396
1417
 
1397
1418
  def _process_ytick(ax1, fontsize, font_family, ax4, log=Log(),verbose=True):
1398
1419
  log.write(" -Processing Y labels...",verbose=verbose)
1399
- ax1_yticklabels = ax1.get_yticklabels()
1420
+ #ax1_yticklabels = ax1.get_yticklabels()
1421
+ #print(ax1_yticklabels)
1422
+ #plt.draw()
1423
+ #ax1_yticks = ax1.get_yticks()
1424
+ #print(ax1_yticks)
1400
1425
  #ax1.set_yticklabels(ax1_yticklabels,fontsize=fontsize,family=font_family)
1401
- ax1_yticks = ax1.get_yticks()
1402
- ax1.set_yticks(ax1_yticks,ax1_yticklabels,fontsize=fontsize,family=font_family)
1426
+ ax1.tick_params(axis='y', labelsize=fontsize,labelfontfamily=font_family)
1427
+ #ax1.set_yticks(ax1_yticks,ax1_yticklabels,fontsize=fontsize,family=font_family)
1403
1428
  if ax4 is not None:
1404
- ax4_yticklabels = ax4.get_yticklabels()
1405
- ax4_yticks = ax4.get_yticks()
1406
- ax4.set_yticks(ax4_yticks,ax4_yticklabels, fontsize=fontsize,family=font_family)
1429
+ #ax4_yticklabels = ax4.get_yticklabels()
1430
+ #ax4_yticks = ax4.get_yticks()
1431
+ ax4.tick_params(axis='y', labelsize=fontsize,labelfontfamily=font_family)
1432
+ #ax4.set_yticks(ax4_yticks,ax4_yticklabels, fontsize=fontsize,family=font_family)
1407
1433
  return ax1, ax4
1408
1434
 
1409
1435
  def _process_xlabel(region, xlabel, ax1, gtf_path, mode, fontsize, font_family, ax3=None , log=Log(),verbose=True):