gwaslab 3.5.1__py3-none-any.whl → 3.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gwaslab might be problematic. Click here for more details.

gwaslab/__init__.py CHANGED
@@ -46,3 +46,4 @@ from gwaslab.util_ex_process_h5 import process_vcf_to_hfd5
46
46
  from gwaslab.util_ex_run_susie import _run_susie_rss as run_susie_rss
47
47
  from gwaslab.io_read_tabular import _read_tabular as read_tabular
48
48
  from gwaslab.util_in_meta import meta_analyze
49
+ from gwaslab.viz_plot_scatter_with_reg import scatter
gwaslab/g_Sumstats.py CHANGED
@@ -389,6 +389,12 @@ class Sumstats():
389
389
  self.data =flipallelestats(self.data,log=self.log,**flipallelestats_args)
390
390
 
391
391
  gc.collect()
392
+
393
+ if (ref_seq is not None or ref_infer is not None) and (ref_rsid_tsv is not None or ref_rsid_vcf is not None):
394
+
395
+ self.data = fixID(self.data, log=self.log, **{"fixid":True, "fixsep":True, "overwrite":True})
396
+
397
+ gc.collect()
392
398
 
393
399
  #####################################################
394
400
  if ref_rsid_tsv is not None:
@@ -833,4 +839,4 @@ class Sumstats():
833
839
  def to_format(self, path, build=None, verbose=True, **kwargs):
834
840
  if build is None:
835
841
  build = self.meta["gwaslab"]["genome_build"]
836
- _to_format(self.data, path, log=self.log, verbose=verbose, meta=self.meta, build=build, **kwargs)
842
+ _to_format(self.data, path, log=self.log, verbose=verbose, meta=self.meta, build=build, **kwargs)
gwaslab/g_version.py CHANGED
@@ -15,8 +15,8 @@ def _get_version():
15
15
def gwaslab_info():
    """Return the package's version metadata.

    Returns
    -------
    dict
        A mapping with two string entries: ``"version"`` (the release
        number) and ``"release_date"`` (written as ``YYYYMMDD``).
    """
    # A fresh dict is built on every call, so callers may mutate the result.
    return {
        "version": "3.5.3",
        "release_date": "20241217",
    }
22
22
 
@@ -1,23 +1,40 @@
1
1
  import copy
2
- def _merge_and_sync_dic(list_of_dics:list, default:dict) -> dict:
3
- temp = copy.copy(default)
4
- for dic in list_of_dics:
5
- if isinstance(dic, dict):
6
- temp.update(dic)
7
- return temp
8
2
 
9
3
  def _list_func_args(func):
10
4
  return func.__code__.co_varnames
11
5
 
12
6
  def _extract_kwargs(prefix:str, default:dict, kwargs:dict) -> dict:
7
+ # prefix: keyword
8
+ # default: default dict
9
+ # kwargs: all local kwargs + args + kwargs
10
+
13
11
  extracted = []
12
+ extracted_single=dict()
14
13
  for key,value in kwargs.items():
14
+ # kwargs or args
15
15
  if key=="kwargs" or key=="args":
16
16
  for key_nested,value_nested in kwargs[key].items():
17
17
  if prefix in key_nested and "arg" in key_nested:
18
- extracted.append(value_nested)
18
+
19
+ if len(key_nested.split("_"))<3:
20
+ extracted.append(value_nested)
21
+ ##
22
+ ## prefix_arg_fontsize
23
+ else:
24
+ print(key_nested.split("_")[-1], value)
25
+ extracted_single[key_nested.split("_")[-1]] = value_nested
19
26
  else:
27
+ # local kwargs
20
28
  if prefix in key and "arg" in key:
21
29
  extracted.append(value)
30
+ if len(extracted_single.keys()) >0:
31
+ extracted.append(extracted_single)
22
32
  merged_arg = _merge_and_sync_dic(extracted, default)
23
- return merged_arg
33
+ return merged_arg
34
+
35
+ def _merge_and_sync_dic(list_of_dics:list, default:dict) -> dict:
36
+ temp = copy.copy(default)
37
+ for dic in list_of_dics:
38
+ if isinstance(dic, dict):
39
+ temp.update(dic)
40
+ return temp
gwaslab/io_to_formats.py CHANGED
@@ -28,6 +28,7 @@ from gwaslab.util_in_filter_value import _extract
28
28
  def _to_format(sumstats,
29
29
  path="./sumstats",
30
30
  fmt="gwaslab",
31
+ tab_fmt="tsv",
31
32
  extract=None,
32
33
  exclude=None,
33
34
  cols=None,
@@ -39,7 +40,6 @@ def _to_format(sumstats,
39
40
  n=None,
40
41
  no_status=False,
41
42
  output_log=True,
42
- to_csvargs=None,
43
43
  float_formats=None,
44
44
  xymt_number=False,
45
45
  xymt=None,
@@ -47,20 +47,30 @@ def _to_format(sumstats,
47
47
  meta=None,
48
48
  ssfmeta=False,
49
49
  md5sum=False,
50
+ gzip=True,
50
51
  bgzip=False,
51
52
  tabix=False,
52
53
  tabix_indexargs={},
54
+ to_csvargs=None,
55
+ to_tabular_kwargs=None,
53
56
  log=Log(),
54
57
  verbose=True):
55
58
 
56
- if to_csvargs is None:
57
- to_csvargs = {}
59
+ if to_csvargs is None:
60
+ to_csvargs=dict()
61
+ if tabix_indexargs is None:
62
+ tabix_indexargs=dict()
63
+ if to_tabular_kwargs is None:
64
+ to_tabular_kwargs=dict()
58
65
  if float_formats is None:
59
- float_formats={}
66
+ float_formats=dict()
60
67
  if cols is None:
61
68
  cols=[]
62
69
  if xymt is None:
63
70
  xymt = ["X","Y","MT"]
71
+ non_gzip_tab_fmt = ["parquet"]
72
+ non_md5sum_tab_fmt = ["parquet"]
73
+
64
74
  onetime_log = copy.deepcopy(log)
65
75
 
66
76
  #######################################################################################################
@@ -154,6 +164,7 @@ def _to_format(sumstats,
154
164
  tofmt(output,
155
165
  path=path,
156
166
  fmt=fmt,
167
+ tab_fmt=tab_fmt,
157
168
  cols=cols,
158
169
  suffix=suffix,
159
170
  build=build,
@@ -164,9 +175,13 @@ def _to_format(sumstats,
164
175
  chr_prefix=chr_prefix,
165
176
  meta=meta,
166
177
  ssfmeta=ssfmeta,
178
+ gzip=gzip,
167
179
  bgzip=bgzip,
180
+ non_gzip_tab_fmt=non_gzip_tab_fmt,
181
+ non_md5sum_tab_fmt=non_md5sum_tab_fmt,
168
182
  tabix=tabix,
169
183
  tabix_indexargs=tabix_indexargs,
184
+ to_tabular_kwargs=to_tabular_kwargs,
170
185
  md5sum=md5sum,
171
186
  xymt_number=xymt_number,
172
187
  xymt=xymt)
@@ -186,6 +201,7 @@ def tofmt(sumstats,
186
201
  path=None,
187
202
  suffix=None,
188
203
  fmt=None,
204
+ tab_fmt="csv",
189
205
  cols=[],
190
206
  xymt_number=False,
191
207
  xymt=["X","Y","MT"],
@@ -194,15 +210,16 @@ def tofmt(sumstats,
194
210
  ssfmeta=False,
195
211
  md5sum=False,
196
212
  bgzip=False,
213
+ gzip=True,
214
+ non_gzip_tab_fmt=None,
215
+ non_md5sum_tab_fmt=None,
197
216
  tabix=False,
198
- tabix_indexargs={},
217
+ tabix_indexargs=None,
199
218
  verbose=True,
200
219
  no_status=False,
201
220
  log=Log(),
202
- to_csvargs=None):
203
-
204
- if to_csvargs is None:
205
- to_csvargs=dict()
221
+ to_csvargs=None,
222
+ to_tabular_kwargs=None):
206
223
 
207
224
  if fmt in ["ssf"]:
208
225
  xymt_number=True
@@ -336,36 +353,86 @@ def tofmt(sumstats,
336
353
  _bgzip_tabix_md5sum(path, fmt, bgzip, md5sum, tabix, tabix_indexargs, log, verbose)
337
354
 
338
355
  ####################################################################################################################
339
- elif fmt in get_formats_list():
356
+ elif fmt in get_formats_list() :
340
357
  # tabular
341
358
  log.write(" -"+fmt+" format will be loaded...",verbose=verbose)
342
359
  meta_data,rename_dictionary = get_format_dict(fmt,inverse=True)
343
360
  print_format_info(fmt=fmt, meta_data=meta_data,rename_dictionary=rename_dictionary,verbose=verbose, log=log, output=True)
344
361
 
345
- yaml_path = path + "."+suffix+".tsv-meta.yaml"
346
- path = path + "."+suffix+".tsv.gz"
362
+ # determine if gzip or not / create path for output
363
+ if gzip ==True and tab_fmt not in non_gzip_tab_fmt:
364
+ path = path + "."+suffix+".{}.gz".format(tab_fmt)
365
+ else:
366
+ path = path + "."+suffix+".{}".format(tab_fmt)
367
+
368
+ yaml_path = path + "."+suffix+".{}-meta.yaml".format(tab_fmt)
347
369
  log.write(" -Output path:",path, verbose=verbose)
348
-
370
+
349
371
  sumstats,to_csvargs = _configure_output_cols_and_args(sumstats, rename_dictionary, cols, no_status, path, meta_data, to_csvargs, log, verbose)
350
372
 
351
373
  log.write(" -Writing sumstats to: {}...".format(path),verbose=verbose)
352
- try:
353
- fast_to_csv(sumstats, path, to_csvargs=to_csvargs, compress=True, write_in_chunks=True)
354
- except:
355
- log.write(f"Error in using fast_to_csv. Falling back to original implementation.",verbose=verbose)
356
- sumstats.to_csv(path, index=None, **to_csvargs)
357
-
358
- if md5sum == True:
359
- md5_value = md5sum_file(path,log,verbose)
360
- else:
361
- md5_value = calculate_md5sum_file(path)
362
374
 
375
+ #if tab_fmt=="tsv" or tab_fmt=="csv":
376
+ # try:
377
+ # log.write(f" -Fast to csv mode...",verbose=verbose)
378
+ # fast_to_csv(sumstats, path, to_csvargs=to_csvargs, compress=True, write_in_chunks=True)
379
+ # except:
380
+ # log.write(f"Error in using fast_to_csv. Falling back to original implementation.",verbose=verbose)
381
+ # sumstats.to_csv(path, index=None, **to_csvargs)
382
+ #
383
+ #elif tab_fmt=="parquet":
384
+ # sumstats.to_parquet(path, index=None, **to_tabular_kwargs)
385
+ _write_tabular(sumstats,rename_dictionary, path, tab_fmt, to_csvargs, to_tabular_kwargs, log, verbose)
386
+
387
+ if tab_fmt not in non_md5sum_tab_fmt and "@" not in path:
388
+ if md5sum == True:
389
+ # write a md5sum file
390
+ md5_value = md5sum_file(path,log,verbose)
391
+ else:
392
+ # calculate md5sum without saving a file
393
+ md5_value = calculate_md5sum_file(path)
394
+ else:
395
+ md5_value = "NA"
396
+
363
397
  ## update ssf-style meta data and export to yaml file
364
398
  _configure_ssf_meta(sumstats, fmt, ssfmeta, meta, meta_data, path, md5_value, yaml_path, log, verbose)
365
399
 
366
400
  return sumstats
367
401
 
368
402
  ####################################################################################################################
403
+ def _write_tabular(sumstats,rename_dictionary, path, tab_fmt, to_csvargs, to_tabular_kwargs, log, verbose):
404
+ chr_header = rename_dictionary["CHR"]
405
+ if tab_fmt=="tsv" or tab_fmt=="csv":
406
+ try:
407
+ log.write(f" -Fast to csv mode...",verbose=verbose)
408
+ if "@" in path:
409
+ log.write(f" -@ detected: writing each chromosome to a single file...",verbose=verbose)
410
+ log.write(" -Chromosomes:{}...".format(list(sumstats["CHR"].unique())),verbose=verbose)
411
+ for single_chr in list(sumstats["CHR"].unique()):
412
+ single_path = path.replace("@",single_chr)
413
+
414
+ fast_to_csv(sumstats.loc[sumstats[chr_header]==single_chr,:],
415
+ single_path,
416
+ to_csvargs=to_csvargs, compress=True, write_in_chunks=True)
417
+ else:
418
+ fast_to_csv(sumstats, path, to_csvargs=to_csvargs, compress=True, write_in_chunks=True)
419
+ except:
420
+ log.write(f"Error in using fast_to_csv. Falling back to original implementation.",verbose=verbose)
421
+ if "@" in path:
422
+ log.write(f" -@ detected: writing each chromosome to a single file...",verbose=verbose)
423
+ log.write(" -Chromosomes:{}...".format(list(sumstats["CHR"].unique())),verbose=verbose)
424
+ for single_chr in list(sumstats["CHR"].unique()):
425
+ single_path = path.replace("@",single_chr)
426
+
427
+ sumstats.loc[sumstats[chr_header]==single_chr,:].to_csv(path, index=None, **to_csvargs)
428
+ else:
429
+ sumstats.to_csv(path, index=None, **to_csvargs)
430
+
431
+ elif tab_fmt=="parquet":
432
+ sumstats.to_parquet(path, index=None, **to_tabular_kwargs)
433
+
434
+
435
+
369
436
  def fast_to_csv(dataframe, path, to_csvargs=None, compress=True, write_in_chunks=True):
370
437
  df_numpy = dataframe.to_numpy()
371
438
 
@@ -31,6 +31,7 @@ def annotate_single(
31
31
  arm_scale_d,
32
32
  arm_offset,
33
33
  anno_adjust,
34
+ anno_xshift,
34
35
  anno_fixed_arm_length,
35
36
  maxy,
36
37
  anno_fontsize,
@@ -158,6 +159,8 @@ def annotate_single(
158
159
  if anno_fixed_arm_length is not None:
159
160
  xytext=(row["i"],row["scaled_P"] + 0.2 + anno_fixed_arm_length)
160
161
 
162
+ if anno_xshift is not None:
163
+ xytext = (xytext[0] +(anno_xshift*y_span), xytext[1])
161
164
  ################################################################################################################################
162
165
  # if not changing the directions of some annotation arrow arms
163
166
  if anno_count not in anno_d.keys():
@@ -99,7 +99,7 @@ def _plot_chromatin_state(region_chromatin_files,
99
99
  color=color_dict_i[row["STATE_i"]]
100
100
  ax.plot([offset_i + row["START"] ,offset_i + row["END"]],
101
101
  [i*0.1,i*0.1],
102
- c=color/255,linewidth=points_for_01,solid_capstyle="butt")
102
+ c=color/255,linewidth=points_for_01,solid_capstyle="butt",rasterized=True)
103
103
 
104
104
  ## add stripe label
105
105
  if len(region_chromatin_labels) == len(region_chromatin_files):
@@ -1,6 +1,8 @@
1
1
  import pandas as pd
2
2
  import numpy as np
3
3
  from gwaslab.g_Log import Log
4
+ from matplotlib import ticker
5
+ import matplotlib.pyplot as plt
4
6
  from gwaslab.bd_common_data import get_chr_to_number
5
7
  from gwaslab.bd_common_data import get_number_to_chr
6
8
  from math import ceil
@@ -350,7 +352,7 @@ def _cut(series, mode,cutfactor,cut,skip, ylabels, cut_log, verbose, lines_to_pl
350
352
  #sumstats.loc[sumstats["scaled_P"]>cut,"scaled_P"] = (sumstats.loc[sumstats["scaled_P"]>cut,"scaled_P"]-cut)/cutfactor + cut
351
353
 
352
354
  maxy = (maxticker-cut)/cutfactor + cut
353
-
355
+
354
356
  return series, maxy, maxticker, cut, cutfactor,ylabels,lines_to_plot
355
357
 
356
358
  #def _cut_line(level, mode,cutfactor,cut,skip, ylabels, cut_log, verbose, log):
@@ -379,10 +381,9 @@ def _set_yticklabels(cut,
379
381
  log.write(" -Processing Y tick lables...",verbose=verbose)
380
382
  # if no cut
381
383
  if cut == 0:
382
- ax1.set_ylim(skip, ceil(maxy*1.2) )
383
-
384
+ ax1.set_ylim((skip, ceil(maxy*1.2)) )
384
385
  # if cut
385
- if cut:
386
+ if cut!=0:
386
387
  # add cut line
387
388
 
388
389
  cutline = ax1.axhline(y=cut, linewidth = sc_linewidth,linestyle="--",color=cut_line_color,zorder=1)
@@ -432,14 +433,15 @@ def _set_yticklabels(cut,
432
433
  else:
433
434
  ax1.set_yticks(ticks1+ticks2)
434
435
  ax1.set_yticklabels(tickslabel1+tickslabel2,fontsize=fontsize,family=font_family)
435
- ax1.set_ylim(bottom = skip)
436
436
 
437
437
  if ylabels is not None:
438
438
  ax1.set_yticks(ylabels_converted)
439
439
  ax1.set_yticklabels(ylabels,fontsize=fontsize,family=font_family)
440
440
 
441
+ ylim_top = ax1.get_ylim()[1]
442
+ ax1.set_ybound(lower=skip,upper=ylim_top)
441
443
  ax1.tick_params(axis='y', labelsize=fontsize)
442
-
444
+
443
445
  return ax1
444
446
 
445
447
  def _jagged_y(cut,skip,ax1,mode,mqqratio,jagged_len,jagged_wid, log=Log(), verbose=True):
@@ -51,7 +51,8 @@ def get_default_path(keyword,fmt="png"):
51
51
  "miami":"miami",
52
52
  "esc":"effect_size_comparision",
53
53
  "afc":"allele_frequency_comparision",
54
- "gwheatmap":"genome_wide_heatmap"
54
+ "gwheatmap":"genome_wide_heatmap",
55
+ "scatter":"scatter"
55
56
  }
56
57
  prefix = path_dictionary[keyword]
57
58
  count = 1
@@ -25,6 +25,7 @@ from gwaslab.viz_plot_qqplot import _plot_qq
25
25
  from gwaslab.hm_harmonize_sumstats import auto_check_vcf_chr_dict
26
26
  from gwaslab.viz_plot_regional2 import _plot_regional
27
27
  from gwaslab.viz_plot_regional2 import process_vcf
28
+ from gwaslab.viz_plot_regional2 import _get_lead_id
28
29
  from gwaslab.viz_aux_quickfix import _get_largenumber
29
30
  from gwaslab.viz_aux_quickfix import _quick_fix_p_value
30
31
  from gwaslab.viz_aux_quickfix import _quick_fix_pos
@@ -107,6 +108,7 @@ def mqqplot(insumstats,
107
108
  region_flank_factor = 0.05,
108
109
  region_anno_bbox_args = None,
109
110
  region_marker_shapes=None,
111
+ region_legend_marker=True,
110
112
  cbar_title='LD $r^{2}$ with variant',
111
113
  cbar_fontsize = None,
112
114
  cbar_font_family = None,
@@ -138,6 +140,7 @@ def mqqplot(insumstats,
138
140
  anno_source = "ensembl",
139
141
  anno_gtf_path=None,
140
142
  anno_adjust=False,
143
+ anno_xshift=None,
141
144
  anno_max_iter=100,
142
145
  arrow_kwargs=None,
143
146
  arm_offset=None,
@@ -233,7 +236,7 @@ def mqqplot(insumstats,
233
236
  if "dpi" not in fig_args.keys():
234
237
  fig_args["dpi"] = dpi
235
238
  if region_anno_bbox_args is None:
236
- region_anno_bbox_args = dict()
239
+ region_anno_bbox_args = {"ec":"None","fc":"None"}
237
240
  if anno_set is None:
238
241
  anno_set=list()
239
242
  if anno_alias is None:
@@ -264,7 +267,7 @@ def mqqplot(insumstats,
264
267
 
265
268
  if region_marker_shapes is None:
266
269
  # 9 shapes
267
- region_marker_shapes = ['o', 's','^','D','*','P','X','h','8']
270
+ region_marker_shapes = ['o', '^','s','D','*','P','X','h','8']
268
271
  if region_grid_line is None:
269
272
  region_grid_line = {"linewidth": 2,"linestyle":"--"}
270
273
  if region_lead_grid_line is None:
@@ -626,7 +629,8 @@ def mqqplot(insumstats,
626
629
  sumstats["chr_hue"]=sumstats["LD"]
627
630
 
628
631
  ## default settings
629
-
632
+ # assign to_plot for scatter plot
633
+ to_plot = None
630
634
  palette = sns.color_palette(colors,n_colors=sumstats[chrom].nunique())
631
635
 
632
636
  legend = None
@@ -639,7 +643,18 @@ def mqqplot(insumstats,
639
643
  legend=None
640
644
  linewidth=1
641
645
  if len(region_ref) == 1:
646
+ # hide lead variants -> add back in region plot
642
647
  palette = {100+i:region_ld_colors[i] for i in range(len(region_ld_colors))}
648
+ scatter_args["markers"]= {(i+1):m for i,m in enumerate(region_marker_shapes[:2])}
649
+ if region_ref[0] is None:
650
+ id_to_hide = sumstats["scaled_P"].idxmax()
651
+ to_plot = sumstats.drop(id_to_hide, axis=0)
652
+ else:
653
+ #id_to_hide = sumstats[sumstats["SNPID"]==region_ref[0],"scaled_P"].idxmax()
654
+ id_to_hide = _get_lead_id(sumstats, region_ref, log=log, verbose=verbose)
655
+ if id_to_hide is not None:
656
+ to_plot = sumstats.drop(id_to_hide, axis=0)
657
+ style="SHAPE"
643
658
  else:
644
659
  palette = {}
645
660
  region_color_maps = []
@@ -652,21 +667,24 @@ def mqqplot(insumstats,
652
667
  # 1 + 5 + 1
653
668
  region_ld_colors_single = [region_ld_colors[0]] + output_hex_colors + [output_hex_colors[-1]]
654
669
  region_color_maps.append(region_ld_colors_single)
655
- # gradient colors
670
+
671
+ # gradient color dict
656
672
  for i, hex_colors in enumerate(region_color_maps):
657
673
  for j, hex_color in enumerate(hex_colors):
658
674
  palette[(i+1)*100 + j ] = hex_color
659
675
 
660
676
  edgecolor="none"
677
+ # create a marker shape dict
661
678
  scatter_args["markers"]= {(i+1):m for i,m in enumerate(region_marker_shapes[:len(region_ref)])}
662
679
  style="SHAPE"
663
680
 
664
-
681
+
665
682
  ## if highlight
666
683
  highlight_i = pd.DataFrame()
667
684
  if len(highlight) >0:
685
+ to_plot = sumstats
668
686
  log.write(" -Creating background plot...",verbose=verbose)
669
- plot = sns.scatterplot(data=sumstats, x='i', y='scaled_P',
687
+ plot = sns.scatterplot(data=to_plot, x='i', y='scaled_P',
670
688
  hue='chr_hue',
671
689
  palette=palette,
672
690
  legend=legend,
@@ -678,7 +696,7 @@ def mqqplot(insumstats,
678
696
  if pd.api.types.is_list_like(highlight[0]) and highlight_chrpos==False:
679
697
  for i, highlight_set in enumerate(highlight):
680
698
  log.write(" -Highlighting set {} target loci...".format(i+1),verbose=verbose)
681
- sns.scatterplot(data=sumstats.loc[sumstats["HUE"]==i], x='i', y='scaled_P',
699
+ sns.scatterplot(data=to_plot.loc[to_plot["HUE"]==i], x='i', y='scaled_P',
682
700
  hue="HUE",
683
701
  palette={i:highlight_color[i%len(highlight_color)]},
684
702
  legend=legend,
@@ -687,10 +705,10 @@ def mqqplot(insumstats,
687
705
  sizes=(marker_size[0]+1,marker_size[1]+1),
688
706
  linewidth=linewidth,
689
707
  zorder=3+i,ax=ax1,edgecolor=edgecolor,**scatter_args)
690
- highlight_i = sumstats.loc[~sumstats["HUE"].isna(),"i"].values
708
+ highlight_i = to_plot.loc[~to_plot["HUE"].isna(),"i"].values
691
709
  else:
692
710
  log.write(" -Highlighting target loci...",verbose=verbose)
693
- sns.scatterplot(data=sumstats.loc[sumstats["HUE"]==0], x='i', y='scaled_P',
711
+ sns.scatterplot(data=to_plot.loc[to_plot["HUE"]==0], x='i', y='scaled_P',
694
712
  hue="HUE",
695
713
  palette={0:highlight_color},
696
714
  legend=legend,
@@ -701,7 +719,7 @@ def mqqplot(insumstats,
701
719
  zorder=3,ax=ax1,edgecolor=edgecolor,**scatter_args)
702
720
  # for annotate
703
721
  if highlight_chrpos==False:
704
- highlight_i = sumstats.loc[sumstats[snpid].isin(highlight),"i"].values
722
+ highlight_i = to_plot.loc[to_plot[snpid].isin(highlight),"i"].values
705
723
  else:
706
724
  highlight_i = []
707
725
 
@@ -739,7 +757,8 @@ def mqqplot(insumstats,
739
757
  s = "s"
740
758
  hue = 'chr_hue'
741
759
  hue_norm=None
742
- to_plot = sumstats
760
+ if to_plot is None:
761
+ to_plot = sumstats
743
762
  log.write(" -Creating background plot...",verbose=verbose)
744
763
  plot = sns.scatterplot(data=to_plot, x='i', y='scaled_P',
745
764
  hue=hue,
@@ -814,6 +833,7 @@ def mqqplot(insumstats,
814
833
  region_title=region_title,
815
834
  region_title_args=region_title_args,
816
835
  region_ld_legend = region_ld_legend,
836
+ region_legend_marker=region_legend_marker,
817
837
  region_ld_threshold = region_ld_threshold,
818
838
  region_ld_colors = region_ld_colors,
819
839
  palette = palette,
@@ -996,6 +1016,7 @@ def mqqplot(insumstats,
996
1016
  arm_scale_d=arm_scale_d,
997
1017
  arm_offset=arm_offset,
998
1018
  anno_adjust=anno_adjust,
1019
+ anno_xshift=anno_xshift,
999
1020
  anno_fixed_arm_length=anno_fixed_arm_length,
1000
1021
  maxy=maxy,
1001
1022
  anno_fontsize= anno_fontsize,
@@ -1396,14 +1417,19 @@ def _process_xtick(ax1, chrom_df, xtick_chr_dict, fontsize, font_family, log=Log
1396
1417
 
1397
1418
def _process_ytick(ax1, fontsize, font_family, ax4, log=Log(),verbose=True):
    """Apply the font size and font family to the y tick labels.

    The styling goes through ``tick_params``, so the existing tick positions
    and label texts of the axes are left as they are.

    Parameters
    ----------
    ax1 : axes object
        Axes that is always styled.
    fontsize, font_family
        Passed to ``tick_params`` as ``labelsize`` and ``labelfontfamily``.
    ax4 : axes object or None
        Second axes; styled the same way when it is not ``None``.
    log, verbose
        Logger and verbosity flag for the progress message.

    Returns
    -------
    tuple
        ``(ax1, ax4)`` as passed in.
    """
    log.write(" -Processing Y labels...",verbose=verbose)

    y_label_style = {"axis": 'y', "labelsize": fontsize, "labelfontfamily": font_family}
    ax1.tick_params(**y_label_style)
    if ax4 is not None:
        ax4.tick_params(**y_label_style)

    return ax1, ax4
1408
1434
 
1409
1435
  def _process_xlabel(region, xlabel, ax1, gtf_path, mode, fontsize, font_family, ax3=None , log=Log(),verbose=True):
@@ -70,6 +70,7 @@ def _plot_regional(
70
70
  palette=None,
71
71
  region_recombination = True,
72
72
  region_protein_coding=True,
73
+ region_legend_marker=True,
73
74
  region_flank_factor = 0.05,
74
75
  track_font_family="Arial",
75
76
  taf=[4,0,0.95,1,1],
@@ -93,6 +94,7 @@ def _plot_regional(
93
94
  ax1, lead_id_single = _pinpoint_lead(sumstats = sumstats,
94
95
  ax1 = ax1,
95
96
  region_ref=region_ref_single,
97
+ region_ref_total_n = len(region_ref),
96
98
  lead_color = palette[(index+1)*100 + len(region_ld_threshold)+2],
97
99
  marker_size= marker_size,
98
100
  region_marker_shapes=region_marker_shapes,
@@ -130,7 +132,10 @@ def _plot_regional(
130
132
  region_ref=region_ref,
131
133
  region_ld_threshold=region_ld_threshold,
132
134
  region_ref_index_dic=region_ref_index_dic,
133
- palette=palette)
135
+ region_marker_shapes=region_marker_shapes,
136
+ palette=palette,
137
+ region_legend_marker=region_legend_marker,
138
+ fig=fig)
134
139
  else:
135
140
  cbar=None
136
141
 
@@ -241,6 +246,10 @@ def _plot_regional(
241
246
 
242
247
  # + ###########################################################################################################################################################################
243
248
  def _get_lead_id(sumstats=None, region_ref=None, log=None, verbose=True):
249
+ # region_ref_single (not none) -> specified variant ID
250
+ # convert region_ref_single -> lead_id(index)
251
+
252
+ #
244
253
  region_ref_to_check = copy.copy(region_ref)
245
254
  try:
246
255
  if len(region_ref_to_check)>0 and type(region_ref_to_check) is not str:
@@ -250,24 +259,30 @@ def _get_lead_id(sumstats=None, region_ref=None, log=None, verbose=True):
250
259
 
251
260
  lead_id=None
252
261
 
262
+ # match by rsID
253
263
  if "rsID" in sumstats.columns:
254
264
  lead_id = sumstats.index[sumstats["rsID"] == region_ref_to_check].to_list()
255
-
265
+ # match by SNPID
256
266
  if lead_id is None and "SNPID" in sumstats.columns:
257
267
  lead_id = sumstats.index[sumstats["SNPID"] == region_ref_to_check].to_list()
258
268
 
269
+ # if duplicated, select the first one
259
270
  if type(lead_id) is list:
260
271
  if len(lead_id)>0:
261
272
  lead_id = int(lead_id[0])
262
273
 
274
+
263
275
  if region_ref_to_check is not None:
264
276
  if type(lead_id) is list:
265
277
  if len(lead_id)==0 :
266
278
  #try:
279
+ # if region_ref_to_check is in CHR:POS:NEA:EA format
267
280
  matched_snpid = re.match("(chr)?[0-9]+:[0-9]+:[ATCG]+:[ATCG]+", region_ref_to_check, re.IGNORECASE)
268
281
  if matched_snpid is None:
282
+ # if not, pass
269
283
  pass
270
284
  else:
285
+ # if region_ref_to_check is in CHR:POS:NEA:EA format, match by CHR:POS:NEA:EA
271
286
  lead_snpid = matched_snpid.group(0).split(":")
272
287
  if len(lead_snpid)==4:
273
288
  lead_chr= int(lead_snpid[0])
@@ -300,7 +315,7 @@ def _get_lead_id(sumstats=None, region_ref=None, log=None, verbose=True):
300
315
 
301
316
  return lead_id
302
317
 
303
- def _pinpoint_lead(sumstats,ax1,region_ref, lead_color, marker_size, log, verbose, region_marker_shapes):
318
+ def _pinpoint_lead(sumstats,ax1,region_ref, region_ref_total_n, lead_color, marker_size, log, verbose, region_marker_shapes):
304
319
 
305
320
  if region_ref is None:
306
321
  log.write(" -Extracting lead variant..." , verbose=verbose)
@@ -308,12 +323,20 @@ def _pinpoint_lead(sumstats,ax1,region_ref, lead_color, marker_size, log, verbos
308
323
  else:
309
324
  lead_id = _get_lead_id(sumstats, region_ref, log, verbose)
310
325
 
326
+ if lead_id is not None:
327
+ if region_ref_total_n <2:
328
+ # single-ref mode
329
+ marker_shape = region_marker_shapes[sumstats.loc[lead_id,"SHAPE"]]
330
+ else:
331
+ # multi-ref mode
332
+ marker_shape = region_marker_shapes[sumstats.loc[lead_id,"SHAPE"]-1]
333
+
311
334
  if lead_id is not None:
312
335
  ax1.scatter(sumstats.loc[lead_id,"i"],sumstats.loc[lead_id,"scaled_P"],
313
336
  color=lead_color,
314
337
  zorder=3,
315
- marker= region_marker_shapes[sumstats.loc[lead_id,"SHAPE"]-1],
316
- s=marker_size[1]+2,
338
+ marker= marker_shape,
339
+ s=marker_size[1]*1.5,
317
340
  edgecolor="black")
318
341
 
319
342
  return ax1, lead_id
@@ -322,14 +345,15 @@ def _add_region_title(region_title, ax1,region_title_args):
322
345
  ax1.text(0.015,0.97, region_title, transform=ax1.transAxes, va="top", ha="left", region_ref=None, **region_title_args )
323
346
  return ax1
324
347
 
325
- def _add_ld_legend(sumstats, ax1, region_ld_threshold, region_ref,region_ref_index_dic,palette =None, position=1):
348
+ def _add_ld_legend(sumstats, ax1, region_ld_threshold, region_ref,region_ref_index_dic,region_marker_shapes,fig, region_legend_marker=True,palette =None, position=1):
326
349
 
327
350
  width_pct = "11%"
328
351
  height_pct = "{}%".format( 14 + 7 * len(region_ref))
329
352
  axins1 = inset_axes(ax1,
330
353
  width=width_pct, # width = 50% of parent_bbox width
331
354
  height=height_pct, # height : 5%
332
- loc='upper right',axes_kwargs={"frameon":True,"facecolor":"white","zorder":999999})
355
+ loc='upper right',
356
+ axes_kwargs={"frameon":True,"facecolor":"white","zorder":999999})
333
357
 
334
358
  ld_ticks = [0]+region_ld_threshold+[1]
335
359
 
@@ -345,7 +369,7 @@ def _add_ld_legend(sumstats, ax1, region_ld_threshold, region_ref,region_ref_ind
345
369
  a = Rectangle((x,y),width, height, fill = True, color = hex_color , linewidth = 2)
346
370
  #patches.append(a)
347
371
  axins1.add_patch(a)
348
-
372
+
349
373
  # y snpid
350
374
  yticks_position = 0.1 + 0.2 *np.arange(0,len(region_ref))
351
375
  axins1.set_yticks(yticks_position, ["{}".format(x) for x in region_ref])
@@ -354,9 +378,35 @@ def _add_ld_legend(sumstats, ax1, region_ld_threshold, region_ref,region_ref_ind
354
378
  # x ld thresholds
355
379
  axins1.set_xticks(ticks=ld_ticks)
356
380
  axins1.set_xticklabels([str(i) for i in ld_ticks])
357
- axins1.set_xlim(0,1)
358
381
 
382
+ xmin, xmax = 0, 1
383
+ axins1.set_xlim(xmin,xmax)
384
+
385
+ ############### ##############plot marker ############## ##############
386
+ if region_legend_marker==True:
387
+ for group_index, ref in enumerate(region_ref):
388
+ x= -0.1
389
+ y= 0.1 + 0.2 * group_index
390
+
391
+ if len(region_ref) <2:
392
+ # single-ref mode
393
+ marker = region_marker_shapes[group_index+1]
394
+ c = palette[(region_ref_index_dic[region_ref[group_index]]+1)*100 + len(ld_ticks)]
395
+ else:
396
+ # multi-ref mode
397
+ marker = region_marker_shapes[group_index]
398
+ c = palette[(region_ref_index_dic[region_ref[group_index]]+1)*100 + len(ld_ticks)-1]
399
+
400
+ # ([x0,y0][x1,y1])
401
+ data_to_point =(axins1.bbox.get_points()[1][0]-axins1.bbox.get_points()[0][0]) / (xmax - xmin)
402
+ s = (data_to_point * 0.15*0.11/(fig.dpi/72))**2
403
+
404
+ axins1.scatter(x, y, s=s, marker=marker,c=c, edgecolors="black", linewidths = 1, clip_on=False, zorder=100)
405
+ axins1.tick_params(axis="y", pad=data_to_point* 0.11* 0.19/(fig.dpi/72))
406
+
407
+ axins1.set_xlim(0,1)
359
408
  axins1.set_aspect('equal', adjustable='box')
409
+ #axins1.tick_params(axis="y", pad=data_to_point* 0.11* 0.19/(fig.dpi/72))
360
410
  axins1.set_title('LD $r^{2}$ with variant',loc="center",y=-0.2)
361
411
  cbar = axins1
362
412
  return ax1, cbar
@@ -434,6 +484,7 @@ def _plot_gene_track(
434
484
  font_size_in_pixels= taf[2] * pixels_per_track
435
485
  font_size_in_points = font_size_in_pixels * pixels_per_point
436
486
  linewidth_in_points= pixels_per_track * pixels_per_point
487
+
437
488
  log.write(" -plotting gene track..", verbose=verbose)
438
489
 
439
490
  sig_gene_name = "Undefined"
@@ -446,6 +497,7 @@ def _plot_gene_track(
446
497
  sig_gene_names=[]
447
498
  sig_gene_lefts=[]
448
499
  sig_gene_rights=[]
500
+ log.write(" -plotting genes: {}..".format(len(uniq_gene_region)), verbose=verbose)
449
501
  for index,row in uniq_gene_region.iterrows():
450
502
 
451
503
  gene_color="#020080"
@@ -464,9 +516,9 @@ def _plot_gene_track(
464
516
  sig_gene_lefts.append(gene_track_start_i+row["start"])
465
517
  sig_gene_rights.append(gene_track_start_i+row["end"])
466
518
 
467
- # plot gene line
468
- ax3.plot((gene_track_start_i+row["start"],gene_track_start_i+row["end"]),
469
- (row["stack"]*2,row["stack"]*2),color=gene_color,linewidth=linewidth_in_points/10)
519
+ # plot gene line
520
+ ax3.plot((gene_track_start_i+row["start"],gene_track_start_i+row["end"]),
521
+ (row["stack"]*2,row["stack"]*2),color=gene_color,linewidth=linewidth_in_points/10,solid_capstyle="butt")
470
522
 
471
523
  # plot gene name
472
524
  if row["end"] >= region[2]:
@@ -483,6 +535,7 @@ def _plot_gene_track(
483
535
  y=row["stack"]*2+taf[4],s=gene_anno,ha="center",va="center",color="black",style='italic',size=font_size_in_points,family=track_font_family))
484
536
 
485
537
  # plot exons
538
+ log.write(" -plotting exons: {}..".format(len(exons)), verbose=verbose)
486
539
  for index,row in exons.iterrows():
487
540
  exon_color="#020080"
488
541
  for sig_gene_name, sig_gene_left, sig_gene_right in zip(sig_gene_names,sig_gene_lefts,sig_gene_rights):
@@ -664,6 +717,10 @@ def process_vcf(sumstats,
664
717
  sumstats[final_shape_col] = 1
665
718
  sumstats[final_rsq_col] = 0.0
666
719
 
720
+ if len(region_ref)==1:
721
+ if lead_id is not None:
722
+ sumstats.loc[lead_id, final_shape_col] +=1
723
+
667
724
  for i in range(len(region_ref)):
668
725
  ld_single = "LD_{}".format(i)
669
726
  current_rsq = "RSQ_{}".format(i)
@@ -672,7 +729,6 @@ def process_vcf(sumstats,
672
729
  sumstats.loc[a_ngt_b, final_ld_col] = 100 * (i+1) + sumstats.loc[a_ngt_b, ld_single]
673
730
  sumstats.loc[a_ngt_b, final_rsq_col] = sumstats.loc[a_ngt_b, current_rsq]
674
731
  sumstats.loc[a_ngt_b, final_shape_col] = i + 1
675
-
676
732
  ####################################################################################################
677
733
  log.write("Finished loading reference genotype successfully!", verbose=verbose)
678
734
  return sumstats
@@ -0,0 +1,229 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ import matplotlib.pyplot as plt
4
+ import scipy.stats as ss
5
+ import seaborn as sns
6
+ import gc
7
+ import math
8
+ import scipy.stats as ss
9
+ from matplotlib.patches import Rectangle
10
+ from adjustText import adjust_text
11
+ from gwaslab.viz_aux_save_figure import save_figure
12
+ from gwaslab.util_in_get_sig import getsig
13
+ from gwaslab.util_in_get_sig import annogene
14
+ from gwaslab.g_Log import Log
15
+ from gwaslab.util_in_correct_winnerscurse import wc_correct
16
+ from gwaslab.util_in_correct_winnerscurse import wc_correct_test
17
+ from gwaslab.g_Sumstats import Sumstats
18
+ from gwaslab.io_process_args import _merge_and_sync_dic
19
+ from gwaslab.io_process_args import _extract_kwargs
20
+
21
def scatter(df,
            x,
            y,
            mode="0",
            reg_box=None,
            is_reg=True,
            fdr=False,
            allele_match=False,
            r_se=False,
            is_45_helper_line=False,
            plt_args=None,
            xylabel_prefix="Per-allele effect size in ",
            helper_line_args=None,
            font_args=None,
            fontargs=None,
            build="19",
            r_or_r2="r",
            err_kwargs=None,
            legend_args=None,
            log=None,
            save=False,
            reg_xmin=None,
            verbose=True,
            save_args=None,
            scatter_kwargs=None,
            font_kwargs=None,
            plt_kwargs=None,
            null_beta=0,
            engine="plt",
            **kwargs):
    """Draw a scatter plot of ``df[x]`` against ``df[y]`` with an optional regression line.

    A new figure is created with ``plt.subplots(**plt_kwargs)``, the points are
    drawn with matplotlib (``engine="plt"``) or seaborn (``engine="sns"``), the
    regression annotation is delegated to ``confire_regression_line`` and the
    figure is handed to ``save_figure``.

    Parameters
    ----------
    df : table-like object indexable by column name (``df[x]``, ``df[y]``).
    x, y : column names used for the horizontal and vertical axes.
    mode, is_reg, r_se, is_45_helper_line, null_beta : forwarded unchanged to
        ``confire_regression_line``.
    reg_box : bbox properties for the regression text; defaults to a white
        rounded box without an edge.
    reg_xmin : x position used to anchor the regression line; defaults to
        ``df[x].min()``.
    save, save_args : forwarded to ``save_figure`` (``save_args`` after being
        processed by ``_extract_kwargs``).
    err_kwargs, plt_kwargs, scatter_kwargs, font_kwargs, helper_line_args :
        option dictionaries; each falls back to a built-in default when None.
    log : logger exposing ``write(..., verbose=...)``. A fresh ``Log()`` is
        created per call when None, so calls do not share one default logger.
    verbose : forwarded to every ``log.write`` call.
    engine : ``"plt"`` or ``"sns"``; any other value draws no points.
    fdr, allele_match, plt_args, xylabel_prefix, font_args, fontargs, build,
    r_or_r2, legend_args, **kwargs :
        not read directly in this function. They are still visible to
        ``_extract_kwargs`` through ``locals()``.
        NOTE(review): presumably ``*_args`` aliases are merged there - confirm
        against ``gwaslab.io_process_args``.

    Returns
    -------
    (fig, ax, reg) : the figure, the axes, and the regression result
        returned by ``confire_regression_line``.
    """
    if log is None:
        log = Log()

    if save_args is None:
        save_args = {"dpi":300,"facecolor":"white"}
    if reg_box is None:
        reg_box = dict(boxstyle='round', facecolor='white', alpha=1,edgecolor="None")
    if err_kwargs is None:
        err_kwargs={"ecolor":"#cccccc","elinewidth":1}
    if font_kwargs is None:
        font_kwargs={'fontsize':12,'family':'sans','fontname':'Arial'}
    if helper_line_args is None:
        helper_line_args={"color":'black', "linestyle":'-',"lw":1}
    if plt_kwargs is None:
        plt_kwargs={"figsize":(8,8),"dpi":300}
    if scatter_kwargs is None:
        scatter_kwargs={"s":20}
    if reg_xmin is None:
        reg_xmin = df[x].min()

    # The order of these calls matters: each one receives a snapshot of
    # locals(), which includes the dictionaries rebound by the previous calls.
    save_kwargs = _extract_kwargs("save", save_args, locals())
    err_kwargs = _extract_kwargs("err", err_kwargs, locals())
    plt_kwargs = _extract_kwargs("plt", plt_kwargs, locals())
    scatter_kwargs = _extract_kwargs("scatter", scatter_kwargs, locals())
    font_kwargs = _extract_kwargs("font",font_kwargs, locals())

    log.write("Start to create scatter plot...", verbose=verbose)
    fig,ax = plt.subplots(**plt_kwargs)

    # Axis limits are captured before any data is drawn and passed on to
    # confire_regression_line as part of its positional signature.
    xl,xh=ax.get_xlim()
    yl,yh=ax.get_ylim()

    log.write(" -Creating scatter plot : {} - {}...".format(x, y), verbose=verbose)
    if engine=="plt":
        ax.scatter(df[x],df[y],**scatter_kwargs)
    elif engine=="sns":
        sns.scatterplot(data=df,x=x,y=y,ax=ax,**scatter_kwargs)

    # regression line and annotation
    ax, reg = confire_regression_line(x, y,
                                      is_reg,
                                      reg_box,
                                      df,
                                      ax,
                                      mode,
                                      xl,
                                      yl,
                                      xh,
                                      yh,
                                      null_beta,
                                      r_se,
                                      is_45_helper_line,
                                      helper_line_args,
                                      font_kwargs,
                                      log,
                                      verbose, reg_xmin)

    # Pass the processed options: the original passed the raw save_args and so
    # discarded whatever _extract_kwargs had produced for the "save" prefix.
    save_figure(fig = fig, save = save, keyword="scatter", save_args=save_kwargs, log = log, verbose=verbose)

    return fig, ax, reg
115
+
116
+
117
def confire_regression_line(x, y, is_reg, reg_box, df, ax, mode,xl,yl,xh,yh, null_beta, r_se,
                            is_45_helper_line,helper_line_args, font_kwargs,
                            log, verbose, reg_xmin):
    """Fit ``df[y] ~ df[x]`` and annotate ``ax`` with the regression result.

    When regression is enabled and ``df`` has at least 3 rows, this runs
    ``scipy.stats.linregress``, logs the result, writes the regression text in
    the lower-right corner of the axes, and draws the optional helper line and
    the regression line.

    Parameters
    ----------
    x, y : column names in ``df``.
    is_reg : truthy to fit and draw the regression.
    reg_box : bbox properties for the regression text.
    df : table-like object supporting ``len()`` and column access.
    ax : axes to draw on.
    mode : when ``"0"``, a jackknife SE of r may be appended (see ``r_se``).
    xl, yl, xh, yh, null_beta : accepted for signature compatibility; not used.
    r_se : truthy to estimate the SE of r with ``jackknife_r`` (mode "0" only).
    is_45_helper_line, helper_line_args : forwarded to ``create_helper_line``.
    font_kwargs : text properties for the regression text.
    log, verbose : logger and verbosity flag.
    reg_xmin : x position used to anchor the regression line.

    Returns
    -------
    (ax, reg) : ``reg`` is the ``linregress`` result, or ``None`` when no
        regression was performed (disabled, or fewer than 3 rows).
    """
    # Bind the result up front so the final return never hits an unbound name
    # when the regression is skipped.
    reg = None

    # a regression on fewer than 3 points is not attempted
    if len(df)<3:
        is_reg=False

    if is_reg:
        # slope, intercept, r, p, slope_se
        reg = ss.linregress(df[x],df[y])

        r_se_jackknife_string = ""
        if mode=="0" and r_se:
            log.write(" -Estimating SE for rsq using Jackknife method.", verbose=verbose)
            r_se_jackknife = jackknife_r(df,x,y,log,verbose)
            r_se_jackknife_string = " ({:.2f})".format(r_se_jackknife)

        create_reg_log(reg, log, verbose)

        reg_string = create_reg_string(reg,
                                       r_se_jackknife_string)

        ax.text(0.99,0.01, reg_string, va="bottom",ha="right",transform=ax.transAxes,bbox=reg_box,**font_kwargs)

        ax = create_helper_line(ax, reg[0], is_45_helper_line, helper_line_args, reg_xmin=reg_xmin)
        ax = create_reg_line(ax, reg, reg_xmin=reg_xmin)

    return ax, reg
152
+
153
+ #############################################################################################################################################################################
154
def create_reg_log(reg, log, verbose):
    """Write the regression summary to the log.

    Parameters
    ----------
    reg : indexable regression result read as ``reg[0]`` slope, ``reg[2]`` r,
        ``reg[3]`` p-value and ``reg[4]`` slope standard error.
    log : logger exposing ``write(*parts, verbose=...)``.
    verbose : forwarded to every ``log.write`` call.
    """
    # NOTE: the reported p-value is the one stored in reg[3]; the logged
    # message states the null hypothesis as beta = 0.
    log.write(" -Beta = ", reg[0], verbose=verbose)
    log.write(" -Beta_se = ", reg[4], verbose=verbose)
    log.write(" -H0 beta = 0",", default p = ", "{:.2e}".format(reg[3]), verbose=verbose)
    log.write(" -Pearson correlation coefficient = ", "{:.2f}".format(reg[2]), verbose=verbose)
    log.write(" -r2 = ", "{:.2f}".format(reg[2]**2), verbose=verbose)
164
+
165
def create_helper_line(ax,
                       slope,
                       is_45_helper_line,
                       helper_line_args,
                       reg_xmin=0):
    """Optionally draw a diagonal helper line spanning the current axis limits.

    Parameters
    ----------
    ax : axes exposing ``get_xlim``, ``get_ylim`` and ``axline``.
    slope : regression slope; a positive slope selects the ``y = x`` diagonal,
        anything else selects the ``y = -x`` diagonal.
    is_45_helper_line : truthy to draw the line. Any truthy value is accepted
        (including ``numpy.bool_``), not only the ``True`` singleton.
    helper_line_args : keyword arguments forwarded to ``ax.axline``.
    reg_xmin : accepted for signature compatibility; not used.

    Returns
    -------
    ax : the same axes object that was passed in.
    """
    if is_45_helper_line:
        xl, xh = ax.get_xlim()
        yl, yh = ax.get_ylim()
        # The line runs through two points built from the smallest lower
        # limit and the largest upper limit of the two axes.
        low = min(xl, yl)
        high = max(xh, yh)
        if slope > 0:
            ax.axline([low, low], [high, high], zorder=1, **helper_line_args)
        else:
            ax.axline([low, -low], [high, -high], zorder=1, **helper_line_args)

    return ax
180
+
181
def create_reg_line(ax, reg, reg_xmin=0):
    """Draw the fitted regression line on ``ax`` as a dashed grey line.

    The line is anchored at ``x = reg_xmin`` on the fitted line and extended
    using the slope ``reg[0]``; ``reg[1]`` is used as the intercept.
    Returns the same ``ax``.
    """
    slope = reg[0]
    anchor = (reg_xmin, slope*reg_xmin+reg[1])
    ax.axline(xy1=anchor, slope=slope, color="#cccccc", linestyle='--', zorder=1)
    return ax
185
+
186
def create_reg_string(reg,
                      r_se_jackknife_string):
    """Build the mathtext annotation describing a regression result.

    Parameters
    ----------
    reg : indexable regression result read as ``reg[0]`` slope, ``reg[1]``
        intercept, ``reg[2]`` r and ``reg[3]`` p-value.
    r_se_jackknife_string : text appended after the r value (may be empty).

    Returns
    -------
    str : ``"$y =$ <intercept> $+$ <slope> $x$, $p = a \\times 10^{b}$, $r =$<r><suffix>"``.
    """
    # The p-value is element 3; element 2 is the correlation coefficient,
    # which the original formatted as the p-value by mistake.
    p = reg[3]
    try:
        mantissa, exponent = "{:.2e}".format(p).split("e")
        exponent = str(int(exponent))
    except (TypeError, ValueError):
        # Values without an exponent part (nan/inf) or that cannot be
        # formatted fall back to "0 x 10^0", as before.
        mantissa, exponent = "0", "0"

    p_latex = "$p = " + mantissa + " \\times 10^{" + exponent + "}$"

    reg_string = ("$y =$ " + "{:.2f}".format(reg[1]) + " $+$ " + "{:.2f}".format(reg[0]) + " $x$, "
                  + p_latex + ", $r =$" + "{:.2f}".format(reg[2]) + r_se_jackknife_string)

    return reg_string
202
+
203
def jackknife_r(df,x,y,log,verbose):
    """Estimate the standard error of the correlation coefficient r by jackknife.

    Rows with a missing value in either column are dropped first. The
    regression is then refitted once per remaining row with that row left
    out, and the spread of the resulting r values gives the standard error.

    Parameters
    ----------
    df : pandas DataFrame containing columns ``x`` and ``y``.
    x, y : column names.
    log : logger exposing ``write(message, verbose=...)``.
    verbose : forwarded to ``log.write``.

    Returns
    -------
    float : jackknife standard error of r.
    """
    # keep complete pairs only
    df_nona = df.loc[:,[x,y]].dropna()
    # Count rows AFTER dropping missing values; using len(df) here broke the
    # leave-one-out indexing whenever any row was incomplete.
    n = len(df_nona)
    x_values = df_nona[x].to_numpy()
    y_values = df_nona[y].to_numpy()
    positions = np.arange(n)

    # leave-one-out estimates of r
    r_list=[]
    for i in range(n):
        records_to_use = positions != i
        reg_jackknife = ss.linregress(x_values[records_to_use], y_values[records_to_use])
        r_list.append(reg_jackknife[2])

    rs = np.array(r_list)
    # https://en.wikipedia.org/wiki/Jackknife_resampling
    r_se = np.sqrt( (n-1)/n * np.sum((rs - np.mean(rs))**2) )
    log.write(" -R se (jackknife) = {:.2e}".format(r_se), verbose=verbose)
    return r_se
@@ -80,6 +80,8 @@ def plot_stacked_mqq(objects,
80
80
 
81
81
  if fig_args is None:
82
82
  fig_args = {"dpi":200}
83
+ if save_args is None:
84
+ save_args = {"dpi":400,"facecolor":"white"}
83
85
  if region_lead_grid_line is None:
84
86
  region_lead_grid_line = {"alpha":0.5,"linewidth" : 2,"linestyle":"--","color":"#FF0000"}
85
87
  if region_chromatin_files is None:
@@ -94,6 +96,19 @@ def plot_stacked_mqq(objects,
94
96
  else:
95
97
  if "family" not in title_args.keys():
96
98
  title_args["family"] = "Arial"
99
+
100
+ if save is not None:
101
+ if type(save) is not bool:
102
+ if len(save)>3:
103
+ if save[-3:]=="pdf" or save[-3:]=="svg":
104
+ log.write(" -Adjusting options for saving as pdf/svg...",verbose=verbose)
105
+ fig_args["dpi"]=72
106
+ if "scatter_args" not in mqq_args.keys():
107
+ mqq_args["scatter_args"]={"rasterized":True}
108
+ else:
109
+ mqq_args["scatter_args"]["rasterized"] = True
110
+ else:
111
+ fig_args["dpi"] = save_args["dpi"]
97
112
  # create figure and axes ##################################################################################################################
98
113
  #
99
114
  # subplot_height : subplot height
@@ -141,6 +156,8 @@ def plot_stacked_mqq(objects,
141
156
  region_lead_grids = [i for i in range(len(axes))]
142
157
  ##########################################################################################################################################
143
158
  mqq_args_for_each_plot = _sort_args(mqq_args, n_plot)
159
+
160
+
144
161
  ##########################################################################################################################################
145
162
  # get x axis dict
146
163
  if mode=="m" or mode=="r":
@@ -238,7 +255,6 @@ def plot_stacked_mqq(objects,
238
255
  # adjust labels
239
256
  # drop labels for each plot
240
257
  # set a common laebl for all plots
241
-
242
258
  #if title_box is None:
243
259
  # title_box = dict(boxstyle='square', facecolor='white', alpha=1.0, edgecolor="black")
244
260
  # title_box = {}
@@ -270,8 +286,9 @@ def plot_stacked_mqq(objects,
270
286
  #else:
271
287
  if title_pos is None:
272
288
  title_pos = [0.01,0.97]
273
- for index,title in enumerate(titles):
274
- axes[index].text(title_pos[0], title_pos[1] , title, transform=axes[index].transAxes,ha="left", va='top',zorder=999999, **title_args)
289
+ if titles is not None:
290
+ for index,title in enumerate(titles):
291
+ axes[index].text(title_pos[0], title_pos[1] , title, transform=axes[index].transAxes,ha="left", va='top',zorder=999999, **title_args)
275
292
 
276
293
  ##########################################################################################################################################
277
294
  # draw the line for lead variants
@@ -281,8 +281,10 @@ def plottrumpet(mysumstats,
281
281
  sumstats["ABS_BETA"] = sumstats[beta].abs()
282
282
 
283
283
  ##################################################################################################
284
- size_norm = (sumstats["ABS_BETA"].min(), sumstats["ABS_BETA"].max())
284
+ size_norm = (sumstats[size].min(), sumstats[size].max())
285
285
  ## if highlight ##################################################################################################
286
+
287
+ log.write(" -Creating scatter plot...", verbose=verbose)
286
288
  dots = sns.scatterplot(data=sumstats,
287
289
  x=maf,
288
290
  y=beta,
@@ -295,7 +297,7 @@ def plottrumpet(mysumstats,
295
297
  alpha=0.8,
296
298
  zorder=2,
297
299
  **scatter_args)
298
-
300
+ log.write(" -Finished screating scatter plot...", verbose=verbose)
299
301
  if len(highlight) >0:
300
302
 
301
303
  legend = None
@@ -380,15 +382,13 @@ def plottrumpet(mysumstats,
380
382
  ####################################################################################################################
381
383
 
382
384
  #second_legend = ax.legend(title="Power", loc="upper right",fontsize =fontsize,title_fontsize=fontsize)
383
-
385
+ log.write(" -Creating legends...")
384
386
  h,l = ax.get_legend_handles_labels()
385
387
  if len(ts)>0:
386
388
  l1 = ax.legend(h[:int(len(ts))],l[:int(len(ts))], title="Power", loc="upper right",fontsize =fontsize,title_fontsize=fontsize)
387
389
  for line in l1.get_lines():
388
390
  line.set_linewidth(5.0)
389
391
  if hue is None:
390
- l2 = ax.legend(h[int(len(ts)):],l[int(len(ts)):], title=size, loc="lower right",fontsize =fontsize,title_fontsize=fontsize)
391
- else:
392
392
  l2 = ax.legend(h[int(len(ts)):],l[int(len(ts)):], title=None, loc="lower right",fontsize =fontsize,title_fontsize=fontsize)
393
393
  if len(ts)>0:
394
394
  ax.add_artist(l1)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: gwaslab
3
- Version: 3.5.1
3
+ Version: 3.5.3
4
4
  Summary: A collection of handy tools for GWAS SumStats
5
5
  Author-email: Yunye <yunye@gwaslab.com>
6
6
  Project-URL: Homepage, https://cloufield.github.io/gwaslab/
@@ -1,4 +1,4 @@
1
- gwaslab/__init__.py,sha256=7TKJaODdpeuQKibL7gIEa4MtyQ0pmrU-vIHQ-Et27lQ,2433
1
+ gwaslab/__init__.py,sha256=pP_OQwkaXMJokVVU_o6AXnJEBs2HtaMtpcHIls3ezO8,2486
2
2
  gwaslab/bd_common_data.py,sha256=2voBqMrIsII1TN5T6uvyDax90fWcJK1Stmo1ZHNGGsE,13898
3
3
  gwaslab/bd_config.py,sha256=TP-r-DPhJD3XnRYZbw9bQHXaDIkiRgK8bG9HCt-UaLc,580
4
4
  gwaslab/bd_download.py,sha256=cDDk2C5IvjeAzvPvVYGTkI4Ss33DUtEDjGo8eAbQRvY,15663
@@ -6,21 +6,21 @@ gwaslab/bd_get_hapmap3.py,sha256=FQpwbhWUPFT152QtiLevEkkN4YcVDIeKzoK0Uz1NlRo,410
6
6
  gwaslab/cache_manager.py,sha256=HOTnSkCOyGEPLRl90WT8D_6pAdI8d8AzenMIDGuCeWc,28113
7
7
  gwaslab/g_Log.py,sha256=C3Zv-_6c3C9ms8bgQ-ytplz22sjk7euqXYkWr9zNeAs,1573
8
8
  gwaslab/g_Phenotypes.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
- gwaslab/g_Sumstats.py,sha256=c_qYY2H-nf-JtGepzafoHuYwnWxmOOBf9CDytPZc60Q,36704
9
+ gwaslab/g_Sumstats.py,sha256=eqEpHEH5fnBMsOIufVzwaRp0_vCsuHvGEUe5OzNL41s,36969
10
10
  gwaslab/g_SumstatsPair.py,sha256=20snPb4SlI6ftMGVjgxAuyxsxYRQF-GzzlBSnoB-3Lo,8851
11
11
  gwaslab/g_SumstatsT.py,sha256=u_DighLMnMxwTLnqm-B58pA0G6WXRj6pudPyKMVKjSU,2133
12
12
  gwaslab/g_Sumstats_summary.py,sha256=FECvvFXJVKaCX5dggBvvk9YvJ6AbdbcLfjltysX7wEE,6380
13
13
  gwaslab/g_meta.py,sha256=htWlgURWclm9R6UqFcX1a93WN27xny7lGUeyJZOtszQ,2583
14
14
  gwaslab/g_vchange_status.py,sha256=w3zsYYOcCaI3PTeboonvkQjudzUAfVIgATzRdiPViZs,1939
15
- gwaslab/g_version.py,sha256=Wpfo8Y_fjYS4ajalombaHrLezBO7BOr070GnjQHhOGw,1885
15
+ gwaslab/g_version.py,sha256=wfkMhPi1U1fd25HKu8F5F4j1YcnYQslOrERu0bBTD38,1885
16
16
  gwaslab/hm_casting.py,sha256=FqP4EQl83Q2OKLw004OgLIvUH795TVCGwziLk5jsHqY,11368
17
17
  gwaslab/hm_harmonize_sumstats.py,sha256=_sZ8soikAxDokw-dcr_CLguBB8OmTmPPS04MfmsJc_Q,79509
18
18
  gwaslab/hm_rsid_to_chrpos.py,sha256=ODWREO0jPN0RAfNzL5fRzSRANfhiksOvUVPuEsFZQqA,6552
19
19
  gwaslab/io_preformat_input.py,sha256=J8Ny4OPMaLVdo2nP8lTM-c5A8LSdqphSrp9G4i9JjDQ,24097
20
- gwaslab/io_process_args.py,sha256=bF7oHBtMnxJgksIit0O0_U94dZFh8r5YblgDqEEsqoM,806
20
+ gwaslab/io_process_args.py,sha256=0ljJOVGsD7qPuBLvdfvR7Vrh7zXPlvfPg-rhOw8xRpQ,1366
21
21
  gwaslab/io_read_ldsc.py,sha256=wsYXpH50IchBKd2dhYloSqc4YgnDkiwMsAweaCoN5Eo,12471
22
22
  gwaslab/io_read_tabular.py,sha256=EG-C6KhCutt4J4LlOMgXnqzJvU-EZXzVhMvaDFnHrMM,2380
23
- gwaslab/io_to_formats.py,sha256=8FmbQjWUIsz_V1Lb80TuwRIXKBgs5t42j25Znougk1Y,29401
23
+ gwaslab/io_to_formats.py,sha256=hiIaR-JKOVehv7Y14_SklvzPi_E4U-wUybjKWfPyIus,32587
24
24
  gwaslab/io_to_pickle.py,sha256=HhePU0VcaGni0HTNU0BqoRaOnrr0NOxotgY6ISdx3Ck,1833
25
25
  gwaslab/ldsc_irwls.py,sha256=83JbAMAhD0KOfpv4IJa6LgUDfQjp4XSJveTjnhCBJYQ,6142
26
26
  gwaslab/ldsc_jackknife.py,sha256=XrWHoKS_Xn9StG1I83S2vUMTertsb-GH-_gOFYUhLeU,17715
@@ -55,25 +55,26 @@ gwaslab/util_in_get_sig.py,sha256=53NOh7KueLY3vJPTNhhb37KPAIgLEfcP3k2zIV61lc4,39
55
55
  gwaslab/util_in_meta.py,sha256=5K9lIZcIgUy0AERqHy1GvMN2X6dp45JUUgopuDLgt4o,11284
56
56
  gwaslab/util_in_snphwe.py,sha256=-KpIDx6vn_nah6H55IkV2OyjXQVXV13XyBL069WE1wM,1751
57
57
  gwaslab/vis_plot_credible sets.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
58
- gwaslab/viz_aux_annotate_plot.py,sha256=gA-s8a90dsl3MB5CIapdI_DecD9h2FmuqMgy07kMYJI,25207
59
- gwaslab/viz_aux_chromatin.py,sha256=7cGmej5EkKO7fxR1b5w8r1oRRl9ofVzFRG52SCYWtz0,4109
58
+ gwaslab/viz_aux_annotate_plot.py,sha256=3PyBioC-3ZBzqKT2JRv6vp-hZUXSqTTePIT5cblEvnQ,25334
59
+ gwaslab/viz_aux_chromatin.py,sha256=aWZaXOSvGyZY7wQcoFDaqHRYCSHZbi_K4Q70HruN9ts,4125
60
60
  gwaslab/viz_aux_property.py,sha256=UIaivghnLXYpTwkKnXRK0F28Jbn9L6OaICk3K73WZaU,33
61
- gwaslab/viz_aux_quickfix.py,sha256=HnhVvY0GP0EN0gLJ-B11OYiE5PWDcdKGUpQ3QZeu0lE,18592
61
+ gwaslab/viz_aux_quickfix.py,sha256=cGX5i3WBmvKIiqck8V00caDg-pvKOO709Ux3DBXsUrM,18693
62
62
  gwaslab/viz_aux_reposition_text.py,sha256=iRIP-Rkltlei068HekJcVubiqPrunBqvAoSQ1eHk04M,4304
63
- gwaslab/viz_aux_save_figure.py,sha256=GdUXNBOelsWqtTXm8pEZzeGGwDxHYnBkyrGwLOK16ew,2723
63
+ gwaslab/viz_aux_save_figure.py,sha256=x_b4DlTSmHJddfQgoYoReCi4QQbQEtcwCWTKfGetfTA,2768
64
64
  gwaslab/viz_plot_compare_af.py,sha256=qtXW45-Sq_ugK8ZfqBYMpmf58SKi3lB3YyHnzn_akcE,5344
65
65
  gwaslab/viz_plot_compare_effect.py,sha256=kq-rVWygHEeTBMOtd_jk8nK85ClZHU-ADSf4nI2gTKo,66604
66
66
  gwaslab/viz_plot_forestplot.py,sha256=xgOnefh737CgdQxu5naVyRNBX1NQXPFKzf51fbh6afs,6771
67
67
  gwaslab/viz_plot_miamiplot.py,sha256=rCFEp7VNuVqeBBG3WRkmFAtFklbF79BvIQQYiSY70VY,31238
68
68
  gwaslab/viz_plot_miamiplot2.py,sha256=xiFCgFX8hEySmCJORpEurMVER9eEXQyy_Ik7mLkbi9g,16015
69
- gwaslab/viz_plot_mqqplot.py,sha256=emyEXZZenzm8eh3XFCkTWI8sz0fEnL5QJxohOZMxWZc,67189
69
+ gwaslab/viz_plot_mqqplot.py,sha256=Pea0uNWe4ZW3S8z8BGQ_dIEWmnKWoxAgEMVxGsMJYxk,68560
70
70
  gwaslab/viz_plot_phe_heatmap.py,sha256=qoXVeFTIm-n8IinNbDdPFVBSz2yGCGK6QzTstXv6aj4,9532
71
71
  gwaslab/viz_plot_qqplot.py,sha256=psQgVpP29686CEZkzQz0iRbApzqy7aE3GGiBcazVvNw,7247
72
- gwaslab/viz_plot_regional2.py,sha256=tBoGox-4ngL5o_twdIjk_VW6Iam3JDyrPKuttm6_4Sg,36862
72
+ gwaslab/viz_plot_regional2.py,sha256=rvvIU60pOJFiDReQFd_Q2MkthuwvNOUkptV1swP9uJM,39444
73
73
  gwaslab/viz_plot_regionalplot.py,sha256=8u-5-yfy-UaXhaxVVz3Y5k2kBAoqzczUw1hyyD450iI,37983
74
74
  gwaslab/viz_plot_rg_heatmap.py,sha256=PidUsgOiEVt6MfBPCF3_yDhOEytZ-I1q-ZD6_0pFrV4,13713
75
- gwaslab/viz_plot_stackedregional.py,sha256=HfNUhwxevbwSoauE0ysG020U7YFVy4111nkIWdaJ4Q8,16664
76
- gwaslab/viz_plot_trumpetplot.py,sha256=ZHdc6WcVx0-oKoj88yglRkmB4bS9pOiEMcuwKW35Yvo,42672
75
+ gwaslab/viz_plot_scatter_with_reg.py,sha256=PmUZDQl2q4Dme3HLPXEwf_TrMjwJADA-uFXNDBWUEa4,8333
76
+ gwaslab/viz_plot_stackedregional.py,sha256=UefPxnd-EJApFdwcafE2k6jZqrfGKEjy51NhieiRaPM,17362
77
+ gwaslab/viz_plot_trumpetplot.py,sha256=y4sAFjzMaSLuWrdr9_ao-wPYCK5DlP2ykiqulWsoN_k,42680
77
78
  gwaslab/data/formatbook.json,sha256=N2nJs80HH98Rsu9FxaSvIQO9J5yIV97WEtAKjRqYwiY,38207
78
79
  gwaslab/data/reference.json,sha256=IrjwFnXjrpVUp3zYfcYClpibJE9Y-94gtrC1Aw8sXxg,12332
79
80
  gwaslab/data/chrx_par/chrx_par_hg19.bed.gz,sha256=LocZg_ozhZjQiIpgWCO4EYCW9xgkEKpRy1m-YdIpzQs,83
@@ -82,9 +83,9 @@ gwaslab/data/hapmap3_SNPs/hapmap3_db150_hg19.snplist.gz,sha256=qD9RsC5S2h6l-OdpW
82
83
  gwaslab/data/hapmap3_SNPs/hapmap3_db151_hg38.snplist.gz,sha256=Y8ZT2FIAhbhlgCJdE9qQVAiwnV_fcsPt72usBa7RSBM,10225828
83
84
  gwaslab/data/high_ld/high_ld_hla_hg19.bed.gz,sha256=R7IkssKu0L4WwkU9SrS84xCMdrkkKL0gnTNO_OKbG0Y,219
84
85
  gwaslab/data/high_ld/high_ld_hla_hg38.bed.gz,sha256=76CIU0pibDJ72Y6UY-TbIKE9gEPwTELAaIbCXyjm80Q,470
85
- gwaslab-3.5.1.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
86
- gwaslab-3.5.1.dist-info/LICENSE_before_v3.4.39,sha256=GhLOU_1UDEKeOacYhsRN_m9u-eIuVTazSndZPeNcTZA,1066
87
- gwaslab-3.5.1.dist-info/METADATA,sha256=Dqj65vurvDR3JCwlyCVnnfUS64cmN1vMJJjUTCm3xLI,7758
88
- gwaslab-3.5.1.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
89
- gwaslab-3.5.1.dist-info/top_level.txt,sha256=PyY6hWtrALpv2MAN3kjkIAzJNmmBTH5a2risz9KwH08,8
90
- gwaslab-3.5.1.dist-info/RECORD,,
86
+ gwaslab-3.5.3.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
87
+ gwaslab-3.5.3.dist-info/LICENSE_before_v3.4.39,sha256=GhLOU_1UDEKeOacYhsRN_m9u-eIuVTazSndZPeNcTZA,1066
88
+ gwaslab-3.5.3.dist-info/METADATA,sha256=lMetRGVrgpM2KaxU-y17WKZhZkOd7aafJY1pta3oKJA,7758
89
+ gwaslab-3.5.3.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
90
+ gwaslab-3.5.3.dist-info/top_level.txt,sha256=PyY6hWtrALpv2MAN3kjkIAzJNmmBTH5a2risz9KwH08,8
91
+ gwaslab-3.5.3.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.5.0)
2
+ Generator: setuptools (75.6.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5