gwaslab 3.5.1__py3-none-any.whl → 3.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gwaslab might be problematic. Click here for more details.

gwaslab/__init__.py CHANGED
@@ -46,3 +46,4 @@ from gwaslab.util_ex_process_h5 import process_vcf_to_hfd5
46
46
  from gwaslab.util_ex_run_susie import _run_susie_rss as run_susie_rss
47
47
  from gwaslab.io_read_tabular import _read_tabular as read_tabular
48
48
  from gwaslab.util_in_meta import meta_analyze
49
+ from gwaslab.viz_plot_scatter_with_reg import scatter
gwaslab/g_Sumstats.py CHANGED
@@ -389,6 +389,12 @@ class Sumstats():
389
389
  self.data =flipallelestats(self.data,log=self.log,**flipallelestats_args)
390
390
 
391
391
  gc.collect()
392
+
393
+ if (ref_seq is not None or ref_infer is not None) and (ref_rsid_tsv is not None or ref_rsid_vcf is not None):
394
+
395
+ self.data = fixID(self.data, log=self.log, **{"fixid":True, "fixsep":True, "overwrite":True})
396
+
397
+ gc.collect()
392
398
 
393
399
  #####################################################
394
400
  if ref_rsid_tsv is not None:
@@ -833,4 +839,4 @@ class Sumstats():
833
839
  def to_format(self, path, build=None, verbose=True, **kwargs):
834
840
  if build is None:
835
841
  build = self.meta["gwaslab"]["genome_build"]
836
- _to_format(self.data, path, log=self.log, verbose=verbose, meta=self.meta, build=build, **kwargs)
842
+ _to_format(self.data, path, log=self.log, verbose=verbose, meta=self.meta, build=build, **kwargs)
gwaslab/g_version.py CHANGED
@@ -15,8 +15,8 @@ def _get_version():
15
15
  def gwaslab_info():
16
16
  # version meta information
17
17
  dic={
18
- "version":"3.5.1",
19
- "release_date":"20241120"
18
+ "version":"3.5.2",
19
+ "release_date":"20241203"
20
20
  }
21
21
  return dic
22
22
 
@@ -1,15 +1,13 @@
1
1
  import copy
2
- def _merge_and_sync_dic(list_of_dics:list, default:dict) -> dict:
3
- temp = copy.copy(default)
4
- for dic in list_of_dics:
5
- if isinstance(dic, dict):
6
- temp.update(dic)
7
- return temp
8
2
 
9
3
  def _list_func_args(func):
10
4
  return func.__code__.co_varnames
11
5
 
12
6
  def _extract_kwargs(prefix:str, default:dict, kwargs:dict) -> dict:
7
+ # prefix: keyword
8
+ # default: default dict
9
+ # kwargs: all local kwargs + args + kwargs
10
+
13
11
  extracted = []
14
12
  for key,value in kwargs.items():
15
13
  if key=="kwargs" or key=="args":
@@ -20,4 +18,11 @@ def _extract_kwargs(prefix:str, default:dict, kwargs:dict) -> dict:
20
18
  if prefix in key and "arg" in key:
21
19
  extracted.append(value)
22
20
  merged_arg = _merge_and_sync_dic(extracted, default)
23
- return merged_arg
21
+ return merged_arg
22
+
23
+ def _merge_and_sync_dic(list_of_dics:list, default:dict) -> dict:
24
+ temp = copy.copy(default)
25
+ for dic in list_of_dics:
26
+ if isinstance(dic, dict):
27
+ temp.update(dic)
28
+ return temp
@@ -1,6 +1,8 @@
1
1
  import pandas as pd
2
2
  import numpy as np
3
3
  from gwaslab.g_Log import Log
4
+ from matplotlib import ticker
5
+ import matplotlib.pyplot as plt
4
6
  from gwaslab.bd_common_data import get_chr_to_number
5
7
  from gwaslab.bd_common_data import get_number_to_chr
6
8
  from math import ceil
@@ -350,7 +352,7 @@ def _cut(series, mode,cutfactor,cut,skip, ylabels, cut_log, verbose, lines_to_pl
350
352
  #sumstats.loc[sumstats["scaled_P"]>cut,"scaled_P"] = (sumstats.loc[sumstats["scaled_P"]>cut,"scaled_P"]-cut)/cutfactor + cut
351
353
 
352
354
  maxy = (maxticker-cut)/cutfactor + cut
353
-
355
+
354
356
  return series, maxy, maxticker, cut, cutfactor,ylabels,lines_to_plot
355
357
 
356
358
  #def _cut_line(level, mode,cutfactor,cut,skip, ylabels, cut_log, verbose, log):
@@ -379,10 +381,9 @@ def _set_yticklabels(cut,
379
381
  log.write(" -Processing Y tick lables...",verbose=verbose)
380
382
  # if no cut
381
383
  if cut == 0:
382
- ax1.set_ylim(skip, ceil(maxy*1.2) )
383
-
384
+ ax1.set_ylim((skip, ceil(maxy*1.2)) )
384
385
  # if cut
385
- if cut:
386
+ if cut!=0:
386
387
  # add cut line
387
388
 
388
389
  cutline = ax1.axhline(y=cut, linewidth = sc_linewidth,linestyle="--",color=cut_line_color,zorder=1)
@@ -432,14 +433,15 @@ def _set_yticklabels(cut,
432
433
  else:
433
434
  ax1.set_yticks(ticks1+ticks2)
434
435
  ax1.set_yticklabels(tickslabel1+tickslabel2,fontsize=fontsize,family=font_family)
435
- ax1.set_ylim(bottom = skip)
436
436
 
437
437
  if ylabels is not None:
438
438
  ax1.set_yticks(ylabels_converted)
439
439
  ax1.set_yticklabels(ylabels,fontsize=fontsize,family=font_family)
440
440
 
441
+ ylim_top = ax1.get_ylim()[1]
442
+ ax1.set_ybound(lower=skip,upper=ylim_top)
441
443
  ax1.tick_params(axis='y', labelsize=fontsize)
442
-
444
+
443
445
  return ax1
444
446
 
445
447
  def _jagged_y(cut,skip,ax1,mode,mqqratio,jagged_len,jagged_wid, log=Log(), verbose=True):
@@ -51,7 +51,8 @@ def get_default_path(keyword,fmt="png"):
51
51
  "miami":"miami",
52
52
  "esc":"effect_size_comparision",
53
53
  "afc":"allele_frequency_comparision",
54
- "gwheatmap":"genome_wide_heatmap"
54
+ "gwheatmap":"genome_wide_heatmap",
55
+ "scatter":"scatter"
55
56
  }
56
57
  prefix = path_dictionary[keyword]
57
58
  count = 1
@@ -264,7 +264,7 @@ def mqqplot(insumstats,
264
264
 
265
265
  if region_marker_shapes is None:
266
266
  # 9 shapes
267
- region_marker_shapes = ['o', 's','^','D','*','P','X','h','8']
267
+ region_marker_shapes = ['o', '^','s','D','*','P','X','h','8']
268
268
  if region_grid_line is None:
269
269
  region_grid_line = {"linewidth": 2,"linestyle":"--"}
270
270
  if region_lead_grid_line is None:
@@ -640,6 +640,14 @@ def mqqplot(insumstats,
640
640
  linewidth=1
641
641
  if len(region_ref) == 1:
642
642
  palette = {100+i:region_ld_colors[i] for i in range(len(region_ld_colors))}
643
+ scatter_args["markers"]= {(i+1):m for i,m in enumerate(region_marker_shapes[:2])}
644
+ if region_ref[0] is None:
645
+ id_to_hide = sumstats["scaled_P"].idxmax()
646
+ sumstats.loc[id_to_hide,"s"] = -100
647
+ else:
648
+ sumstats.loc[sumstats["SNPID"]==region_ref[0],"s"] = -100
649
+ marker_size=(0,marker_size[1])
650
+ style="SHAPE"
643
651
  else:
644
652
  palette = {}
645
653
  region_color_maps = []
@@ -652,12 +660,14 @@ def mqqplot(insumstats,
652
660
  # 1 + 5 + 1
653
661
  region_ld_colors_single = [region_ld_colors[0]] + output_hex_colors + [output_hex_colors[-1]]
654
662
  region_color_maps.append(region_ld_colors_single)
655
- # gradient colors
663
+
664
+ # gradient color dict
656
665
  for i, hex_colors in enumerate(region_color_maps):
657
666
  for j, hex_color in enumerate(hex_colors):
658
667
  palette[(i+1)*100 + j ] = hex_color
659
668
 
660
669
  edgecolor="none"
670
+ # create a marker shape dict
661
671
  scatter_args["markers"]= {(i+1):m for i,m in enumerate(region_marker_shapes[:len(region_ref)])}
662
672
  style="SHAPE"
663
673
 
@@ -1396,14 +1406,19 @@ def _process_xtick(ax1, chrom_df, xtick_chr_dict, fontsize, font_family, log=Log
1396
1406
 
1397
1407
  def _process_ytick(ax1, fontsize, font_family, ax4, log=Log(),verbose=True):
1398
1408
  log.write(" -Processing Y labels...",verbose=verbose)
1399
- ax1_yticklabels = ax1.get_yticklabels()
1409
+ #ax1_yticklabels = ax1.get_yticklabels()
1410
+ #print(ax1_yticklabels)
1411
+ #plt.draw()
1412
+ #ax1_yticks = ax1.get_yticks()
1413
+ #print(ax1_yticks)
1400
1414
  #ax1.set_yticklabels(ax1_yticklabels,fontsize=fontsize,family=font_family)
1401
- ax1_yticks = ax1.get_yticks()
1402
- ax1.set_yticks(ax1_yticks,ax1_yticklabels,fontsize=fontsize,family=font_family)
1415
+ ax1.tick_params(axis='y', labelsize=fontsize,labelfontfamily=font_family)
1416
+ #ax1.set_yticks(ax1_yticks,ax1_yticklabels,fontsize=fontsize,family=font_family)
1403
1417
  if ax4 is not None:
1404
- ax4_yticklabels = ax4.get_yticklabels()
1405
- ax4_yticks = ax4.get_yticks()
1406
- ax4.set_yticks(ax4_yticks,ax4_yticklabels, fontsize=fontsize,family=font_family)
1418
+ #ax4_yticklabels = ax4.get_yticklabels()
1419
+ #ax4_yticks = ax4.get_yticks()
1420
+ ax4.tick_params(axis='y', labelsize=fontsize,labelfontfamily=font_family)
1421
+ #ax4.set_yticks(ax4_yticks,ax4_yticklabels, fontsize=fontsize,family=font_family)
1407
1422
  return ax1, ax4
1408
1423
 
1409
1424
  def _process_xlabel(region, xlabel, ax1, gtf_path, mode, fontsize, font_family, ax3=None , log=Log(),verbose=True):
@@ -93,6 +93,7 @@ def _plot_regional(
93
93
  ax1, lead_id_single = _pinpoint_lead(sumstats = sumstats,
94
94
  ax1 = ax1,
95
95
  region_ref=region_ref_single,
96
+ region_ref_total_n = len(region_ref),
96
97
  lead_color = palette[(index+1)*100 + len(region_ld_threshold)+2],
97
98
  marker_size= marker_size,
98
99
  region_marker_shapes=region_marker_shapes,
@@ -130,7 +131,9 @@ def _plot_regional(
130
131
  region_ref=region_ref,
131
132
  region_ld_threshold=region_ld_threshold,
132
133
  region_ref_index_dic=region_ref_index_dic,
133
- palette=palette)
134
+ region_marker_shapes=region_marker_shapes,
135
+ palette=palette,
136
+ fig=fig)
134
137
  else:
135
138
  cbar=None
136
139
 
@@ -300,7 +303,7 @@ def _get_lead_id(sumstats=None, region_ref=None, log=None, verbose=True):
300
303
 
301
304
  return lead_id
302
305
 
303
- def _pinpoint_lead(sumstats,ax1,region_ref, lead_color, marker_size, log, verbose, region_marker_shapes):
306
+ def _pinpoint_lead(sumstats,ax1,region_ref, region_ref_total_n, lead_color, marker_size, log, verbose, region_marker_shapes):
304
307
 
305
308
  if region_ref is None:
306
309
  log.write(" -Extracting lead variant..." , verbose=verbose)
@@ -308,12 +311,19 @@ def _pinpoint_lead(sumstats,ax1,region_ref, lead_color, marker_size, log, verbos
308
311
  else:
309
312
  lead_id = _get_lead_id(sumstats, region_ref, log, verbose)
310
313
 
314
+ if region_ref_total_n <2:
315
+ # single-ref mode
316
+ marker_shape = region_marker_shapes[sumstats.loc[lead_id,"SHAPE"]]
317
+ else:
318
+ # multi-ref mode
319
+ marker_shape = region_marker_shapes[sumstats.loc[lead_id,"SHAPE"]-1]
320
+
311
321
  if lead_id is not None:
312
322
  ax1.scatter(sumstats.loc[lead_id,"i"],sumstats.loc[lead_id,"scaled_P"],
313
323
  color=lead_color,
314
324
  zorder=3,
315
- marker= region_marker_shapes[sumstats.loc[lead_id,"SHAPE"]-1],
316
- s=marker_size[1]+2,
325
+ marker= marker_shape,
326
+ s=marker_size[1]*1.5,
317
327
  edgecolor="black")
318
328
 
319
329
  return ax1, lead_id
@@ -322,14 +332,15 @@ def _add_region_title(region_title, ax1,region_title_args):
322
332
  ax1.text(0.015,0.97, region_title, transform=ax1.transAxes, va="top", ha="left", region_ref=None, **region_title_args )
323
333
  return ax1
324
334
 
325
- def _add_ld_legend(sumstats, ax1, region_ld_threshold, region_ref,region_ref_index_dic,palette =None, position=1):
335
+ def _add_ld_legend(sumstats, ax1, region_ld_threshold, region_ref,region_ref_index_dic,region_marker_shapes,fig, palette =None, position=1):
326
336
 
327
337
  width_pct = "11%"
328
338
  height_pct = "{}%".format( 14 + 7 * len(region_ref))
329
339
  axins1 = inset_axes(ax1,
330
340
  width=width_pct, # width = 50% of parent_bbox width
331
341
  height=height_pct, # height : 5%
332
- loc='upper right',axes_kwargs={"frameon":True,"facecolor":"white","zorder":999999})
342
+ loc='upper right',
343
+ axes_kwargs={"frameon":True,"facecolor":"white","zorder":999999})
333
344
 
334
345
  ld_ticks = [0]+region_ld_threshold+[1]
335
346
 
@@ -345,7 +356,7 @@ def _add_ld_legend(sumstats, ax1, region_ld_threshold, region_ref,region_ref_ind
345
356
  a = Rectangle((x,y),width, height, fill = True, color = hex_color , linewidth = 2)
346
357
  #patches.append(a)
347
358
  axins1.add_patch(a)
348
-
359
+
349
360
  # y snpid
350
361
  yticks_position = 0.1 + 0.2 *np.arange(0,len(region_ref))
351
362
  axins1.set_yticks(yticks_position, ["{}".format(x) for x in region_ref])
@@ -354,9 +365,31 @@ def _add_ld_legend(sumstats, ax1, region_ld_threshold, region_ref,region_ref_ind
354
365
  # x ld thresholds
355
366
  axins1.set_xticks(ticks=ld_ticks)
356
367
  axins1.set_xticklabels([str(i) for i in ld_ticks])
357
- axins1.set_xlim(0,1)
358
368
 
369
+ xmin, xmax = 0, 1
370
+ axins1.set_xlim(xmin,xmax)
371
+
372
+ ############### ##############plot marker ############## ##############
373
+ for group_index, ref in enumerate(region_ref):
374
+ x= -0.1
375
+ y= 0.1 + 0.2 * group_index
376
+
377
+ if len(region_ref) <2:
378
+ # single-ref mode
379
+ marker = region_marker_shapes[group_index+1]
380
+ else:
381
+ # multi-ref mode
382
+ marker = region_marker_shapes[group_index]
383
+
384
+ # ([x0,y0][x1,y1])
385
+ data_to_point =(axins1.bbox.get_points()[1][0]-axins1.bbox.get_points()[0][0]) / (xmax - xmin)
386
+ s = data_to_point * 0.075
387
+ c = palette[(region_ref_index_dic[region_ref[group_index]]+1)*100 + len(ld_ticks)-1]
388
+ axins1.scatter(x, y, s=s, marker=marker,c=c, edgecolors="black", linewidths = 1, clip_on=False, zorder=100)
389
+
390
+ axins1.set_xlim(0,1)
359
391
  axins1.set_aspect('equal', adjustable='box')
392
+ axins1.tick_params(axis="y", pad=np.sqrt(data_to_point * 0.11))
360
393
  axins1.set_title('LD $r^{2}$ with variant',loc="center",y=-0.2)
361
394
  cbar = axins1
362
395
  return ax1, cbar
@@ -664,6 +697,9 @@ def process_vcf(sumstats,
664
697
  sumstats[final_shape_col] = 1
665
698
  sumstats[final_rsq_col] = 0.0
666
699
 
700
+ if len(region_ref)==1:
701
+ sumstats.loc[lead_id, final_shape_col] +=1
702
+
667
703
  for i in range(len(region_ref)):
668
704
  ld_single = "LD_{}".format(i)
669
705
  current_rsq = "RSQ_{}".format(i)
@@ -672,7 +708,6 @@ def process_vcf(sumstats,
672
708
  sumstats.loc[a_ngt_b, final_ld_col] = 100 * (i+1) + sumstats.loc[a_ngt_b, ld_single]
673
709
  sumstats.loc[a_ngt_b, final_rsq_col] = sumstats.loc[a_ngt_b, current_rsq]
674
710
  sumstats.loc[a_ngt_b, final_shape_col] = i + 1
675
-
676
711
  ####################################################################################################
677
712
  log.write("Finished loading reference genotype successfully!", verbose=verbose)
678
713
  return sumstats
@@ -0,0 +1,229 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ import matplotlib.pyplot as plt
4
+ import scipy.stats as ss
5
+ import seaborn as sns
6
+ import gc
7
+ import math
8
+ import scipy.stats as ss
9
+ from matplotlib.patches import Rectangle
10
+ from adjustText import adjust_text
11
+ from gwaslab.viz_aux_save_figure import save_figure
12
+ from gwaslab.util_in_get_sig import getsig
13
+ from gwaslab.util_in_get_sig import annogene
14
+ from gwaslab.g_Log import Log
15
+ from gwaslab.util_in_correct_winnerscurse import wc_correct
16
+ from gwaslab.util_in_correct_winnerscurse import wc_correct_test
17
+ from gwaslab.g_Sumstats import Sumstats
18
+ from gwaslab.io_process_args import _merge_and_sync_dic
19
+ from gwaslab.io_process_args import _extract_kwargs
20
+
21
def scatter(df,
            x,
            y,
            mode="0",
            reg_box=None,
            is_reg=True,
            fdr=False,
            allele_match=False,
            r_se=False,
            is_45_helper_line=False,
            plt_args=None,
            xylabel_prefix="Per-allele effect size in ",
            helper_line_args=None,
            font_args=None,
            fontargs=None,
            build="19",
            r_or_r2="r",
            err_kwargs=None,
            legend_args=None,
            log = Log(),
            save=False,
            reg_xmin=None,
            verbose=True,
            save_args=None,
            scatter_kwargs=None,
            font_kwargs=None,
            plt_kwargs=None,
            null_beta=0,
            engine="plt",
            **kwargs):
    """Draw a scatter plot of ``df[x]`` against ``df[y]`` with an optional regression line.

    Parameters
    ----------
    df : table indexable by column name (``df[x]``, ``df[y]``) that supports ``len()``.
    x, y : column names used for the horizontal and vertical axes.
    mode : forwarded to ``confire_regression_line``.
    reg_box : ``bbox`` dict for the regression text; a white rounded box by default.
    is_reg : whether a regression should be fitted and annotated.
    r_se : whether a jackknife SE of r is requested (forwarded).
    is_45_helper_line, helper_line_args : control/format the diagonal helper line.
    reg_xmin : x anchor of the regression line; defaults to ``df[x].min()``.
    save, save_args : forwarded to ``save_figure``.
    scatter_kwargs, plt_kwargs, font_kwargs, err_kwargs : option dicts for the
        scatter call, ``plt.subplots``, the regression text, and error bars
        (``err_kwargs`` is merged but not otherwise used in this function).
    engine : ``"plt"`` uses ``ax.scatter``; ``"sns"`` uses ``sns.scatterplot``;
        any other value draws no points.
    log, verbose : logger and verbosity flag.
    fdr, allele_match, xylabel_prefix, build, r_or_r2 : not read by this function.
    plt_args, font_args, fontargs, legend_args, **kwargs : not read directly,
        but visible to ``_extract_kwargs`` through ``locals()``.

    Returns
    -------
    (fig, ax, reg) : the figure, the axes, and whatever
        ``confire_regression_line`` returns as the regression result.
    """
    # Fill in defaults for every option dict that was not supplied.
    if save_args is None:
        save_args = {"dpi":300,"facecolor":"white"}
    if reg_box is None:
        reg_box = dict(boxstyle='round', facecolor='white', alpha=1,edgecolor="None")
    if err_kwargs is None:
        err_kwargs={"ecolor":"#cccccc","elinewidth":1}
    if font_kwargs is None:
        font_kwargs={'fontsize':12,'family':'sans','fontname':'Arial'}
    if helper_line_args is None:
        helper_line_args={"color":'black', "linestyle":'-',"lw":1}
    if plt_kwargs is None:
        plt_kwargs={"figsize":(8,8),"dpi":300}
    if scatter_kwargs is None:
        scatter_kwargs={"s":20}
    if reg_xmin is None:
        reg_xmin = df[x].min()

    # NOTE: _extract_kwargs inspects locals(); local names in this function are
    # therefore part of its behaviour and must not be renamed casually.
    # NOTE(review): merging rules live in io_process_args._extract_kwargs, which is
    # only partly visible here - confirm before relying on precedence.
    save_kwargs = _extract_kwargs("save", save_args, locals())
    err_kwargs = _extract_kwargs("err", err_kwargs, locals())
    plt_kwargs = _extract_kwargs("plt", plt_kwargs, locals())
    scatter_kwargs = _extract_kwargs("scatter", scatter_kwargs, locals())
    font_kwargs = _extract_kwargs("font",font_kwargs, locals())

    log.write("Start to create scatter plot...", verbose=verbose)
    fig,ax = plt.subplots(**plt_kwargs)

    # Axis limits of the still-empty axes; only forwarded to the regression helper.
    xl,xh=ax.get_xlim()
    yl,yh=ax.get_ylim()

    log.write(" -Creating scatter plot : {} - {}...".format(x, y), verbose=verbose)
    if engine=="plt":
        ax.scatter(df[x],df[y],**scatter_kwargs)
    elif engine=="sns":
        sns.scatterplot(data=df,x=x,y=y,ax=ax,**scatter_kwargs)

    # Regression line, annotation and optional helper line.
    ax, reg = confire_regression_line(x, y,
                                      is_reg,
                                      reg_box,
                                      df,
                                      ax,
                                      mode,
                                      xl,
                                      yl,
                                      xh,
                                      yh,
                                      null_beta,
                                      r_se,
                                      is_45_helper_line,
                                      helper_line_args,
                                      font_kwargs,
                                      log,
                                      verbose, reg_xmin)

    # Use the merged save options; the original passed the un-merged save_args,
    # which left save_kwargs computed but unused.
    save_figure(fig = fig, save = save, keyword="scatter", save_args=save_kwargs, log = log, verbose=verbose)

    return fig, ax, reg
115
+
116
+
117
def confire_regression_line(x, y, is_reg, reg_box, df, ax, mode,xl,yl,xh,yh, null_beta, r_se,
                            is_45_helper_line,helper_line_args, font_kwargs,
                            log, verbose, reg_xmin):
    """Fit ``df[y] ~ df[x]`` and decorate ``ax`` with the regression result.

    When ``is_reg`` is ``True`` and ``df`` has at least three rows, a linear
    regression is fitted with ``scipy.stats.linregress``, logged, written as
    text in the lower-right corner of ``ax``, and drawn as a line; the optional
    helper line is added as well.

    Parameters
    ----------
    x, y : column names in ``df``.
    is_reg : regression is only performed when this is exactly ``True``.
    reg_box : ``bbox`` argument for the annotation text.
    df : table supporting ``len()`` and column access by name.
    ax : axes to draw on.
    mode : the jackknife SE of r is only estimated when ``mode == "0"``.
    xl, yl, xh, yh, null_beta : accepted for interface compatibility; unused.
    r_se : request the jackknife SE of r (appended to the annotation).
    is_45_helper_line, helper_line_args : forwarded to ``create_helper_line``.
    font_kwargs : extra keyword arguments for ``ax.text``.
    log, verbose : logger and verbosity flag.
    reg_xmin : x anchor forwarded to the line-drawing helpers.

    Returns
    -------
    (ax, reg) : ``reg`` is the ``linregress`` result, or ``None`` when no
        regression was performed (previously this path raised
        ``UnboundLocalError``).
    """
    reg = None

    # A regression on fewer than three rows is skipped.
    if len(df)<3:
        is_reg=False

    if is_reg is True:
        # linregress result order: slope, intercept, r, p, slope_se (+ intercept_se)
        reg = ss.linregress(df[x],df[y])

        r_se_jackknife_string = ""
        if mode=="0" and r_se==True:
            log.write(" -Estimating SE for rsq using Jackknife method.", verbose=verbose)
            r_se_jackknife = jackknife_r(df,x,y,log,verbose)
            r_se_jackknife_string = " ({:.2f})".format(r_se_jackknife)

        create_reg_log(reg, log, verbose)

        reg_string = create_reg_string(reg,
                                       r_se_jackknife_string)

        # Annotation anchored at the lower-right corner in axes coordinates.
        ax.text(0.99,0.01, reg_string, va="bottom",ha="right",transform=ax.transAxes,bbox=reg_box,**font_kwargs)

        ax = create_helper_line(ax, reg[0], is_45_helper_line, helper_line_args, reg_xmin=reg_xmin)
        ax = create_reg_line(ax, reg, reg_xmin=reg_xmin)

    return ax, reg
152
+
153
+ #############################################################################################################################################################################
154
def create_reg_log(reg,log, verbose):
    """Write the regression summary to ``log``.

    ``reg`` is indexed positionally: ``reg[0]`` slope, ``reg[2]`` correlation
    coefficient, ``reg[3]`` p-value and ``reg[4]`` slope standard error.
    Five ``log.write`` calls are made; nothing is returned.
    """
    log.write(" -Beta = ", reg[0], verbose=verbose)
    log.write(" -Beta_se = ", reg[4], verbose=verbose)
    # The p-value is reported as given in reg (null hypothesis: slope is 0).
    log.write(" -H0 beta = 0",", default p = ", "{:.2e}".format(reg[3]), verbose=verbose)
    log.write(" -Pearson correlation coefficient = ", "{:.2f}".format(reg[2]), verbose=verbose)
    log.write(" -r2 = ", "{:.2f}".format(reg[2]**2), verbose=verbose)
164
+
165
def create_helper_line(ax,
                       slope,
                       is_45_helper_line,
                       helper_line_args,
                       reg_xmin=0):
    """Optionally draw a diagonal helper line across ``ax``.

    Nothing is drawn unless ``is_45_helper_line`` is exactly ``True``. The line
    runs through the smallest lower axis limit and the largest upper axis
    limit; for a positive ``slope`` it follows y = x, otherwise y = -x.
    ``reg_xmin`` is accepted but not used. Returns ``ax``.
    """
    if is_45_helper_line is not True:
        return ax

    x_low, x_high = ax.get_xlim()
    y_low, y_high = ax.get_ylim()
    low = min(x_low, y_low)
    high = max(x_high, y_high)

    if slope >0:
        start, end = [low, low], [high, high]
    else:
        # mirrored diagonal for non-positive slopes
        start, end = [low, -low], [high, -high]

    ax.axline(start, end, zorder=1, **helper_line_args)
    return ax
180
+
181
def create_reg_line(ax, reg, reg_xmin=0):
    """Draw the fitted line on ``ax`` as a dashed grey line and return ``ax``.

    ``reg[0]`` is used as the slope and ``reg[1]`` as the intercept; the line
    is anchored at ``x = reg_xmin``.
    """
    anchor = (reg_xmin, reg[0] * reg_xmin + reg[1])
    line_style = {"color": "#cccccc", "linestyle": '--', "zorder": 1}
    ax.axline(xy1=anchor, slope=reg[0], **line_style)
    return ax
185
+
186
def create_reg_string(reg,
                      r_se_jackknife_string):
    """Build the mathtext annotation describing a regression result.

    ``reg`` is indexed positionally: ``reg[0]`` slope, ``reg[1]`` intercept,
    ``reg[2]`` correlation coefficient, ``reg[3]`` p-value.
    ``r_se_jackknife_string`` is appended verbatim after the r value.

    The p-value is rendered as ``mantissa x 10^{exponent}``. The original code
    read ``reg[2]`` here and therefore printed r in place of p.
    """
    p = reg[3]
    try:
        mantissa, exponent = "{:.2e}".format(p).split("e")
        # int() strips the sign padding and leading zeros, e.g. "-02" -> "-2"
        exponent = str(int(exponent))
    except (ValueError, TypeError):
        # nan/inf have no exponent part; unformattable values land here too
        mantissa, exponent = "0", "0"

    p_latex = "$p = " + mantissa + " \\times 10^{" + exponent + "}$"

    reg_string = ("$y =$ " + "{:.2f}".format(reg[1]) + " $+$ " + "{:.2f}".format(reg[0]) + " $x$, "
                  + p_latex + ", $r =$" + "{:.2f}".format(reg[2]) + r_se_jackknife_string)

    return reg_string
202
+
203
def jackknife_r(df,x,y,log,verbose):
    """Jackknife (leave-one-out) estimate of the standard error of r.

    Rows of ``df`` with a missing value in column ``x`` or ``y`` are dropped.
    The correlation coefficient from ``scipy.stats.linregress`` is recomputed
    with each remaining row left out in turn, and the standard error is
    ``sqrt((n-1)/n * sum((r_i - mean(r))**2))``.

    ``n`` is the number of complete rows. The original used ``len(df)``, which
    raised a length-mismatch error as soon as any row had been dropped.

    The result is written to ``log`` and returned.
    """
    # complete cases only
    df_nona = df.loc[:,[x,y]].dropna()
    n = len(df_nona)

    xs = df_nona[x].to_numpy()
    ys = df_nona[y].to_numpy()

    # leave-one-out correlation coefficients
    keep = np.ones(n, dtype=bool)
    rs = np.empty(n)
    for i in range(n):
        keep[i] = False
        rs[i] = ss.linregress(xs[keep], ys[keep])[2]
        keep[i] = True

    # https://en.wikipedia.org/wiki/Jackknife_resampling
    r_se = np.sqrt( (n-1)/n * np.sum((rs - np.mean(rs))**2) )
    log.write(" -R se (jackknife) = {:.2e}".format(r_se), verbose=verbose)
    return r_se
@@ -281,7 +281,7 @@ def plottrumpet(mysumstats,
281
281
  sumstats["ABS_BETA"] = sumstats[beta].abs()
282
282
 
283
283
  ##################################################################################################
284
- size_norm = (sumstats["ABS_BETA"].min(), sumstats["ABS_BETA"].max())
284
+ size_norm = (sumstats[size].min(), sumstats[size].max())
285
285
  ## if highlight ##################################################################################################
286
286
  dots = sns.scatterplot(data=sumstats,
287
287
  x=maf,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: gwaslab
3
- Version: 3.5.1
3
+ Version: 3.5.2
4
4
  Summary: A collection of handy tools for GWAS SumStats
5
5
  Author-email: Yunye <yunye@gwaslab.com>
6
6
  Project-URL: Homepage, https://cloufield.github.io/gwaslab/
@@ -1,4 +1,4 @@
1
- gwaslab/__init__.py,sha256=7TKJaODdpeuQKibL7gIEa4MtyQ0pmrU-vIHQ-Et27lQ,2433
1
+ gwaslab/__init__.py,sha256=pP_OQwkaXMJokVVU_o6AXnJEBs2HtaMtpcHIls3ezO8,2486
2
2
  gwaslab/bd_common_data.py,sha256=2voBqMrIsII1TN5T6uvyDax90fWcJK1Stmo1ZHNGGsE,13898
3
3
  gwaslab/bd_config.py,sha256=TP-r-DPhJD3XnRYZbw9bQHXaDIkiRgK8bG9HCt-UaLc,580
4
4
  gwaslab/bd_download.py,sha256=cDDk2C5IvjeAzvPvVYGTkI4Ss33DUtEDjGo8eAbQRvY,15663
@@ -6,18 +6,18 @@ gwaslab/bd_get_hapmap3.py,sha256=FQpwbhWUPFT152QtiLevEkkN4YcVDIeKzoK0Uz1NlRo,410
6
6
  gwaslab/cache_manager.py,sha256=HOTnSkCOyGEPLRl90WT8D_6pAdI8d8AzenMIDGuCeWc,28113
7
7
  gwaslab/g_Log.py,sha256=C3Zv-_6c3C9ms8bgQ-ytplz22sjk7euqXYkWr9zNeAs,1573
8
8
  gwaslab/g_Phenotypes.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
- gwaslab/g_Sumstats.py,sha256=c_qYY2H-nf-JtGepzafoHuYwnWxmOOBf9CDytPZc60Q,36704
9
+ gwaslab/g_Sumstats.py,sha256=eqEpHEH5fnBMsOIufVzwaRp0_vCsuHvGEUe5OzNL41s,36969
10
10
  gwaslab/g_SumstatsPair.py,sha256=20snPb4SlI6ftMGVjgxAuyxsxYRQF-GzzlBSnoB-3Lo,8851
11
11
  gwaslab/g_SumstatsT.py,sha256=u_DighLMnMxwTLnqm-B58pA0G6WXRj6pudPyKMVKjSU,2133
12
12
  gwaslab/g_Sumstats_summary.py,sha256=FECvvFXJVKaCX5dggBvvk9YvJ6AbdbcLfjltysX7wEE,6380
13
13
  gwaslab/g_meta.py,sha256=htWlgURWclm9R6UqFcX1a93WN27xny7lGUeyJZOtszQ,2583
14
14
  gwaslab/g_vchange_status.py,sha256=w3zsYYOcCaI3PTeboonvkQjudzUAfVIgATzRdiPViZs,1939
15
- gwaslab/g_version.py,sha256=Wpfo8Y_fjYS4ajalombaHrLezBO7BOr070GnjQHhOGw,1885
15
+ gwaslab/g_version.py,sha256=GO-TBEpIUgE6esSkU-I4E8yS1MrCnzKwbYKM7htcJcw,1885
16
16
  gwaslab/hm_casting.py,sha256=FqP4EQl83Q2OKLw004OgLIvUH795TVCGwziLk5jsHqY,11368
17
17
  gwaslab/hm_harmonize_sumstats.py,sha256=_sZ8soikAxDokw-dcr_CLguBB8OmTmPPS04MfmsJc_Q,79509
18
18
  gwaslab/hm_rsid_to_chrpos.py,sha256=ODWREO0jPN0RAfNzL5fRzSRANfhiksOvUVPuEsFZQqA,6552
19
19
  gwaslab/io_preformat_input.py,sha256=J8Ny4OPMaLVdo2nP8lTM-c5A8LSdqphSrp9G4i9JjDQ,24097
20
- gwaslab/io_process_args.py,sha256=bF7oHBtMnxJgksIit0O0_U94dZFh8r5YblgDqEEsqoM,806
20
+ gwaslab/io_process_args.py,sha256=KnQWMBwEZjQpCsVMKPrR1qQzeXCg542YdXsP0KwKat8,906
21
21
  gwaslab/io_read_ldsc.py,sha256=wsYXpH50IchBKd2dhYloSqc4YgnDkiwMsAweaCoN5Eo,12471
22
22
  gwaslab/io_read_tabular.py,sha256=EG-C6KhCutt4J4LlOMgXnqzJvU-EZXzVhMvaDFnHrMM,2380
23
23
  gwaslab/io_to_formats.py,sha256=8FmbQjWUIsz_V1Lb80TuwRIXKBgs5t42j25Znougk1Y,29401
@@ -58,22 +58,23 @@ gwaslab/vis_plot_credible sets.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
58
58
  gwaslab/viz_aux_annotate_plot.py,sha256=gA-s8a90dsl3MB5CIapdI_DecD9h2FmuqMgy07kMYJI,25207
59
59
  gwaslab/viz_aux_chromatin.py,sha256=7cGmej5EkKO7fxR1b5w8r1oRRl9ofVzFRG52SCYWtz0,4109
60
60
  gwaslab/viz_aux_property.py,sha256=UIaivghnLXYpTwkKnXRK0F28Jbn9L6OaICk3K73WZaU,33
61
- gwaslab/viz_aux_quickfix.py,sha256=HnhVvY0GP0EN0gLJ-B11OYiE5PWDcdKGUpQ3QZeu0lE,18592
61
+ gwaslab/viz_aux_quickfix.py,sha256=cGX5i3WBmvKIiqck8V00caDg-pvKOO709Ux3DBXsUrM,18693
62
62
  gwaslab/viz_aux_reposition_text.py,sha256=iRIP-Rkltlei068HekJcVubiqPrunBqvAoSQ1eHk04M,4304
63
- gwaslab/viz_aux_save_figure.py,sha256=GdUXNBOelsWqtTXm8pEZzeGGwDxHYnBkyrGwLOK16ew,2723
63
+ gwaslab/viz_aux_save_figure.py,sha256=x_b4DlTSmHJddfQgoYoReCi4QQbQEtcwCWTKfGetfTA,2768
64
64
  gwaslab/viz_plot_compare_af.py,sha256=qtXW45-Sq_ugK8ZfqBYMpmf58SKi3lB3YyHnzn_akcE,5344
65
65
  gwaslab/viz_plot_compare_effect.py,sha256=kq-rVWygHEeTBMOtd_jk8nK85ClZHU-ADSf4nI2gTKo,66604
66
66
  gwaslab/viz_plot_forestplot.py,sha256=xgOnefh737CgdQxu5naVyRNBX1NQXPFKzf51fbh6afs,6771
67
67
  gwaslab/viz_plot_miamiplot.py,sha256=rCFEp7VNuVqeBBG3WRkmFAtFklbF79BvIQQYiSY70VY,31238
68
68
  gwaslab/viz_plot_miamiplot2.py,sha256=xiFCgFX8hEySmCJORpEurMVER9eEXQyy_Ik7mLkbi9g,16015
69
- gwaslab/viz_plot_mqqplot.py,sha256=emyEXZZenzm8eh3XFCkTWI8sz0fEnL5QJxohOZMxWZc,67189
69
+ gwaslab/viz_plot_mqqplot.py,sha256=mfmHseYHIFoEfSKBX46ps6abSQ6t9xDNahLDLLj4K8I,67924
70
70
  gwaslab/viz_plot_phe_heatmap.py,sha256=qoXVeFTIm-n8IinNbDdPFVBSz2yGCGK6QzTstXv6aj4,9532
71
71
  gwaslab/viz_plot_qqplot.py,sha256=psQgVpP29686CEZkzQz0iRbApzqy7aE3GGiBcazVvNw,7247
72
- gwaslab/viz_plot_regional2.py,sha256=tBoGox-4ngL5o_twdIjk_VW6Iam3JDyrPKuttm6_4Sg,36862
72
+ gwaslab/viz_plot_regional2.py,sha256=g9cGI5sPAH32WR0ICvZB1wMG5butMgpdxYtxZUfG6fE,38314
73
73
  gwaslab/viz_plot_regionalplot.py,sha256=8u-5-yfy-UaXhaxVVz3Y5k2kBAoqzczUw1hyyD450iI,37983
74
74
  gwaslab/viz_plot_rg_heatmap.py,sha256=PidUsgOiEVt6MfBPCF3_yDhOEytZ-I1q-ZD6_0pFrV4,13713
75
+ gwaslab/viz_plot_scatter_with_reg.py,sha256=PmUZDQl2q4Dme3HLPXEwf_TrMjwJADA-uFXNDBWUEa4,8333
75
76
  gwaslab/viz_plot_stackedregional.py,sha256=HfNUhwxevbwSoauE0ysG020U7YFVy4111nkIWdaJ4Q8,16664
76
- gwaslab/viz_plot_trumpetplot.py,sha256=ZHdc6WcVx0-oKoj88yglRkmB4bS9pOiEMcuwKW35Yvo,42672
77
+ gwaslab/viz_plot_trumpetplot.py,sha256=uuEdHNr2ZBXJkOMA6uu0OzKFh0de-BxOnPsUAYqwqOU,42660
77
78
  gwaslab/data/formatbook.json,sha256=N2nJs80HH98Rsu9FxaSvIQO9J5yIV97WEtAKjRqYwiY,38207
78
79
  gwaslab/data/reference.json,sha256=IrjwFnXjrpVUp3zYfcYClpibJE9Y-94gtrC1Aw8sXxg,12332
79
80
  gwaslab/data/chrx_par/chrx_par_hg19.bed.gz,sha256=LocZg_ozhZjQiIpgWCO4EYCW9xgkEKpRy1m-YdIpzQs,83
@@ -82,9 +83,9 @@ gwaslab/data/hapmap3_SNPs/hapmap3_db150_hg19.snplist.gz,sha256=qD9RsC5S2h6l-OdpW
82
83
  gwaslab/data/hapmap3_SNPs/hapmap3_db151_hg38.snplist.gz,sha256=Y8ZT2FIAhbhlgCJdE9qQVAiwnV_fcsPt72usBa7RSBM,10225828
83
84
  gwaslab/data/high_ld/high_ld_hla_hg19.bed.gz,sha256=R7IkssKu0L4WwkU9SrS84xCMdrkkKL0gnTNO_OKbG0Y,219
84
85
  gwaslab/data/high_ld/high_ld_hla_hg38.bed.gz,sha256=76CIU0pibDJ72Y6UY-TbIKE9gEPwTELAaIbCXyjm80Q,470
85
- gwaslab-3.5.1.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
86
- gwaslab-3.5.1.dist-info/LICENSE_before_v3.4.39,sha256=GhLOU_1UDEKeOacYhsRN_m9u-eIuVTazSndZPeNcTZA,1066
87
- gwaslab-3.5.1.dist-info/METADATA,sha256=Dqj65vurvDR3JCwlyCVnnfUS64cmN1vMJJjUTCm3xLI,7758
88
- gwaslab-3.5.1.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
89
- gwaslab-3.5.1.dist-info/top_level.txt,sha256=PyY6hWtrALpv2MAN3kjkIAzJNmmBTH5a2risz9KwH08,8
90
- gwaslab-3.5.1.dist-info/RECORD,,
86
+ gwaslab-3.5.2.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
87
+ gwaslab-3.5.2.dist-info/LICENSE_before_v3.4.39,sha256=GhLOU_1UDEKeOacYhsRN_m9u-eIuVTazSndZPeNcTZA,1066
88
+ gwaslab-3.5.2.dist-info/METADATA,sha256=KYa_HwVvvyGlZeookgHHJAUgfde0d5YqitViCSmEU8M,7758
89
+ gwaslab-3.5.2.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
90
+ gwaslab-3.5.2.dist-info/top_level.txt,sha256=PyY6hWtrALpv2MAN3kjkIAzJNmmBTH5a2risz9KwH08,8
91
+ gwaslab-3.5.2.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.5.0)
2
+ Generator: setuptools (75.6.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5