gwaslab-3.4.38-py3-none-any.whl → gwaslab-3.4.39-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gwaslab might be problematic. Click here for more details.

Files changed (51)
  1. gwaslab/bd_common_data.py +6 -3
  2. gwaslab/bd_download.py +9 -9
  3. gwaslab/bd_get_hapmap3.py +43 -9
  4. gwaslab/g_Log.py +14 -5
  5. gwaslab/g_Sumstats.py +86 -18
  6. gwaslab/g_SumstatsPair.py +70 -23
  7. gwaslab/g_SumstatsT.py +2 -2
  8. gwaslab/g_version.py +10 -10
  9. gwaslab/hm_casting.py +9 -4
  10. gwaslab/hm_harmonize_sumstats.py +88 -83
  11. gwaslab/io_preformat_input.py +14 -14
  12. gwaslab/io_read_ldsc.py +49 -1
  13. gwaslab/ldsc_irwls.py +198 -0
  14. gwaslab/ldsc_jackknife.py +514 -0
  15. gwaslab/ldsc_ldscore.py +417 -0
  16. gwaslab/ldsc_parse.py +294 -0
  17. gwaslab/ldsc_regressions.py +747 -0
  18. gwaslab/ldsc_sumstats.py +629 -0
  19. gwaslab/qc_check_datatype.py +1 -1
  20. gwaslab/qc_fix_sumstats.py +163 -161
  21. gwaslab/util_ex_calculate_ldmatrix.py +2 -2
  22. gwaslab/util_ex_gwascatalog.py +24 -24
  23. gwaslab/util_ex_ldproxyfinder.py +9 -9
  24. gwaslab/util_ex_ldsc.py +189 -0
  25. gwaslab/util_in_calculate_gc.py +6 -6
  26. gwaslab/util_in_calculate_power.py +42 -43
  27. gwaslab/util_in_convert_h2.py +8 -8
  28. gwaslab/util_in_fill_data.py +28 -28
  29. gwaslab/util_in_filter_value.py +91 -52
  30. gwaslab/util_in_get_density.py +8 -8
  31. gwaslab/util_in_get_sig.py +407 -65
  32. gwaslab/viz_aux_annotate_plot.py +12 -12
  33. gwaslab/viz_aux_quickfix.py +18 -18
  34. gwaslab/viz_aux_reposition_text.py +3 -3
  35. gwaslab/viz_aux_save_figure.py +14 -5
  36. gwaslab/viz_plot_compare_af.py +29 -30
  37. gwaslab/viz_plot_compare_effect.py +63 -71
  38. gwaslab/viz_plot_miamiplot2.py +6 -6
  39. gwaslab/viz_plot_mqqplot.py +17 -3
  40. gwaslab/viz_plot_qqplot.py +1 -1
  41. gwaslab/viz_plot_regionalplot.py +33 -32
  42. gwaslab/viz_plot_rg_heatmap.py +28 -26
  43. gwaslab/viz_plot_stackedregional.py +40 -21
  44. gwaslab/viz_plot_trumpetplot.py +50 -55
  45. gwaslab-3.4.39.dist-info/LICENSE +674 -0
  46. {gwaslab-3.4.38.dist-info → gwaslab-3.4.39.dist-info}/METADATA +4 -3
  47. gwaslab-3.4.39.dist-info/RECORD +80 -0
  48. gwaslab-3.4.38.dist-info/RECORD +0 -72
  49. /gwaslab-3.4.38.dist-info/LICENSE → /gwaslab-3.4.39.dist-info/LICENSE_before_v3.4.39 +0 -0
  50. {gwaslab-3.4.38.dist-info → gwaslab-3.4.39.dist-info}/WHEEL +0 -0
  51. {gwaslab-3.4.38.dist-info → gwaslab-3.4.39.dist-info}/top_level.txt +0 -0
@@ -86,7 +86,7 @@ def _plot_regional(
86
86
  region_ld_threshold = region_ld_threshold,
87
87
  region_ld_colors = region_ld_colors,
88
88
  marker_size= marker_size,
89
- log=log)
89
+ log=log,verbose=verbose)
90
90
  else:
91
91
  ax1, lead_id = _pinpoint_lead(sumstats = sumstats,
92
92
  ax1 = ax1,
@@ -94,14 +94,14 @@ def _plot_regional(
94
94
  region_ld_threshold = region_ld_threshold,
95
95
  region_ld_colors = region_ld_colors1,
96
96
  marker_size= marker_size,
97
- log=log)
97
+ log=log,verbose=verbose)
98
98
  ax1, lead_id2 = _pinpoint_lead(sumstats = sumstats,
99
99
  ax1 = ax1,
100
100
  region_ref=region_ref_second,
101
101
  region_ld_threshold = region_ld_threshold,
102
102
  region_ld_colors = region_ld_colors2,
103
103
  marker_size= marker_size,
104
- log=log)
104
+ log=log,verbose=verbose)
105
105
 
106
106
  if (vcf_path is not None) and region_ld_legend:
107
107
  if region_ref_second is None:
@@ -240,7 +240,7 @@ def _plot_regional(
240
240
  return ax1, ax3, ax4, cbar, lead_snp_i, lead_snp_i2
241
241
 
242
242
  # + ###########################################################################################################################################################################
243
- def _get_lead_id(sumstats=None, region_ref=None, log=None):
243
+ def _get_lead_id(sumstats=None, region_ref=None, log=None, verbose=True):
244
244
  region_ref_to_check = copy.copy(region_ref)
245
245
  try:
246
246
  if len(region_ref_to_check)>0 and type(region_ref_to_check) is not str:
@@ -260,23 +260,23 @@ def _get_lead_id(sumstats=None, region_ref=None, log=None):
260
260
  if region_ref_to_check is not None:
261
261
  if type(lead_id) is list:
262
262
  if len(lead_id)==0 :
263
- log.write(" -WARNING: {} not found. Roll back to lead variant...".format(region_ref_to_check))
263
+ log.warning("{} not found. Roll back to lead variant...".format(region_ref_to_check))
264
264
  lead_id = sumstats["scaled_P"].idxmax()
265
265
  else:
266
266
  log.write(" -Reference variant ID: {} - {}".format(region_ref_to_check, lead_id))
267
267
 
268
268
  if lead_id is None:
269
- log.write(" -Extracting lead variant...")
269
+ log.write(" -Extracting lead variant...", verbose=verbose)
270
270
  lead_id = sumstats["scaled_P"].idxmax()
271
271
 
272
272
  return lead_id
273
273
 
274
- def _pinpoint_lead(sumstats,ax1,region_ref, region_ld_threshold, region_ld_colors, marker_size, log):
274
+ def _pinpoint_lead(sumstats,ax1,region_ref, region_ld_threshold, region_ld_colors, marker_size, log, verbose):
275
275
  if region_ref is None:
276
- log.write(" -Extracting lead variant...")
276
+ log.write(" -Extracting lead variant..." , verbose=verbose)
277
277
  lead_id = sumstats["scaled_P"].idxmax()
278
278
  else:
279
- lead_id = _get_lead_id(sumstats, region_ref, log)
279
+ lead_id = _get_lead_id(sumstats, region_ref, log, verbose)
280
280
 
281
281
  ax1.scatter(sumstats.loc[lead_id,"i"],sumstats.loc[lead_id,"scaled_P"],
282
282
  color=region_ld_colors[-1],
@@ -398,7 +398,7 @@ def _plot_gene_track(
398
398
  log=Log()):
399
399
 
400
400
  # load gtf
401
- if verbose: log.write(" -Loading gtf files from:" + gtf_path)
401
+ log.write(" -Loading gtf files from:" + gtf_path, verbose=verbose)
402
402
  uniq_gene_region,exons = process_gtf( gtf_path = gtf_path ,
403
403
  region = region,
404
404
  region_flank_factor = region_flank_factor,
@@ -416,7 +416,7 @@ def _plot_gene_track(
416
416
  font_size_in_pixels= taf[2] * pixels_per_track
417
417
  font_size_in_points = font_size_in_pixels * pixels_per_point
418
418
  linewidth_in_points= pixels_per_track * pixels_per_point
419
- if verbose: log.write(" -plotting gene track..")
419
+ log.write(" -plotting gene track..", verbose=verbose)
420
420
 
421
421
  sig_gene_name = "Undefined"
422
422
  sig_gene_name2 = "Undefined"
@@ -424,6 +424,7 @@ def _plot_gene_track(
424
424
  texts_to_adjust_middle = []
425
425
  texts_to_adjust_right = []
426
426
  for index,row in uniq_gene_region.iterrows():
427
+
427
428
  gene_color="#020080"
428
429
  #if row[6][0]=="+":
429
430
  if row["strand"][0]=="+":
@@ -496,7 +497,7 @@ def _plot_gene_track(
496
497
  ax3.plot((gene_track_start_i+row["start"],gene_track_start_i+row["end"]),
497
498
  (row["stack"]*2,row["stack"]*2),linewidth=linewidth_in_points*taf[3],color=exon_color,solid_capstyle="butt")
498
499
 
499
- if verbose: log.write(" -Finished plotting gene track..")
500
+ log.write(" -Finished plotting gene track..", verbose=verbose)
500
501
 
501
502
  return ax3,texts_to_adjust_middle
502
503
 
@@ -504,25 +505,26 @@ def _plot_gene_track(
504
505
  # Helpers
505
506
  # -############################################################################################################################################################################
506
507
  def process_vcf(sumstats, vcf_path, region,region_ref, region_ref_second, log, verbose, pos ,nea,ea, region_ld_threshold, vcf_chr_dict,tabix):
507
- if verbose: log.write("Start to load reference genotype...")
508
- if verbose: log.write(" -reference vcf path : "+ vcf_path)
508
+ log.write("Start to load reference genotype...", verbose=verbose)
509
+ log.write(" -reference vcf path : "+ vcf_path, verbose=verbose)
509
510
 
510
511
 
511
512
 
512
513
  # load genotype data of the targeted region
513
514
  ref_genotype = read_vcf(vcf_path,region=vcf_chr_dict[region[0]]+":"+str(region[1])+"-"+str(region[2]),tabix=tabix)
514
515
  if ref_genotype is None:
515
- if verbose: log.write(" -Warning: no data was retrieved. Skipping ...")
516
+ log.warning("No data was retrieved. Skipping ...")
516
517
  ref_genotype=dict()
517
518
  ref_genotype["variants/POS"]=np.array([],dtype="int64")
518
- if verbose: log.write(" -Retrieving index...")
519
- if verbose: log.write(" -Ref variants in the region: {}".format(len(ref_genotype["variants/POS"])))
519
+ log.write(" -Retrieving index...", verbose=verbose)
520
+ log.write(" -Ref variants in the region: {}".format(len(ref_genotype["variants/POS"])), verbose=verbose)
520
521
  # match sumstats pos and ref pos:
521
522
  # get ref index for its first appearance of sumstats pos
522
523
  #######################################################################################
523
524
  def match_varaint(x):
524
525
  # x: "POS,NEA,EA"
525
526
  if np.any(ref_genotype["variants/POS"] == x.iloc[0]):
527
+ # position match
526
528
  if len(np.where(ref_genotype["variants/POS"] == x.iloc[0] )[0])>1:
527
529
  # multiple position matches
528
530
  for j in np.where(ref_genotype["variants/POS"] == x.iloc[0])[0]:
@@ -532,16 +534,15 @@ def process_vcf(sumstats, vcf_path, region,region_ref, region_ref_second, log, v
532
534
  return j
533
535
  elif x.iloc[1] in ref_genotype["variants/ALT"][j]:
534
536
  if x.iloc[2] == ref_genotype["variants/REF"][j]:
535
- return j
536
- else:
537
- return None
537
+ return j
538
+ return None
538
539
  else:
539
540
  # single match
540
541
  return np.where(ref_genotype["variants/POS"] == x.iloc[0] )[0][0]
541
542
  else:
542
543
  # no position match
543
544
  return None
544
- if verbose: log.write(" -Matching variants using POS, NEA, EA ...")
545
+ log.write(" -Matching variants using POS, NEA, EA ...", verbose=verbose)
545
546
  sumstats["REFINDEX"] = sumstats[[pos,nea,ea]].apply(lambda x: match_varaint(x),axis=1)
546
547
  #############################################################################################
547
548
  #sumstats["REFINDEX"] = sumstats[pos].apply(lambda x: np.where(ref_genotype["variants/POS"] == x )[0][0] if np.any(ref_genotype["variants/POS"] == x) else None)
@@ -551,7 +552,7 @@ def process_vcf(sumstats, vcf_path, region,region_ref, region_ref_second, log, v
551
552
  if region_ref is None:
552
553
  lead_id = sumstats["scaled_P"].idxmax()
553
554
  else:
554
- lead_id = _get_lead_id(sumstats, region_ref, log)
555
+ lead_id = _get_lead_id(sumstats, region_ref, log, verbose)
555
556
  lead_pos = sumstats.loc[lead_id,pos]
556
557
 
557
558
  # if lead pos is available:
@@ -567,12 +568,12 @@ def process_vcf(sumstats, vcf_path, region,region_ref, region_ref_second, log, v
567
568
  lead_snp_genotype = GenotypeArray([ref_genotype["calldata/GT"][lead_snp_ref_index]]).to_n_alt()
568
569
  try:
569
570
  if len(set(lead_snp_genotype[0]))==1:
570
- log.write(" -WARNING: The variant is mono-allelic in reference VCF. LD can not be calculated.", verbose=verbose)
571
+ log.warning("The variant is mono-allelic in reference VCF. LD can not be calculated.")
571
572
  except:
572
573
  pass
573
574
  other_snp_genotype = GenotypeArray(ref_genotype["calldata/GT"][other_snps_ref_index]).to_n_alt()
574
575
 
575
- if verbose: log.write(" -Calculating Rsq...")
576
+ log.write(" -Calculating Rsq...", verbose=verbose)
576
577
 
577
578
  if len(other_snp_genotype)>1:
578
579
  valid_r2= np.power(rogers_huff_r_between(lead_snp_genotype,other_snp_genotype)[0],2)
@@ -580,7 +581,7 @@ def process_vcf(sumstats, vcf_path, region,region_ref, region_ref_second, log, v
580
581
  valid_r2= np.power(rogers_huff_r_between(lead_snp_genotype,other_snp_genotype),2)
581
582
  sumstats.loc[~sumstats["REFINDEX"].isna(),"RSQ"] = valid_r2
582
583
  else:
583
- if verbose: log.write(" -Lead SNP not found in reference...")
584
+ log.write(" -Lead SNP not found in reference...", verbose=verbose)
584
585
  sumstats["RSQ"]=None
585
586
 
586
587
  sumstats["RSQ"] = sumstats["RSQ"].astype("float")
@@ -600,7 +601,7 @@ def process_vcf(sumstats, vcf_path, region,region_ref, region_ref_second, log, v
600
601
  #####################################################################################################
601
602
  if region_ref_second is not None:
602
603
 
603
- lead_id2 = _get_lead_id(sumstats, region_ref_second, log)
604
+ lead_id2 = _get_lead_id(sumstats, region_ref_second, log, verbose)
604
605
 
605
606
  lead_pos2 = sumstats.loc[lead_id2,pos]
606
607
  if lead_pos2 in ref_genotype["variants/POS"]:
@@ -615,12 +616,12 @@ def process_vcf(sumstats, vcf_path, region,region_ref, region_ref_second, log, v
615
616
  lead_snp_genotype = GenotypeArray([ref_genotype["calldata/GT"][lead_snp_ref_index]]).to_n_alt()
616
617
  try:
617
618
  if len(set(lead_snp_genotype[0]))==1:
618
- log.write(" -WARNING: The variant is mono-allelic in reference VCF. LD can not be calculated.", verbose=verbose)
619
+ log.warning("The variant is mono-allelic in reference VCF. LD can not be calculated.")
619
620
  except:
620
621
  pass
621
622
  other_snp_genotype = GenotypeArray(ref_genotype["calldata/GT"][other_snps_ref_index]).to_n_alt()
622
623
 
623
- if verbose: log.write(" -Calculating Rsq...")
624
+ log.write(" -Calculating Rsq...", verbose=verbose)
624
625
 
625
626
  if len(other_snp_genotype)>1:
626
627
  valid_r2= np.power(rogers_huff_r_between(lead_snp_genotype,other_snp_genotype)[0],2)
@@ -628,7 +629,7 @@ def process_vcf(sumstats, vcf_path, region,region_ref, region_ref_second, log, v
628
629
  valid_r2= np.power(rogers_huff_r_between(lead_snp_genotype,other_snp_genotype),2)
629
630
  sumstats.loc[~sumstats["REFINDEX"].isna(),"RSQ2"] = valid_r2
630
631
  else:
631
- if verbose: log.write(" -Lead SNP not found in reference...")
632
+ log.write(" -Lead SNP not found in reference...", verbose=verbose)
632
633
  sumstats["RSQ2"]=None
633
634
 
634
635
  sumstats["RSQ2"] = sumstats["RSQ2"].astype("float")
@@ -652,7 +653,7 @@ def process_vcf(sumstats, vcf_path, region,region_ref, region_ref_second, log, v
652
653
  #sumstats.loc[lead_id,"LEAD2"]
653
654
  ####################################################################################################
654
655
 
655
- if verbose: log.write("Finished loading reference genotype successfully!")
656
+ log.write("Finished loading reference genotype successfully!", verbose=verbose)
656
657
  return sumstats
657
658
 
658
659
  # -############################################################################################################################################################################
@@ -714,8 +715,8 @@ def process_gtf(gtf_path,
714
715
  # extract protein coding gene
715
716
  if region_protein_coding is True:
716
717
  #genes_1mb = genes_1mb.loc[genes_1mb["gene_biotype"]=="protein_coding",:].copy()
717
- pc_genes_1mb_list = genes_1mb.loc[(genes_1mb["feature"]=="gene")& (genes_1mb["gene_biotype"]=="protein_coding"),"name"].values
718
- genes_1mb = genes_1mb.loc[genes_1mb["name"].isin(pc_genes_1mb_list),:]
718
+ pc_genes_1mb_list = genes_1mb.loc[(genes_1mb["feature"]=="gene")& (genes_1mb["gene_biotype"]=="protein_coding") & (genes_1mb["name"]!=""),"name"].values
719
+ genes_1mb = genes_1mb.loc[(genes_1mb["feature"].isin(["exon","gene"])) & (genes_1mb["name"].isin(pc_genes_1mb_list)),:]
719
720
  # extract exon
720
721
  exons = genes_1mb.loc[genes_1mb["feature"]=="exon",:].copy()
721
722
 
@@ -8,6 +8,7 @@ import matplotlib
8
8
  from gwaslab.g_Log import Log
9
9
  import scipy.stats as ss
10
10
  from gwaslab.viz_aux_save_figure import save_figure
11
+
11
12
  #################################################################################################
12
13
  def convert_p_to_width(p,sig_level):
13
14
  width_factor= -np.log10(sig_level)
@@ -54,7 +55,7 @@ def plot_rg(ldscrg,
54
55
  save=None,
55
56
  save_args=None):
56
57
 
57
- if verbose: log.write("Start to create ldsc genetic correlation heatmap...")
58
+ log.write("Start to create ldsc genetic correlation heatmap..." ,verbose=verbose)
58
59
  # configure arguments
59
60
  if fig_args is None:
60
61
  fig_args = {"dpi":300}
@@ -78,14 +79,14 @@ def plot_rg(ldscrg,
78
79
  save_args = {}
79
80
 
80
81
  #drop na records in P column
81
- if verbose: log.write("Raw dataset records:",len(ldscrg))
82
+ log.write("Raw dataset records:",len(ldscrg) ,verbose=verbose)
82
83
  df=ldscrg.dropna(subset=[p]).copy()
83
84
 
84
- if verbose: log.write(" -Raw dataset non-NA records:",len(df))
85
+ log.write(" -Raw dataset non-NA records:",len(df) ,verbose=verbose)
85
86
  # create unique pair column
86
87
  df["p1p2"]=df.apply(lambda x:"_".join(sorted([x[p1],x[p2]])),axis=1)
87
88
 
88
- if verbose: log.write("Filling diagnal line and duplicated pair for plotting...")
89
+ log.write("Filling diagnal line and duplicated pair for plotting..." ,verbose=verbose)
89
90
  # fill na
90
91
  df_fill_reverse = df.loc[(df[p2].isin(df[p1].values)) & (df[p1].isin(df[p2].values)),:].copy()
91
92
  df_fill_reverse = df_fill_reverse.rename(columns={p1:p2,p2:p1})
@@ -96,16 +97,23 @@ def plot_rg(ldscrg,
96
97
  p2_dup_list = list(df.loc[(df[p1].isin(df[p2].values)),"p1"].values)
97
98
  p_dup_list = p2_dup_list + p1_dup_list
98
99
  if len(set(p_dup_list)) > 0:
99
- if verbose: log.write(" -Diagnal records:", len(set(p_dup_list)))
100
+ log.write(" -Diagnal records:", len(set(p_dup_list)) ,verbose=verbose)
100
101
  df_fill_dia["p1"] = p_dup_list
101
102
  df_fill_dia["p2"] = df_fill_dia["p1"]
102
103
  df_fill_dia["rg"] = 1
103
104
 
104
105
  df_fill_na = pd.DataFrame(columns=df.columns)
105
106
  df_fill_na[[p1,p2]] = [(i,j) for i in df[p1].sort_values(ascending=False).drop_duplicates() for j in df[p2].sort_values(ascending=False).drop_duplicates()]
107
+
108
+ to_concate=[]
109
+ for i in [df,df_fill_reverse,df_fill_dia,df_fill_na]:
110
+ if len(i)>0:
111
+ to_concate.append(i.dropna(axis=1))
112
+
106
113
  # fill diagonal
107
- df = pd.concat([df,df_fill_reverse,df_fill_dia,df_fill_na],ignore_index=True).sort_values(by=p).drop_duplicates(subset=[p1,p2])
108
- #if verbose: log.write(" -Dataset shape match:", len(df)==)
114
+ df = pd.concat(to_concate,ignore_index=True).sort_values(by=p).drop_duplicates(subset=[p1,p2])
115
+
116
+ #log.write(" -Dataset shape match:", len(df)==)
109
117
  #
110
118
  ## remove record with p1 = p2, dropna in P column
111
119
  dfp=ldscrg.loc[ldscrg[p1]!=ldscrg[p2],:].dropna(subset=[p]).copy()
@@ -116,11 +124,11 @@ def plot_rg(ldscrg,
116
124
  ## drop duplicate and keep only unique pairs
117
125
  dfp = dfp.drop_duplicates(subset=["p1p2"]).copy()
118
126
 
119
- if verbose: log.write("Valid unique trait pairs:",len(dfp))
120
- if verbose: log.write(" -Valid unique trait1:",dfp["p1"].nunique())
121
- if verbose: log.write(" -Valid unique trait2:",dfp["p2"].nunique())
122
- if verbose: log.write(" -Significant correlations with P < 0.05:",sum(dfp[p]<0.05))
123
- if verbose: log.write(" -Significant correlations after Bonferroni correction:",sum(dfp[p]<(0.05/len(dfp))))
127
+ log.write("Valid unique trait pairs:",len(dfp) ,verbose=verbose)
128
+ log.write(" -Valid unique trait1:",dfp["p1"].nunique() ,verbose=verbose)
129
+ log.write(" -Valid unique trait2:",dfp["p2"].nunique() ,verbose=verbose)
130
+ log.write(" -Significant correlations with P < 0.05:",sum(dfp[p]<0.05) ,verbose=verbose)
131
+ log.write(" -Significant correlations after Bonferroni correction:",sum(dfp[p]<(0.05/len(dfp))) ,verbose=verbose)
124
132
 
125
133
  #if correction=="fdr":
126
134
  # fdr corrected p
@@ -131,7 +139,7 @@ def plot_rg(ldscrg,
131
139
  dfp["fdr_p"]=ss.false_discovery_control(dfp[p],method=fdr_method)
132
140
  dfp["fdr"] =ss.false_discovery_control(dfp[p],method=fdr_method) < 0.05
133
141
 
134
- if verbose: log.write(" -Significant correlations with FDR <0.05:",sum(dfp["fdr"]))
142
+ log.write(" -Significant correlations with FDR <0.05:",sum(dfp["fdr"]) ,verbose=verbose)
135
143
  # convert to dict for annotation and plotting
136
144
  df_rawp = dfp.set_index("p1p2").loc[:,p].to_dict()
137
145
  dfp = dfp.set_index("p1p2").loc[:,"fdr_p"].to_dict()
@@ -167,7 +175,7 @@ def plot_rg(ldscrg,
167
175
  df["x"]=df[p2].map(dic_p2)
168
176
  df["x_y"]=df[p2].map(dic_p1)
169
177
 
170
- if verbose: log.write("Plotting heatmap...")
178
+ log.write("Plotting heatmap..." ,verbose=verbose)
171
179
  ########ticks###############################################
172
180
  fig,ax = plt.subplots(**fig_args)
173
181
 
@@ -196,7 +204,7 @@ def plot_rg(ldscrg,
196
204
  panno_list={1:{},2:{}}
197
205
  rgtoanno=[]
198
206
 
199
- if verbose: log.write("Full cell : {}-corrected P == {}".format(full_cell[0],full_cell[1]))
207
+ log.write("Full cell : {}-corrected P == {}".format(full_cell[0],full_cell[1]) ,verbose=verbose)
200
208
 
201
209
  for i,row in df.iterrows():
202
210
  xcenter=row["x"]
@@ -298,11 +306,11 @@ def plot_rg(ldscrg,
298
306
 
299
307
  # annotate p
300
308
  if panno is True:
301
- if verbose: log.write("P value annotation text : ")
309
+ log.write("P value annotation text (Order: Bon -> FDR -> Pnom): " ,verbose=verbose)
302
310
  for i,correction in enumerate(corrections):
303
311
  for j,sig_level in enumerate(sig_levels):
304
312
  index = len(sig_levels)*i + j
305
- if verbose: log.write(" -{} : {}-corrected P < {}".format(panno_texts[index], correction, sig_level))
313
+ log.write(" -{} : {}-corrected P < {} ".format(panno_texts[index], correction, sig_level) ,verbose=verbose)
306
314
  for panno_set_number in panno_list.keys():
307
315
  for key, i in panno_list[panno_set_number].items():
308
316
  if panno_set_number == 1:
@@ -318,14 +326,8 @@ def plot_rg(ldscrg,
318
326
  ax.set_aspect('equal', adjustable='box')
319
327
 
320
328
  save_figure(fig, save, keyword="ldscrg",save_args=save_args, log=log, verbose=verbose)
321
- #if save:
322
- # if verbose: log.write("Saving plot:")
323
- # if save==True:
324
- # fig.savefig("./ldscrg_heatmap.png",bbox_inches="tight",**save_args)
325
- # log.write(" -Saved to "+ "./ldscrg_heatmap.png" + " successfully!" )
326
- # else:
327
- # fig.savefig(save,bbox_inches="tight",**save_args)
328
- # log.write(" -Saved to "+ save + " successfully!" )
329
- if verbose: log.write("Finished creating ldsc genetic correlation heatmap!")
329
+
330
+ log.write("Finished creating ldsc genetic correlation heatmap!" ,verbose=verbose)
331
+
330
332
  return fig,ax,log,df
331
333
 
@@ -59,23 +59,25 @@ def plot_stacked_mqq(objects,
59
59
  log=Log(),
60
60
  **mqq_args
61
61
  ):
62
+
62
63
  log.write("Start to create stacked mqq plot by iteratively calling plot_mqq:",verbose=verbose)
63
64
  # load sumstats
65
+
66
+ ##########################################################################################################################################
64
67
  sumstats_list = []
65
68
  for each_object in objects:
66
69
  sumstats_list.append(each_object.data)
67
70
 
68
-
69
71
  if fig_args is None:
70
72
  fig_args = {"dpi":200}
71
73
  if region_lead_grid_line is None:
72
74
  region_lead_grid_line = {"alpha":0.5,"linewidth" : 2,"linestyle":"--","color":"#FF0000"}
73
75
  if title_pos is None:
74
- title_pos = [0.03,0.97]
76
+ title_pos = [0.01,0.97]
75
77
  if title_args is None:
76
78
  title_args = {}
77
- # create figure and axes
78
-
79
+
80
+ # create figure and axes ##################################################################################################################
79
81
  if mode=="r":
80
82
  if len(vcfs)==1:
81
83
  vcfs = vcfs *len(sumstats_list)
@@ -105,27 +107,29 @@ def plot_stacked_mqq(objects,
105
107
  **fig_args)
106
108
  plt.subplots_adjust(hspace=region_hspace)
107
109
 
108
- #
109
-
110
-
110
+ ##########################################################################################################################################
111
111
  mqq_args_for_each_plot = _sort_args(mqq_args, n_plot)
112
-
113
-
114
-
112
+ ##########################################################################################################################################
113
+ # get x axis dict
115
114
  if mode=="m":
116
115
  _posdiccul = _get_chrom_dic(sumstats_list,chrom="CHR",pos="POS",chrpad=0.02)
117
116
  else:
118
117
  _posdiccul=None
119
118
 
119
+ ##########################################################################################################################################
120
+ # a dict to store lead variants of each plot
120
121
  lead_variants_is={}
122
+
123
+ ##########################################################################################################################################
121
124
  # plot manhattan plot
122
125
  for index,sumstats in enumerate(sumstats_list):
126
+
127
+ #################################################################
123
128
  if mode=="m" or mode=="r":
124
129
  figax = (fig,axes[index],axes[-1])
125
130
  elif mode=="mqq":
126
131
  figax = (fig,axes[index,0],axes[index,1])
127
-
128
-
132
+ #################################################################
129
133
  if index==0:
130
134
  # plot last m and gene track
131
135
  fig,log,lead_i,lead_i2 = mqqplot(sumstats,
@@ -151,6 +155,7 @@ def plot_stacked_mqq(objects,
151
155
  )
152
156
  lead_variants_is[index] = (lead_i,lead_i2)
153
157
  else:
158
+ # plot only the scatter plot
154
159
  fig,log,lead_i,lead_i2 = mqqplot(sumstats,
155
160
  chrom="CHR",
156
161
  pos="POS",
@@ -178,13 +183,32 @@ def plot_stacked_mqq(objects,
178
183
  # adjust labels
179
184
  # drop labels for each plot
180
185
  # set a common laebl for all plots
181
- for index in range(n_plot):
182
- axes[index].set_ylabel("")
186
+
183
187
 
184
188
  if titles is not None:
185
189
  for index,title in enumerate(titles):
186
190
  axes[index].text(title_pos[0], title_pos[1] , title, transform=axes[index].transAxes,ha="left", va='top',**title_args)
191
+ ##########################################################################################################################################
192
+ # draw the line for lead variants
193
+ _draw_grid_line_for_lead_variants(mode, lead_variants_is, n_plot, axes, region_lead_grid_line)
194
+
195
+ ##########################################################################################################################################
196
+ _drop_old_y_labels(axes, n_plot)
197
+
198
+ _add_new_y_label(mode, fig, gene_track_height,n_plot,subplot_height )
199
+
200
+ ##########################################################################################################################################
201
+ save_figure(fig = fig, save = save, keyword= "stacked_" + mode, save_args=save_args, log = log, verbose=verbose)
202
+
203
+ log.write("Finished creating stacked mqq plot by iteratively calling plot_mqq.",verbose=verbose)
204
+
205
+ return fig, log
187
206
 
207
+ def _drop_old_y_labels(axes, n_plot):
208
+ for index in range(n_plot):
209
+ axes[index].set_ylabel("")
210
+
211
+ def _draw_grid_line_for_lead_variants(mode, lead_variants_is, n_plot, axes, region_lead_grid_line):
188
212
  if mode=="r":
189
213
  for index, sig_is in lead_variants_is.items():
190
214
  for sig_i in sig_is:
@@ -192,19 +216,14 @@ def plot_stacked_mqq(objects,
192
216
  for each_axis_index in range(n_plot + 1):
193
217
  axes[each_axis_index].axvline(x=sig_i, zorder=2,**region_lead_grid_line)
194
218
 
195
-
219
+ def _add_new_y_label(mode, fig, gene_track_height,n_plot,subplot_height ):
196
220
  gene_track_height_ratio = gene_track_height/(gene_track_height + n_plot*subplot_height)
197
221
  ylabel_height = (1 - gene_track_height_ratio)*0.5 + gene_track_height_ratio
198
222
  if mode=="r":
199
223
  fig.text(0.08, ylabel_height , "$-log_{10}(P)$", va='center', rotation='vertical')
200
224
  fig.text(0.93, ylabel_height, "Recombination rate(cM/Mb)", va='center', rotation=-90)
201
225
  elif mode=="m":
202
- fig.text(0.08, ylabel_height , "$-log_{10}(P)$", va='center', rotation='vertical')
203
-
204
- save_figure(fig = fig, save = save, keyword= "stacked_" + mode, save_args=save_args, log = log, verbose=verbose)
205
- log.write("Finished creating stacked mqq plot by iteratively calling plot_mqq.",verbose=verbose)
206
- return fig, log
207
-
226
+ fig.text(0.08, ylabel_height , "$-log_{10}(P)$", va='center', rotation='vertical')
208
227
 
209
228
  def _sort_args(mqq_args, n_plot):
210
229
  mqq_args_for_each_plot={i:{} for i in range(n_plot)}