pycmplot 0.2.1__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pycmplot/_core.py CHANGED
@@ -92,6 +92,7 @@ def main() -> None:
92
92
  )
93
93
  from pycmplot.plotting.linear import plot_linear
94
94
  from pycmplot.plotting.circular import plot_circular
95
+ from pycmplot.plotting.qq import plot_qq_combined, plot_qq_separate, plot_qq_overlay
95
96
  from pycmplot.resources import ResourceConfig
96
97
 
97
98
  # ------------------------------------------------------------------
@@ -110,6 +111,13 @@ def main() -> None:
110
111
  labels_raw = args.labels
111
112
  pcol_arg = args.pval_column
112
113
  logp = args.logp
114
+ qq = args.qq_plot
115
+ qq_separate = args.qq_separate
116
+ qq_ncols = args.qq_ncols
117
+ qq_thin = args.qq_thin
118
+ thin_below = args.thin_below
119
+ qq_max_points = args.qq_max_points
120
+ qq_overlay = args.qq_overlay
113
121
  chrom_label_size = args.chrom_label_size
114
122
  chrom_label_side = args.chrom_label_side
115
123
  track_label_size = args.track_label_size
@@ -164,7 +172,8 @@ def main() -> None:
164
172
  # ------------------------------------------------------------------
165
173
  (
166
174
  plt_name,
167
- table_out
175
+ table_out,
176
+ plt_base,
168
177
  ) = get_output_paths(
169
178
  labels,
170
179
  mode = mode,
@@ -202,6 +211,7 @@ def main() -> None:
202
211
  sumstats_loaded,
203
212
  hits_table,
204
213
  signif_lines,
214
+ pval_dict,
205
215
  ) = get_sumstats_and_merged_sector_list(
206
216
  sum_stats=sum_stats,
207
217
  labels=labels,
@@ -296,6 +306,52 @@ def main() -> None:
296
306
  figsize=(15, 9)
297
307
  )
298
308
 
309
+ # ------------------------------------------------------------------
310
+ # QQ PLOT
311
+ # ------------------------------------------------------------------
312
+ if qq and sumstats_loaded:
313
+ logger.info("Generating QQ Plot(s) ...")
314
+ qq_stem = f"{plt_base}_qq"
315
+
316
+ if qq_separate:
317
+ plot_qq_separate(
318
+ pval_dict=pval_dict,
319
+ thin=qq_thin,
320
+ thin_below=thin_below,
321
+ max_points=qq_max_points,
322
+ output_path=qq_stem,
323
+ colors=colors,
324
+ signif_threshold=signif_threshold or 5e-8,
325
+ dpi=dpi,
326
+ fig_format=output_format,
327
+ )
328
+ elif qq_overlay:
329
+ plot_qq_overlay(
330
+ pval_dict=pval_dict,
331
+ thin=qq_thin,
332
+ thin_below=thin_below,
333
+ max_points=qq_max_points,
334
+ colors=colors,
335
+ signif_threshold=signif_threshold or 5e-8,
336
+ dpi=dpi,
337
+ title=plot_title,
338
+ output_path=f"{qq_stem}_overlay",
339
+ fig_format=output_format,
340
+ )
341
+ else:
342
+ plot_qq_combined(
343
+ pval_dict=pval_dict,
344
+ thin=qq_thin,
345
+ thin_below=thin_below,
346
+ max_points=qq_max_points,
347
+ colors=colors,
348
+ ncols=qq_ncols,
349
+ signif_threshold=signif_threshold or 5e-8,
350
+ dpi=dpi,
351
+ title=plot_title,
352
+ output_path=f"{qq_stem}_combined",
353
+ fig_format=output_format,
354
+ )
299
355
 
300
356
  if __name__ == "__main__":
301
357
  main()
pycmplot/cli.py CHANGED
@@ -329,29 +329,50 @@ def get_arguments(descmsg: str = DESCMSG) -> argparse.Namespace:
329
329
  )
330
330
  opt.add_argument(
331
331
  "-bc", "--build_column", required=False, type=str, metavar="str",
332
- help="Name of column containing genome build (hg18/hg19/hg38). Or use ``--build`` below to supply genome builds per summary stat file."
333
- )
332
+ help=("Name of column containing genome build (hg18/hg19/hg38)."
333
+ "Or use ``--build`` below to supply genome builds per summary stat file."
334
+ ))
334
335
  opt.add_argument(
335
- "-b","--build",
336
- help="""
337
- Comma-sperated list of genome build of summary stats file(s) listed in the same order as sumstats files.
338
- (e.g. hg19,hg38,hg38,hg19 means:
339
- file1.txt.gz --> hg19
340
- file2.txt.gz --> hg38
341
- file3.tsv --> hg38 ... etc)
342
- """,
343
- required=False,
344
- type=str,
345
- metavar='str'
336
+ "-b","--build", required=False, type=str, metavar='str',
337
+ help=
338
+ """Comma-sperated list of genome build of summary stats file(s) listed
339
+ in the same order as sumstats files. e.g. hg19,hg38,hg38,hg19 means:
340
+ file1.txt.gz --> hg19
341
+ file2.txt.gz --> hg38
342
+ file3.tsv --> hg38 ... etc
343
+ """
346
344
  )
347
345
  opt.add_argument(
348
346
  "--logp", action="store_true",
349
347
  help="Plot −log₁₀(p) instead of raw p-values."
350
348
  )
351
- opt.add_argument(
352
- "-qq", "--qq_plot", action="store_true",
353
- help="Also generate a QQ-plot."
354
- )
349
+ opt.add_argument("-qq", "--qq_plot", action="store_true",
350
+ help="Generate QQ-plot(s) alongside the Manhattan plot.")
351
+ opt.add_argument("-qq_sep", "--qq_separate", action="store_true",
352
+ help=(
353
+ "Save one QQ-plot file per sumstat instead of a "
354
+ "combined multi-panel figure. Only used when -qq is set."
355
+ ))
356
+ opt.add_argument("-qq_cols", "--qq_ncols", default=3, type=int, metavar="int",
357
+ help="Number of columns in the combined QQ-plot grid (default: 3).")
358
+ opt.add_argument("-qq_thin", "--qq_thin", action="store_true", default=False,
359
+ help=(
360
+ "Thin null-like p-values before QQ plotting for speed (default: off)."
361
+ "Include this flag to turn on for speed."
362
+ ))
363
+ opt.add_argument("-thin_below", "--thin_below", type=float, metavar="float", default=0.01,
364
+ help=(
365
+ "P-value threshold below which all points are always kept."
366
+ "Points above this threshold are downsampled (default: 0.01)."
367
+ ))
368
+ opt.add_argument("-qq_max_pts", "--qq_max_points", default=50000, type=int, metavar="int",
369
+ help="Max points to plot per QQ track after thinning (default: 50000).")
370
+ opt.add_argument("-qq_ov", "--qq_overlay", action="store_true",
371
+ help=(
372
+ "Plot all sumstats on a single overlaid QQ-plot, "
373
+ "each coloured by label with lambda in the legend. "
374
+ "Only used when -qq is set."
375
+ ))
355
376
  opt.add_argument(
356
377
  "-tp", "--trim_pval", type=float, metavar="float",
357
378
  help="Trim variants with p > this value before plotting."
pycmplot/io.py CHANGED
@@ -291,7 +291,9 @@ def strip_comma_separated_input_streams(
291
291
 
292
292
  if builds:
293
293
  builds = [s.strip() for s in builds.strip().split(",")]
294
- if len(sum_stats) != len(labels) != len(builds):
294
+ if len(sum_stats) == len(labels) == len(builds):
295
+ pass
296
+ else:
295
297
  sys.exit(
296
298
  "Error: number of summary stats files, labels, and builds must match.\n"
297
299
  f" Files: {sum_stats}\n"
@@ -429,16 +431,16 @@ def get_output_paths(
429
431
 
430
432
  labels = [re.sub(r"[^a-zA-Z0-9\s]", "", x).replace(" ", "_") for x in labels]
431
433
 
432
- plt_base = str(out_path / f"{pltitle}_{'_'.join(labels)}_{mode.lower()}")
433
-
434
434
  suffix = "_logp" if logp else "_pval"
435
435
 
436
- plt_name = f"{plt_base}{suffix}.{output_format.lower()}"
436
+ plt_base = str(out_path / f"{pltitle}_{'_'.join(labels)}_{mode.lower()}{suffix}")
437
+
438
+ plt_name = f"{plt_base}.{output_format.lower()}"
437
439
 
438
- table_out = f"{plt_base}{suffix}_locus_summary_table.tsv"
440
+ table_out = f"{plt_base}_locus_summary_table.tsv"
439
441
 
440
442
 
441
- return plt_name, table_out
443
+ return plt_name, table_out, plt_base
442
444
 
443
445
 
444
446
 
@@ -561,11 +563,13 @@ def prep_pycmplot_input_info(
561
563
  snp_candidates = [c for c in snp_candidates if c]
562
564
  pvl_candidates = [c for c in pvl_candidates if c]
563
565
 
564
- bld_candidates = ["BUILD", "Genome", "Genome_Build", "Genome-build"]
565
- bld_candidates_l = [x.lower() for x in bld_candidates]
566
- bld_candidates_u = [x.upper() for x in bld_candidates]
567
- bld_candidates = [build] + bld_candidates + bld_candidates_l + bld_candidates_u
568
- bld_candidates = [c for c in bld_candidates if c]
566
+ bld_candidates = []
567
+ if buildc:
568
+ bld_candidates = ["BUILD", "Genome", "Genome_Build", "Genome-build"]
569
+ bld_candidates_l = [x.lower() for x in bld_candidates]
570
+ bld_candidates_u = [x.upper() for x in bld_candidates]
571
+ bld_candidates = [buildc] + bld_candidates + bld_candidates_l + bld_candidates_u
572
+ bld_candidates = [c for c in bld_candidates if c]
569
573
 
570
574
  # ------------------------------------------------------------------
571
575
  # Resolve column names per file
@@ -795,6 +799,7 @@ def get_sumstats_and_merged_sector_list(
795
799
  }
796
800
 
797
801
  sumstats_loaded: dict[str, list] = {}
802
+ pval_dict: dict[str, np.ndarray | pd.Series] = {}
798
803
  all_lead_snps: list[pd.DataFrame] = []
799
804
 
800
805
  for label in sumstats.keys() & (file_info or {}).keys():
@@ -818,6 +823,11 @@ def get_sumstats_and_merged_sector_list(
818
823
  dtype=sumstat_dtypes,
819
824
  ).rename(columns=sumstat_newcols)
820
825
 
826
+ # Get dict of p-values for qq-plotting before applying trim_pval
827
+ logger.info("Extracting raw p-values for qq-plotting ...")
828
+ pval_dict[label] = df["P"].dropna().astype(float).values
829
+
830
+
821
831
  # Add build column if not exist and build supplied
822
832
  if build:
823
833
  df['BUILD'] = build
@@ -917,7 +927,13 @@ def get_sumstats_and_merged_sector_list(
917
927
  for _ in sumstats
918
928
  ]
919
929
 
920
- # Optionally sort tracks
930
+
931
+ # sort dicts by user-supplied order
932
+ sumstats_loaded = {key: sumstats_loaded[key] for key in labels if key in sumstats_loaded}
933
+ pval_dict = {key: pval_dict[key] for key in labels if key in pval_dict}
934
+
935
+
936
+ # or sort by user option
921
937
  if sort_tracks is not None:
922
938
  if sort_tracks.lower() == "label":
923
939
  sumstats_loaded = dict(sorted(sumstats_loaded.items()))
@@ -928,6 +944,7 @@ def get_sumstats_and_merged_sector_list(
928
944
  key=lambda item: (item[0], natsort.natsort_keygen()(item[1][1])),
929
945
  )
930
946
  )
947
+
931
948
 
932
949
  # Compute per-sumstat sector sizes (chrom → [min_pos, max_pos])
933
950
  assoc_sector_sizes_list: list[dict] = []
@@ -941,7 +958,7 @@ def get_sumstats_and_merged_sector_list(
941
958
  for chrom in assoc["CHR"].unique():
942
959
  sub = assoc[assoc["CHR"] == chrom]
943
960
  lo_val = max(sub["POS"].min() - 1_000_000, 0)
944
- hi_val = sub["POS"].max() + 1_000_000
961
+ hi_val = sub["POS"].max()
945
962
  assoc_dic[str(chrom)] = [lo_val, hi_val]
946
963
 
947
964
  min_dic_val = min(assoc_dic.values())
@@ -955,9 +972,9 @@ def get_sumstats_and_merged_sector_list(
955
972
 
956
973
  # Add spacer sector for y-axis labelling
957
974
  if min_dic_val is not None:
958
- if len(labels) <= 5:
959
- merged["Spacer1"] = [x + x / 2 for x in min_dic_val]
960
- else:
961
- merged["Spacer1"] = [x * 2 for x in min_dic_val]
975
+ #if len(labels) <= 5:
976
+ # merged["Spacer1"] = [x + x / 2 for x in min_dic_val]
977
+ #else:
978
+ merged["Spacer1"] = [x * 2 for x in min_dic_val]
962
979
 
963
- return merged, sumstats_loaded, hits_table, signif_lines
980
+ return merged, sumstats_loaded, hits_table, signif_lines, pval_dict
@@ -380,6 +380,7 @@ def plot_circular(
380
380
  annotate: str = None,
381
381
  label_col: str = None,
382
382
  chrom_label_side: str = 'inside',
383
+ chrom_label_size: float = 6,
383
384
  signif_line: float = 5e-8,
384
385
  highlight: bool = False,
385
386
  highlight_thresh: float = 5e-8,
@@ -387,7 +388,6 @@ def plot_circular(
387
388
  highlight_line: bool = False,
388
389
  highlight_line_color: str = 'grey',
389
390
  colors: list[str] = ['steelblue', 'grey'],
390
- chrom_label_size: float = 6,
391
391
  track_label_size: float = 6,
392
392
  track_label_orientation: str = 'vertical',
393
393
  hits_table: pd.DataFrame = None,
@@ -526,7 +526,8 @@ def plot_circular(
526
526
  labels = list(sumstats_loaded.keys())
527
527
  (
528
528
  plt_name,
529
- table_out
529
+ table_out,
530
+ plt_base,
530
531
  ) = get_output_paths(
531
532
  labels,
532
533
  mode='cm',
@@ -558,14 +559,16 @@ def plot_circular(
558
559
  radii_reversed = dict(reversed(list(radii.items())))
559
560
 
560
561
  inside_loc = r_min - 3
561
- outside_loc = 105
562
- chrom_label_loc = outside_loc if chrom_label_side == "outside" else inside_loc
562
+ outside_loc = r_max + 4
563
563
 
564
564
  if annotate:
565
565
  annot_key = next(iter(radii_reversed))
566
566
  annot_r = radii_reversed.pop(annot_key)
567
+ outside_loc = max(list(radii_reversed.values())[0]) + 2
567
568
  radii_reversed["annot_track_r"] = annot_r
568
569
 
570
+ chrom_label_loc = outside_loc if chrom_label_side == "outside" else inside_loc
571
+
569
572
  for index, (sector_radius, sumstats_key, sumstats_value, signif_dict) in enumerate(
570
573
  zip(
571
574
  radii_reversed.values(),
@@ -647,7 +650,7 @@ def plot_circular(
647
650
  x=pos,
648
651
  label=str(label),
649
652
  min_r=r_low,
650
- max_r=r_low + 3,
653
+ max_r=r_low + 6,
651
654
  label_size=annotation_size,
652
655
  text_kws={
653
656
  "size": "large",
@@ -668,8 +671,9 @@ def plot_circular(
668
671
  r=[sector_min_r, r_low],
669
672
  start=pos,
670
673
  end=pos,
674
+ alpha=0.4,
671
675
  color=highlight_line_color,
672
- lw=0.5,
676
+ lw=0.4,
673
677
  ls="--",
674
678
  )
675
679
 
@@ -709,6 +713,6 @@ def plot_circular(
709
713
 
710
714
  if plt_name:
711
715
  fig.savefig(fname=plt_name.lower(), dpi=dpi)
712
- logger.info("Saved circular Manhattan plot: %s", plt_name)
716
+ logger.info("Saved circular Manhattan plot: %s", plt_name.lower())
713
717
 
714
718
  return fig
@@ -100,7 +100,7 @@ def _cluster_annotations_by_chr(
100
100
  return clusters
101
101
 
102
102
 
103
- def _draw_annotation_arrows_2(
103
+ def _draw_annotation_arrows(
104
104
  ax,
105
105
  annot_df,
106
106
  chr_col: str,
@@ -212,7 +212,7 @@ def _draw_annotation_arrows_2(
212
212
 
213
213
 
214
214
 
215
- def _draw_annotation_arrows(
215
+ def _draw_annotation_arrows_2(
216
216
  ax,
217
217
  annot_df,
218
218
  chr_col: str,
@@ -361,6 +361,118 @@ def _draw_annotation_arrows(
361
361
 
362
362
  last_xtext = max(x_texts)
363
363
 
364
+
365
# Using cumulative distance for annotations and separating clusters
def _draw_annotation_arrows_3(
    ax,
    annot_df,
    chr_col: str,
    label_col: str,
    offsets: dict,
    chr_max: dict,
    spread_width: float = 60e6,
    isolation_threshold: float = 80e6,
    stack_threshold: float = 10e6,
    y_text_base: float = 0.55,
    y_stack_step: float = 0.02,
    max_rad: float = 0.35,
    y_tip: float = 0.0,
) -> None:
    """Draw curved annotation arrows and rotated labels, chromosome by chromosome.

    For every chromosome the signals are processed in ascending ``x`` order:

    1. A signal whose nearest neighbour on the same chromosome is at least
       ``isolation_threshold`` away keeps its label directly above it.
    2. All remaining signals are laid out on an evenly spaced grid (step
       derived from ``spread_width``), shifted right until the first grid
       slot lies beyond the right-most label of the previous chromosome.
    3. A label whose x position is within ``stack_threshold`` of the previous
       label is raised above it; otherwise it returns to ``y_text_base``.

    Parameters
    ----------
    ax:
        Axes receiving the arrows and text (``ax.add_patch`` / ``ax.text``).
    annot_df:
        Table with at least the columns ``chr_col``, ``label_col`` and ``"x"``.
    chr_col, label_col:
        Column names for the chromosome and the label text.
    offsets, chr_max:
        Per-chromosome start offset and length, keyed by chromosome name.
    spread_width:
        Nominal spacing between spread labels; must be positive.
    isolation_threshold:
        Minimum neighbour distance for a label to sit straight above its signal.
    stack_threshold:
        Maximum distance to the previous label that triggers vertical stacking.
    y_text_base, y_stack_step:
        Baseline label height and the increment used when stacking.
    max_rad:
        Absolute cap on the ``arc3`` curvature of an arrow.
    y_tip:
        Reference y for arrow tips (tips are drawn at ``y_tip - 0.05``).

    Raises
    ------
    ValueError
        If ``spread_width`` is not positive.
    """
    if spread_width <= 0:
        raise ValueError("spread_width must be positive.")

    annot_df = annot_df.sort_values(by=[chr_col, "x"], key=natsort_keygen())
    last_xtext = 0 - spread_width

    # Shrink step: half of (spread_width / its two leading significant digits),
    # e.g. 60e6 -> 60e6 / 60 / 2 = 5e5. Scientific formatting is used so that any
    # positive magnitude works (slicing ``str(value)[:2]`` fails for values
    # such as 5.0 or 1e16).
    sci = f"{float(spread_width):.15e}"
    lead_two_digits = int(sci[0] + sci[2])
    pad = spread_width / lead_two_digits / 2

    for chr_name, df_chr in annot_df.groupby(chr_col, sort=False):
        df_chr = df_chr.sort_values("x")
        chr_start = offsets[chr_name]
        chr_end = offsets[chr_name] + chr_max[chr_name]
        chr_range = chr_end - chr_start

        x_signals = df_chr["x"].values
        labels = df_chr[label_col].values
        n = len(x_signals)

        # ------------------------------------------------------------------
        # Compute label x positions (spread or straight)
        # ------------------------------------------------------------------
        # x_signals is sorted, so the nearest neighbour of each signal is one
        # of its two adjacent entries; a lone signal gets an infinite distance.
        gaps = np.diff(x_signals)
        nearest = np.minimum(
            np.concatenate(([np.inf], gaps)),
            np.concatenate((gaps, [np.inf])),
        )
        x_texts = [
            x_sig if dist >= isolation_threshold else None  # None: needs spreading
            for x_sig, dist in zip(x_signals, nearest)
        ]

        spread_indices = [k for k, v in enumerate(x_texts) if v is None]
        if spread_indices:
            # Narrow the spacing while it exceeds the chromosome's own width.
            sw = spread_width
            while sw > chr_range and sw > pad:
                sw -= pad

            xmin = x_signals[spread_indices[0]] - sw
            # Exactly one slot per spread label.
            positions = xmin + sw * np.arange(len(spread_indices))

            # Do not overlap labels placed for the previous chromosome.
            while positions[0] <= last_xtext:
                positions = positions + sw

            for j, k in enumerate(spread_indices):
                x_texts[k] = positions[j]

        # ------------------------------------------------------------------
        # Compute label y positions using distance to the previous label
        # ------------------------------------------------------------------
        y_texts = [y_text_base] * n

        for k in range(1, n):
            cum_dist = abs(x_texts[k] - x_texts[k - 1])
            if cum_dist <= stack_threshold:
                # too close to previous label — stack upward adaptively
                y_texts[k] = y_texts[k - 1] + y_stack_step + (
                    y_stack_step * (1 - cum_dist / stack_threshold)
                )
            else:
                y_texts[k] = y_text_base  # far enough — reset to baseline

        # ------------------------------------------------------------------
        # Draw arrows and labels
        # ------------------------------------------------------------------
        for x_sig, x_txt, y_txt, label in zip(x_signals, x_texts, y_texts, labels):
            dx = x_txt - x_sig
            # Curvature grows with horizontal displacement, capped at max_rad.
            rad = np.clip(dx / (spread_width * 2), -max_rad, max_rad)

            arrow = FancyArrowPatch(
                (x_txt, y_txt),
                (x_sig, y_tip - 0.05),
                arrowstyle="-|>",
                mutation_scale=12,
                lw=0.6,
                color="grey",
                alpha=0.5,
                connectionstyle=f"arc3,rad={rad}",
                transform=ax.transData,
            )
            ax.add_patch(arrow)

            ax.text(
                x_txt,
                y_txt + 0.02,
                str(label),
                rotation=45,
                ha="left",
                va="bottom",
                fontsize=10,
                clip_on=False,
                color="black",
                fontstyle="italic",
                fontweight="regular",
            )

        last_xtext = max(x_texts)
475
+
364
476
  # ---------------------------------------------------------------------------
365
477
  # Public function
366
478
  # ---------------------------------------------------------------------------
@@ -647,7 +759,8 @@ def plot_linearm(
647
759
  # Annotation track
648
760
  # ------------------------------------------------------------------
649
761
  if annotate and annot_df is not None:
650
- """
762
+
763
+
651
764
  _draw_annotation_arrows(
652
765
  ax_annot,
653
766
  annot_df,
@@ -657,9 +770,10 @@ def plot_linearm(
657
770
  chr_max=chr_max,
658
771
  spread_width=60e6,
659
772
  )
660
- """
773
+
661
774
 
662
- _draw_annotation_arrows(
775
+ """
776
+ _draw_annotation_arrows_2(
663
777
  ax=ax_annot,
664
778
  annot_df=annot_df,
665
779
  chr_col=chr_col,
@@ -667,13 +781,31 @@ def plot_linearm(
667
781
  offsets=offsets,
668
782
  chr_max=chr_max,
669
783
  spread_width=60e6,
670
- isolation_threshold=80e6, # above this → straight (Tier 1)
784
+ isolation_threshold=40e6, # above this → straight (Tier 1)
671
785
  stack_threshold=10e6, # below this → stack (Tier 3)
672
786
  max_tilt=45, # max angleA departure from vertical
673
787
  y_tip=0.0,
674
788
  y_text=0.55,
675
789
  y_stack_step=0.12, # vertical gap between stacked labels
676
790
  )
791
+
792
+
793
+ _draw_annotation_arrows_3(
794
+ ax=ax_annot,
795
+ annot_df=annot_df,
796
+ chr_col=chr_col,
797
+ label_col=label_col,
798
+ offsets=offsets,
799
+ chr_max=chr_max,
800
+ spread_width=60e6,
801
+ isolation_threshold=80e6,
802
+ stack_threshold=90e6,
803
+ y_text_base=0.55,
804
+ y_stack_step=0.03,
805
+ max_rad=0.35,
806
+ y_tip=0.0,
807
+ )
808
+ """
677
809
 
678
810
  ax_annot.set_ylim(0, 1)
679
811
  ax_annot.axis("off")
@@ -859,12 +991,11 @@ def plot_linear(
859
991
  logger.info("'SNP' column is used for annotation since '%s' column could not be resolved in hits table.", label_col)
860
992
  pass
861
993
 
862
- logger.info(f"LABEL COL: {label}")
863
-
864
994
  # plot name
865
995
  (
866
996
  plt_name,
867
- table_out
997
+ table_out,
998
+ plt_base,
868
999
  ) = get_output_paths(
869
1000
  labels = t_labels,
870
1001
  mode='lm',
@@ -0,0 +1,643 @@
1
+ from __future__ import annotations
2
+
3
+ """
4
+ pycmplot.plotting.qq
5
+ ====================
6
+ QQ (quantile-quantile) plots for GWAS p-values.
7
+
8
+ Speed notes
9
+ -----------
10
+ GWAS summary statistics often contain millions of SNPs. Most of those points
11
+ lie near the null diagonal and are visually redundant. Two optimisations are
12
+ applied by default:
13
+
14
+ 1. **P-value thinning** (``thin_below`` / ``max_points``):
15
+ All points above a -log10(p) tail threshold are kept in full; the bulk
16
+ of null-like points below that threshold are randomly downsampled to at
17
+ most ``max_points`` total. Lambda (λ) is always computed on the *full*
18
+ unfiltered array before thinning, so the statistic is never affected.
19
+
20
+ 2. **Rasterised scatter** (``rasterized=True``):
21
+ The scatter layer is rendered as a bitmap inside vector formats (PDF/SVG),
22
+ dramatically reducing file size and save time for large point clouds.
23
+
24
+ Public functions
25
+ ----------------
26
+ thin_pvals Downsample null-like p-values for fast plotting.
27
+ plot_qq_single Draw one QQ plot onto a given Axes.
28
+ plot_qq_combined All QQ plots in a single figure (grid layout).
29
+ plot_qq_separate One output file per sumstat.
30
+ plot_qq_overlay All sumstats overlaid on one axes, coloured by label.
31
+ """
32
+
33
+ import logging
34
+ import math
35
+ from pathlib import Path
36
+ from typing import Optional
37
+
38
+ import matplotlib.pyplot as plt
39
+ import matplotlib.colors as mcolors
40
+ import numpy as np
41
+ import pandas as pd
42
+ from scipy.stats import beta as beta_dist
43
+
44
+ logger = logging.getLogger(__name__)
45
+
46
+
47
+ # ---------------------------------------------------------------------------
48
+ # Thinning helper
49
+ # ---------------------------------------------------------------------------
50
+
51
def thin_pvals(
    pvals: np.ndarray,
    tail_threshold: float = 0.01,
    max_points: int = 50_000,
    seed: int = 42,
) -> tuple[np.ndarray, np.ndarray, int]:
    """Downsample p-values for faster QQ plotting with no visible breaks.

    A single **log-uniform** thinning pass is applied to all valid p-values
    (finite, ``0 < p <= 1``):

    1. Sort p-values ascending and convert to −log₁₀ scale.
    2. Place ``max_points`` evenly spaced targets between the largest and the
       smallest −log₁₀(p) and, for each target, keep the first observed value
       at or above it. Duplicate picks are merged, so fewer than
       ``max_points`` values may be returned.

    Because the targets are uniform on the −log₁₀ axis, the sparse extreme
    tail is covered densely relative to the crowded null bulk, without a hard
    boundary. The most and the least significant p-values are always kept.

    Parameters
    ----------
    pvals:
        Full array of raw p-values.
    tail_threshold:
        Accepted for API compatibility only; it is not used. In particular
        there is no guarantee that *every* p-value below this threshold is
        retained.
    max_points:
        Maximum number of points to return (default 50 000). Must be >= 1.
    seed:
        Unused (kept for API compatibility — the selection is deterministic).

    Returns
    -------
    (kept_pvals, kept_ranks, n_full)
        *kept_pvals* — thinned p-values in ascending order.
        *kept_ranks* — 1-based ranks in the full sorted array.
        *n_full*     — number of valid p-values before thinning (for expected
        quantiles).

    Raises
    ------
    ValueError
        If ``max_points`` is smaller than 1.

    Notes
    -----
    Lambda (λ) must be computed on the full *pvals* array **before** calling
    this function — thinning changes the empirical distribution.
    """
    if max_points < 1:
        raise ValueError("max_points must be a positive integer.")

    pvals = np.asarray(pvals, dtype=float)
    pvals = pvals[np.isfinite(pvals) & (pvals > 0) & (pvals <= 1)]
    n_full = len(pvals)

    # Ascending p-values; full_ranks[i] == i + 1
    pvals_sorted = np.sort(pvals)
    full_ranks = np.arange(1, n_full + 1)

    if n_full <= max_points:
        # Nothing to thin
        return pvals_sorted, full_ranks, n_full

    # Ascending p  ->  descending −log10(p):
    # logp_desc[0] is the most significant point, logp_desc[-1] the least.
    logp_desc = -np.log10(pvals_sorted)

    # Evenly spaced target positions along the −log10(p) axis
    targets = np.linspace(logp_desc[0], logp_desc[-1], max_points)

    # searchsorted needs ascending input, so search the reversed view.
    logp_asc = logp_desc[::-1]
    idx_in_asc = np.searchsorted(logp_asc, targets, side="left")
    idx_in_asc = np.clip(idx_in_asc, 0, n_full - 1)

    # Map back to indices of the ascending-p array and deduplicate
    # (np.unique also returns them sorted).
    idx_sorted = np.unique(n_full - 1 - idx_in_asc)

    kept_pvals = pvals_sorted[idx_sorted]
    kept_ranks = full_ranks[idx_sorted]

    n_kept = len(kept_pvals)
    logger.debug(
        "QQ thinning: %d → %d points (%.1f%% retained)",
        n_full, n_kept, 100 * n_kept / n_full,
    )

    return kept_pvals, kept_ranks, n_full
136
+
137
+
138
+ # ---------------------------------------------------------------------------
139
+ # Core array builder
140
+ # ---------------------------------------------------------------------------
141
+
142
def _qq_arrays(
    pvals: np.ndarray,
    ranks: Optional[np.ndarray] = None,
    n_full: Optional[int] = None,
    ci: float = 0.95,
) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Return (expected, observed, ci_lower, ci_upper) in −log10 scale.

    All four arrays are ordered by ascending expected −log10(p) so they can be
    passed straight to ``fill_between`` / ``scatter``.

    Parameters
    ----------
    pvals:
        Sorted (ascending) p-values to plot — may be a thinned subset.
    ranks:
        1-based ranks of *pvals* in the full distribution (any array-like).
        If ``None``, assumes *pvals* is the complete set and ranks are 1..n.
    n_full:
        Total number of SNPs in the full (pre-thinning) dataset.  Used to
        compute correct expected quantiles.  Defaults to ``len(pvals)``.
    ci:
        Confidence interval level.

    Raises
    ------
    ValueError
        If *ranks* is given and its shape differs from that of *pvals*.
    """
    pvals = np.asarray(pvals, dtype=float)
    n = len(pvals)

    if n_full is None:
        n_full = n
    if ranks is None:
        ranks = np.arange(1, n + 1)
    else:
        # Accept lists/Series too; arithmetic below needs an ndarray.
        ranks = np.asarray(ranks)
        if ranks.shape != pvals.shape:
            raise ValueError("ranks and pvals must have the same shape.")

    # Expected −log10(p): rank i → expected p = i/(n_full+1)
    expected = -np.log10(ranks / (n_full + 1))

    # Observed −log10(p): rank i paired with the i-th smallest p-value
    observed = -np.log10(pvals)

    # The i-th order statistic of n_full uniform draws is Beta(i, n_full-i+1).
    # The upper p quantile maps to the *lower* −log10 bound and vice versa.
    alpha = 1.0 - ci
    shape_b = n_full - ranks + 1
    ci_lo = -np.log10(beta_dist.ppf(1 - alpha / 2, ranks, shape_b))
    ci_hi = -np.log10(beta_dist.ppf(alpha / 2, ranks, shape_b))

    # Sort by expected ascending for clean polygon fill
    order = np.argsort(expected)
    return expected[order], observed[order], ci_lo[order], ci_hi[order]
185
+
186
+
187
+ # ---------------------------------------------------------------------------
188
+ # Lambda
189
+ # ---------------------------------------------------------------------------
190
+
191
def _compute_lambda(pvals: np.ndarray) -> float:
    """Genomic inflation factor λ = median(χ²_obs) / median(χ²_expected).

    Only finite p-values with ``0 < p <= 1`` are used. The observed median
    χ² (1 df) is obtained from the median p-value via the inverse survival
    function, which stays accurate when the median is tiny (``1 - p`` would
    round to exactly 1.0 and give an infinite statistic).

    Parameters
    ----------
    pvals:
        Raw p-values (array, list or Series).

    Returns
    -------
    float
        λ rounded to 4 decimals, or ``nan`` when no valid p-value remains.
    """
    from scipy.stats import chi2

    pvals = np.asarray(pvals, dtype=float)
    pvals = pvals[np.isfinite(pvals) & (pvals > 0) & (pvals <= 1)]
    if pvals.size == 0:
        return float("nan")

    obs_median_chi2 = chi2.isf(np.median(pvals), df=1)
    expected_median_chi2 = chi2.isf(0.5, df=1)  # ≈ 0.4549
    return round(float(obs_median_chi2 / expected_median_chi2), 4)
200
+
201
+
202
+ # ---------------------------------------------------------------------------
203
+ # Single-axis QQ plot
204
+ # ---------------------------------------------------------------------------
205
+
206
def plot_qq_single(
    pvals: np.ndarray | pd.Series,
    ax: plt.Axes,
    label: Optional[str] = None,
    color: str = "steelblue",
    point_size: float = 8,
    ci: float = 0.95,
    ci_alpha: float = 0.15,
    signif_threshold: Optional[float] = 5e-8,
    show_lambda: bool = True,
    title: Optional[str] = None,
    # --- speed options ---
    thin: bool = False,
    thin_below: float = 0.01,
    max_points: int = 50_000,
    rasterized: bool = True,
) -> plt.Axes:
    """Draw a single QQ plot onto *ax*.

    Invalid p-values (non-finite, ``<= 0`` or ``> 1``) are dropped first.
    If nothing valid remains, only the axis labels/title are set and no data
    artists are drawn.

    Parameters
    ----------
    pvals:
        Array or Series of raw p-values (not −log10).
    ax:
        Matplotlib Axes to draw on.
    label:
        Legend label for the scatter points; a legend is shown only when a
        label is given and points were drawn.
    color:
        Colour for points and CI fill.
    point_size:
        Scatter point size.
    ci:
        Confidence interval level (default 0.95).
    ci_alpha:
        Transparency of the CI band.
    signif_threshold:
        Accepted for interface compatibility. The horizontal significance
        line is currently disabled, so this value has no effect on the plot.
    show_lambda:
        Annotate the plot with the genomic inflation factor λ.
    title:
        Axes title.
    thin:
        Enable p-value thinning for speed (default ``False``). Thinning is
        applied only when more than ``max_points`` valid p-values exist.
    thin_below:
        Forwarded to :func:`thin_pvals` as ``tail_threshold``.
    max_points:
        Maximum number of points to plot after thinning (default 50 000).
    rasterized:
        Render the scatter as a bitmap inside vector output formats
        (default ``True``).

    Returns
    -------
    plt.Axes
    """
    pvals_full = np.asarray(pvals, dtype=float)
    pvals_full = pvals_full[np.isfinite(pvals_full) & (pvals_full > 0) & (pvals_full <= 1)]

    # Lambda always on the full array
    lam = _compute_lambda(pvals_full)

    has_points = pvals_full.size > 0
    if has_points:
        if thin and len(pvals_full) > max_points:
            plot_pvals, plot_ranks, n_full = thin_pvals(
                pvals_full, tail_threshold=thin_below, max_points=max_points
            )
        else:
            plot_pvals = np.sort(pvals_full)
            plot_ranks = np.arange(1, len(plot_pvals) + 1)
            n_full = len(plot_pvals)

        expected, observed, ci_lo, ci_hi = _qq_arrays(
            plot_pvals, ranks=plot_ranks, n_full=n_full, ci=ci
        )

        # CI band; ":g" avoids truncating levels such as 0.999 or 0.57
        ax.fill_between(
            expected, ci_lo, ci_hi,
            color=color, alpha=ci_alpha, linewidth=0,
            label=f"{ci * 100:g}% CI",
        )

        # Diagonal null line
        max_val = max(expected.max(), observed.max()) * 1.05
        ax.plot([0, max_val], [0, max_val], color="grey", linewidth=0.8,
                linestyle="--", zorder=1)

        # Observed points
        ax.scatter(
            expected, observed,
            s=point_size, color=color, alpha=0.85,
            label=label, zorder=2, edgecolors="none",
            rasterized=rasterized,
        )

    # Lambda annotation
    if show_lambda and not math.isnan(lam):
        ax.text(
            0.05, 0.95,
            f"λ = {lam:.4f}",
            transform=ax.transAxes,
            va="top", ha="left",
            fontsize=9, fontstyle="italic",
            color="black",
        )

    ax.set_xlabel("Expected −log₁₀(p)", fontsize=10)
    ax.set_ylabel("Observed −log₁₀(p)", fontsize=10)
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)

    if title:
        ax.set_title(title, fontsize=10, pad=6)
    if label and has_points:
        ax.legend(fontsize=8, frameon=False, loc="lower right")

    return ax
331
+
332
+
333
+ # ---------------------------------------------------------------------------
334
+ # Combined multi-panel figure
335
+ # ---------------------------------------------------------------------------
336
+
337
def plot_qq_combined(
    pval_dict: dict[str, np.ndarray | pd.Series],
    colors: Optional[list[str]] = None,
    point_size: float = 8,
    ci: float = 0.95,
    signif_threshold: Optional[float] = 5e-8,
    show_lambda: bool = True,
    ncols: int = 3,
    figsize: Optional[tuple] = None,
    dpi: int = 300,
    title: Optional[str] = None,
    output_path: Optional[str] = None,
    fig_format: str = "png",
    thin: bool = False,
    thin_below: float = 0.01,
    max_points: int = 50_000,
    rasterized: bool = True,
) -> tuple[plt.Figure, list[plt.Axes]]:
    """Plot all QQ plots in a single figure arranged in a grid.

    Each entry of *pval_dict* is drawn in its own panel by
    :func:`plot_qq_single`; the label is used both as legend label and as
    panel title. Grid cells beyond the number of tracks are hidden.

    Parameters
    ----------
    pval_dict:
        Ordered dict of ``{label: p_value_array}``.
    colors:
        List of colours, one per track. Cycles if fewer than tracks.
        Defaults to the ``tab10`` palette when ``None`` or empty.
    point_size, ci, signif_threshold, show_lambda:
        Forwarded unchanged to :func:`plot_qq_single`.
    ncols:
        Number of columns in the subplot grid (default 3). Must be >= 1.
    figsize:
        Figure size. Auto-calculated as 4.5 inches per grid cell from
        *ncols* and the number of tracks if ``None``.
    dpi:
        Resolution used when the figure is saved.
    title:
        Optional figure-level title.
    output_path:
        If given, save the figure. This is a path *prefix*: the file is
        written to ``{output_path}.{format}``.
    fig_format:
        Output format. If empty, the suffix of *output_path* is used,
        falling back to ``"png"``.
    thin, thin_below, max_points, rasterized:
        See :func:`plot_qq_single`.

    Returns
    -------
    (fig, axes)
        The figure and the list of axes that actually hold a QQ plot.

    Raises
    ------
    ValueError
        If *pval_dict* is empty or *ncols* is smaller than 1.
    """
    n = len(pval_dict)
    if n == 0:
        raise ValueError("pval_dict is empty.")
    if ncols < 1:
        # Without this guard the row computation below fails with a bare
        # ZeroDivisionError (or a negative row count).
        raise ValueError(f"ncols must be a positive integer, got {ncols!r}.")

    nrows = math.ceil(n / ncols)

    # Honour caller-supplied colours (cycling when there are fewer colours
    # than tracks); fall back to tab10 otherwise.
    if colors is None or len(colors) == 0:
        cmap = plt.get_cmap("tab10")
        colors = [mcolors.to_hex(cmap(i % 10)) for i in range(n)]
    else:
        colors = [colors[i % len(colors)] for i in range(n)]

    if figsize is None:
        figsize = (ncols * 4.5, nrows * 4.5)

    fig, axes_grid = plt.subplots(nrows, ncols, figsize=figsize, squeeze=False)
    axes_flat = axes_grid.flatten()

    try:
        for idx, (label, pvals) in enumerate(pval_dict.items()):
            plot_qq_single(
                pvals=pvals,
                ax=axes_flat[idx],
                label=label,
                color=colors[idx],
                point_size=point_size,
                ci=ci,
                signif_threshold=signif_threshold,
                show_lambda=show_lambda,
                title=label,
                thin=thin,
                thin_below=thin_below,
                max_points=max_points,
                rasterized=rasterized,
            )

        # Hide the unused trailing cells of the grid.
        for ax in axes_flat[n:]:
            ax.set_visible(False)

        if title:
            fig.suptitle(title, fontsize=13, y=1.01)

        # Lay out this figure explicitly instead of relying on pyplot's
        # notion of the "current" figure.
        fig.tight_layout()

        if output_path:
            fmt = fig_format or Path(output_path).suffix.lstrip(".") or "png"
            out_file = f"{output_path}.{fmt}"
            fig.savefig(out_file, format=fmt, dpi=dpi, bbox_inches="tight")
            logger.info("Saved combined QQ plot: %s", out_file)
    except Exception:
        # Do not leave a half-built figure registered in pyplot.
        plt.close(fig)
        raise

    return fig, list(axes_flat[:n])
428
+
429
+
430
+ # ---------------------------------------------------------------------------
431
+ # Separate figures — one file per sumstat
432
+ # ---------------------------------------------------------------------------
433
+
434
def plot_qq_separate(
    pval_dict: dict[str, np.ndarray | pd.Series],
    output_path: str = ".",
    colors: Optional[list[str]] = None,
    point_size: float = 8,
    ci: float = 0.95,
    signif_threshold: Optional[float] = 5e-8,
    show_lambda: bool = True,
    figsize: tuple = (5, 5),
    dpi: int = 300,
    fig_format: str = "png",
    thin: bool = False,
    thin_below: float = 0.01,
    max_points: int = 50_000,
    rasterized: bool = True,
) -> list[str]:
    """Save one QQ plot per sumstat as individual files.

    Parameters
    ----------
    pval_dict:
        Ordered dict of ``{label: p_value_array}``.
    output_path:
        Path *prefix* for the output files. Each plot is written to
        ``{output_path}_{label}.{fig_format}``, where spaces in the label
        are replaced by ``_`` and ``/`` by ``-``.
    colors:
        List of colours, one per track. Cycles if fewer than tracks.
        Defaults to the ``tab10`` palette when ``None`` or empty.
    point_size, ci, signif_threshold, show_lambda:
        Forwarded unchanged to :func:`plot_qq_single`.
    figsize:
        Size of each individual figure.
    dpi:
        Resolution used when saving.
    fig_format:
        Output format and file extension.
    thin, thin_below, max_points, rasterized:
        See :func:`plot_qq_single`.

    Returns
    -------
    List of output file paths, in the order of *pval_dict*. Empty if
    *pval_dict* is empty.
    """
    n = len(pval_dict)
    if n == 0:
        return []

    # Honour caller-supplied colours (cycling when there are fewer colours
    # than tracks); fall back to tab10 otherwise.
    if colors is None or len(colors) == 0:
        cmap = plt.get_cmap("tab10")
        colors = [mcolors.to_hex(cmap(i % 10)) for i in range(n)]
    else:
        colors = [colors[i % len(colors)] for i in range(n)]

    saved: list[str] = []

    for idx, (label, pvals) in enumerate(pval_dict.items()):
        fig, ax = plt.subplots(figsize=figsize)
        try:
            plot_qq_single(
                pvals=pvals,
                ax=ax,
                label=label,
                color=colors[idx],
                point_size=point_size,
                ci=ci,
                signif_threshold=signif_threshold,
                show_lambda=show_lambda,
                title=label,
                thin=thin,
                thin_below=thin_below,
                max_points=max_points,
                rasterized=rasterized,
            )

            # Lay out this figure explicitly rather than pyplot's
            # "current" figure.
            fig.tight_layout()

            # Make the label safe to embed in a file name.
            safe_label = str(label).replace(" ", "_").replace("/", "-")
            out_path = f"{output_path}_{safe_label}.{fig_format}"
            fig.savefig(out_path, format=fig_format, dpi=dpi, bbox_inches="tight")
        finally:
            # Always release the figure, even if plotting or saving failed,
            # so a failure part-way through does not leak open figures.
            plt.close(fig)

        logger.info("Saved QQ plot: %s", out_path)
        saved.append(out_path)

    return saved
511
+
512
+
513
+ # ---------------------------------------------------------------------------
514
+ # Overlay — all sumstats on one axes
515
+ # ---------------------------------------------------------------------------
516
+
517
def plot_qq_overlay(
    pval_dict: dict[str, np.ndarray | pd.Series],
    colors: Optional[list[str]] = None,
    point_size: float = 8,
    ci: float = 0.95,
    ci_alpha: float = 0.10,
    signif_threshold: Optional[float] = 5e-8,
    show_lambda: bool = True,
    figsize: tuple = (6, 6),
    dpi: int = 300,
    title: Optional[str] = None,
    output_path: Optional[str] = None,
    fig_format: str = "png",
    thin: bool = False,
    thin_below: float = 0.01,
    max_points: int = 50_000,
    rasterized: bool = True,
) -> tuple[plt.Figure, plt.Axes]:
    """Plot all sumstats on a single QQ axes, each coloured differently.

    Lambda (λ) values appear in the legend label for each sumstat.

    Parameters
    ----------
    pval_dict:
        Ordered dict of ``{label: p_value_array}``. Values that are not
        finite or fall outside ``(0, 1]`` are dropped before plotting.
    colors:
        List of colours, one per sumstat. Cycles if fewer than sumstats.
        Defaults to ``tab10`` palette when ``None`` or empty.
    point_size:
        Marker size of the observed points.
    ci:
        Confidence level of the band drawn around the null expectation.
    ci_alpha:
        Transparency of CI bands (default 0.10 — lower than single-panel
        default to keep overlapping bands readable).
    signif_threshold:
        Currently unused: the significance line is disabled in this plot.
        Kept so the signature stays parallel to the other QQ functions.
    show_lambda:
        Append λ to each legend entry (omitted when λ is NaN).
    figsize, dpi, title:
        Figure size, save resolution and optional axes title.
    output_path:
        If given, save the figure. This is a path *prefix*: the file is
        written to ``{output_path}.{format}``.
    fig_format:
        Output format. If empty, the suffix of *output_path* is used,
        falling back to ``"png"``.
    thin, thin_below, max_points, rasterized:
        See :func:`plot_qq_single`. Thinning is only applied to a sumstat
        with more than *max_points* valid p-values.

    Returns
    -------
    (fig, ax)

    Raises
    ------
    ValueError
        If *pval_dict* is empty, or if no sumstat contains any valid
        p-value.
    """
    n = len(pval_dict)
    if n == 0:
        raise ValueError("pval_dict is empty.")

    # Honour caller-supplied colours (cycling when there are fewer colours
    # than sumstats); fall back to tab10 otherwise.
    if colors is None or len(colors) == 0:
        cmap = plt.get_cmap("tab10")
        colors = [mcolors.to_hex(cmap(i % 10)) for i in range(n)]
    else:
        colors = [colors[i % len(colors)] for i in range(n)]

    fig, ax = plt.subplots(figsize=figsize)
    global_max = 0.0
    n_plotted = 0

    try:
        for idx, (label, pvals) in enumerate(pval_dict.items()):
            pvals_full = np.asarray(pvals, dtype=float)
            valid = np.isfinite(pvals_full) & (pvals_full > 0) & (pvals_full <= 1)
            pvals_full = pvals_full[valid]

            if pvals_full.size == 0:
                # Nothing to draw; the max() reductions below would fail on
                # empty arrays with an opaque numpy error.
                logger.warning(
                    "Skipping '%s' in QQ overlay: no valid p-values in (0, 1].",
                    label,
                )
                continue

            # Lambda on full array before any thinning
            lam = _compute_lambda(pvals_full)

            if thin and len(pvals_full) > max_points:
                plot_pvals, plot_ranks, n_full = thin_pvals(
                    pvals_full, tail_threshold=thin_below, max_points=max_points
                )
            else:
                plot_pvals = np.sort(pvals_full)
                plot_ranks = np.arange(1, len(plot_pvals) + 1)
                n_full = len(plot_pvals)

            expected, observed, ci_lo, ci_hi = _qq_arrays(
                plot_pvals, ranks=plot_ranks, n_full=n_full, ci=ci
            )

            color = colors[idx]
            # Same NaN guard as the single-panel plot: never print "λ=nan".
            if show_lambda and not math.isnan(lam):
                legend_label = f"{label} (λ={lam:.4f})"
            else:
                legend_label = label

            ax.fill_between(
                expected, ci_lo, ci_hi,
                color=color, alpha=ci_alpha, linewidth=0,
            )
            ax.scatter(
                expected, observed,
                s=point_size, color=color, alpha=0.85,
                label=legend_label, zorder=2 + idx, edgecolors="none",
                rasterized=rasterized,
            )

            global_max = max(global_max, expected.max(), observed.max())
            n_plotted += 1

        if n_plotted == 0:
            raise ValueError("No valid p-values in (0, 1] to plot.")

        # Diagonal null line spanning every plotted sumstat.
        ax.plot(
            [0, global_max * 1.05], [0, global_max * 1.05],
            color="grey", linewidth=0.8, linestyle="--", zorder=1,
        )

        ax.set_xlabel("Expected −log₁₀(p)", fontsize=11)
        ax.set_ylabel("Observed −log₁₀(p)", fontsize=11)
        ax.spines["top"].set_visible(False)
        ax.spines["right"].set_visible(False)

        ax.legend(
            fontsize=8, frameon=True, framealpha=0.7,
            edgecolor="lightgrey", loc="lower right",
        )

        if title:
            ax.set_title(title, fontsize=11, pad=8)

        # Lay out this figure explicitly instead of relying on pyplot's
        # notion of the "current" figure.
        fig.tight_layout()

        if output_path:
            fmt = fig_format or Path(output_path).suffix.lstrip(".") or "png"
            out_file = f"{output_path}.{fmt}"
            fig.savefig(out_file, format=fmt, dpi=dpi, bbox_inches="tight")
            logger.info("Saved overlay QQ plot: %s", out_file)
    except Exception:
        # Do not leave a half-built figure registered in pyplot.
        plt.close(fig)
        raise

    return fig, ax
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pycmplot
3
- Version: 0.2.1
3
+ Version: 0.2.2
4
4
  Summary: Multi-track circular and linear Manhattan plot generation for GWAS summary statistics
5
5
  Author: Kevin Esoh
6
6
  Author-email: Kevin Esoh <kesohku1@jh.edu>
@@ -183,7 +183,8 @@ pycmplot \
183
183
  | `-b, --build` | Comma-separated genome builds of sumstats | off |
184
184
  | `-bc, --build_column` | Genome build column name (containing hg18/hg19/hg38) | off |
185
185
  | `-m, --mode` | `lm` linear or `cm` circular | `lm` |
186
- | `-qq, --qq_plot` | Also generate a QQ-plot | off (coming soon...) |
186
+ | `-qq, --qq_plot` | Also generate a QQ-plot | off |
187
+ | `-qq_thin, --qq_thin` | Thin p-values for faster QQ-plotting | off |
187
188
  | `--logp` | Plot -log10(p) | off |
188
189
  | `-sig, --signif_threshold` | Genome-wide significance threshold | off (auto 0.05/N) |
189
190
  | `-sigl, --signif_line` | Value for genome-wide significance line if different from `-sig` | 5e-8 |
@@ -1,21 +1,22 @@
1
1
  pycmplot/__init__.py,sha256=fGBHi1vh_9_eu2Xks5B5tPk51RZxji5s-khGhSTNQCI,1288
2
- pycmplot/_core.py,sha256=Jn47lbtrCoE8nBkQ2HEScdKyT0E98b4pgRtgfleMA4k,10965
2
+ pycmplot/_core.py,sha256=H8Ab0db5cV9wbkuq8Nq9xjTqwRkra5ook6xu6A-N-ys,13021
3
3
  pycmplot/annotation.py,sha256=gMgDfnHmMYpkLuQIaJNHmtXVHHIeSUAcviLrisF2vmY,20886
4
- pycmplot/cli.py,sha256=CAOt1vzwLdKY_RnpbQ9oHVuyTeWxlkiPlClG_u6Cz9U,17807
4
+ pycmplot/cli.py,sha256=k6uBR5V0Y89VSzq-GHZxfR0XmBGS6aEUt66fqYEmQ40,19430
5
5
  pycmplot/constants.py,sha256=XaT3pTWM3dkawU1cA0HFpaNnUupSjv28wpPgmnVEjL0,3431
6
- pycmplot/io.py,sha256=zPVSdtXk99TUzJ01BpzRlE5PRTLIGdwpC12NBZBh0dc,34724
6
+ pycmplot/io.py,sha256=hWM4x_euyYUZ3gzFaP9tkZYHi_1fj8XE1wE9f5XOdbE,35301
7
7
  pycmplot/liftover.py,sha256=ZawfO9ZKZADFwyXZBnbrovh4TnV-ja1qHHnIgtxSCBM,6942
8
8
  pycmplot/resources.py,sha256=r0zHy_-9wu98lkqKENYrptX54uO6np_x94_ju3v2KYE,6414
9
9
  pycmplot/stats.py,sha256=8TXHxfGc4sUr3rE3cHnS2mXfIS1PPj0YgDk1C-z2Pqk,5813
10
10
  pycmplot/data/Homo_sapiens.GRCh37.geneinfo.tsv.gz,sha256=kLldtgT5-k4ZzU5jN--woFZEuOaWe9pQ4g4hhB3sdQI,840666
11
11
  pycmplot/data/Homo_sapiens.GRCh38.geneinfo.tsv.gz,sha256=cRAuNxifZi12yOxNKrVt0uTS52HwDDFzV_1N4E2Qwuw,626249
12
12
  pycmplot/data/hg19ToHg38.over.chain,sha256=oHPYkUIztVQtKXYauOxLOBUFKxOWSRnBKh77LjEfvzk,606773
13
- pycmplot/plotting/circular.py,sha256=Tp08RySuXH6mowM7UWH7l9mzAHN_-F12WFn_69r4sYk,27158
14
- pycmplot/plotting/linear.py,sha256=rGqQtFEp6NPWnnFFwT7zL7n1uIkQRbZdijzW6vBRodg,32941
15
- pycmplot-0.2.1.dist-info/licenses/LICENSE,sha256=7HtJWU-I9Tayt7xnvHU0D6oVqeTp3hMqCbTxbkYBTZQ,20904
13
+ pycmplot/plotting/circular.py,sha256=Xa_2GDW7lLKb8PeN_IL3_hvfkQaqcbQ68Q5QrxjCKs4,27295
14
+ pycmplot/plotting/linear.py,sha256=lrioORKyOaz34UAOhBWg3W2kLq6nBT7HNkPxy8hX340,37416
15
+ pycmplot/plotting/qq.py,sha256=ylPJj9gMlDqTOR7JWnQ7wCfvEBUczqEUVMsFiXXEt3s,20883
16
+ pycmplot-0.2.2.dist-info/licenses/LICENSE,sha256=7HtJWU-I9Tayt7xnvHU0D6oVqeTp3hMqCbTxbkYBTZQ,20904
16
17
  pycmplot_docs/docs/conf.py,sha256=gUt_OitflxpaOrIjeP2aYJ_LCWqTRRdmo_HIcVVf3hI,2992
17
- pycmplot-0.2.1.dist-info/METADATA,sha256=H_KRLR0x6ZeJ3OTUlwgeIpOwkj_m-w2SlyrmbKcFYTo,8054
18
- pycmplot-0.2.1.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
19
- pycmplot-0.2.1.dist-info/entry_points.txt,sha256=cE8IAltA_Q-QQuWQ5DE3Lv-9ktYQ_jyWaD6I97QbeyU,49
20
- pycmplot-0.2.1.dist-info/top_level.txt,sha256=gxbPirasq6TczoykxC2gfk5_En7R65BN-J5ADiV5i3c,23
21
- pycmplot-0.2.1.dist-info/RECORD,,
18
+ pycmplot-0.2.2.dist-info/METADATA,sha256=9qN2xJp3CGcTp8kxjfmzAz7AhfJVPmk60jTqJ8sxeqI,8108
19
+ pycmplot-0.2.2.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
20
+ pycmplot-0.2.2.dist-info/entry_points.txt,sha256=cE8IAltA_Q-QQuWQ5DE3Lv-9ktYQ_jyWaD6I97QbeyU,49
21
+ pycmplot-0.2.2.dist-info/top_level.txt,sha256=gxbPirasq6TczoykxC2gfk5_En7R65BN-J5ADiV5i3c,23
22
+ pycmplot-0.2.2.dist-info/RECORD,,