pycmplot 0.2.1__tar.gz → 0.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. {pycmplot-0.2.1/pycmplot.egg-info → pycmplot-0.2.3}/PKG-INFO +3 -2
  2. {pycmplot-0.2.1 → pycmplot-0.2.3}/README.md +2 -1
  3. {pycmplot-0.2.1 → pycmplot-0.2.3}/docs/conf.py +1 -1
  4. {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot/__init__.py +1 -1
  5. {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot/_core.py +61 -5
  6. {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot/cli.py +38 -17
  7. {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot/io.py +50 -20
  8. {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot/plotting/circular.py +11 -7
  9. {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot/plotting/linear.py +140 -9
  10. pycmplot-0.2.3/pycmplot/plotting/qq.py +643 -0
  11. {pycmplot-0.2.1 → pycmplot-0.2.3/pycmplot.egg-info}/PKG-INFO +3 -2
  12. {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot.egg-info/SOURCES.txt +1 -0
  13. {pycmplot-0.2.1 → pycmplot-0.2.3}/pyproject.toml +2 -2
  14. {pycmplot-0.2.1 → pycmplot-0.2.3}/setup.cfg +1 -1
  15. {pycmplot-0.2.1 → pycmplot-0.2.3}/LICENSE +0 -0
  16. {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot/annotation.py +0 -0
  17. {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot/constants.py +0 -0
  18. {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot/data/Homo_sapiens.GRCh37.geneinfo.tsv.gz +0 -0
  19. {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot/data/Homo_sapiens.GRCh38.geneinfo.tsv.gz +0 -0
  20. {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot/data/hg19ToHg38.over.chain +0 -0
  21. {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot/liftover.py +0 -0
  22. {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot/resources.py +0 -0
  23. {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot/stats.py +0 -0
  24. {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot.egg-info/dependency_links.txt +0 -0
  25. {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot.egg-info/entry_points.txt +0 -0
  26. {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot.egg-info/requires.txt +0 -0
  27. {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot.egg-info/top_level.txt +0 -0
  28. {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot_docs/docs/conf.py +0 -0
  29. {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot_docs/docstrings_annotation.py +0 -0
  30. {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot_docs/docstrings_core_cli.py +0 -0
  31. {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot_docs/docstrings_io.py +0 -0
  32. {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot_docs/docstrings_liftover.py +0 -0
  33. {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot_docs/docstrings_plotting.py +0 -0
  34. {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot_docs/docstrings_resources_constants.py +0 -0
  35. {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot_docs/docstrings_stats.py +0 -0
  36. {pycmplot-0.2.1 → pycmplot-0.2.3}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pycmplot
3
- Version: 0.2.1
3
+ Version: 0.2.3
4
4
  Summary: Multi-track circular and linear Manhattan plot generation for GWAS summary statistics
5
5
  Author: Kevin Esoh
6
6
  Author-email: Kevin Esoh <kesohku1@jh.edu>
@@ -183,7 +183,8 @@ pycmplot \
183
183
  | `-b, --build` | Comma-separated genome builds of sumstats | off |
184
184
  | `-bc, --build_column` | Genome build column name (containing hg18/hg19/hg38) | off |
185
185
  | `-m, --mode` | `lm` linear or `cm` circular | `lm` |
186
- | `-qq, --qq_plot` | Also generate a QQ-plot | off (coming soon...) |
186
+ | `-qq, --qq_plot` | Also generate a QQ-plot | off |
187
+ | `-qq_thin, --qq_thin` | Thin p-values for faster QQ-plotting | off |
187
188
  | `--logp` | Plot -log10(p) | off |
188
189
  | `-sig, --signif_threshold` | Genome-wide significance threshold | off (auto 0.05/N) |
189
190
  | `-sigl, --signif_line` | Value for genome-wide significance line if different from `-sig` | 5e-8 |
@@ -157,7 +157,8 @@ pycmplot \
157
157
  | `-b, --build` | Comma-separated genome builds of sumstats | off |
158
158
  | `-bc, --build_column` | Genome build column name (containing hg18/hg19/hg38) | off |
159
159
  | `-m, --mode` | `lm` linear or `cm` circular | `lm` |
160
- | `-qq, --qq_plot` | Also generate a QQ-plot | off (coming soon...) |
160
+ | `-qq, --qq_plot` | Also generate a QQ-plot | off |
161
+ | `-qq_thin, --qq_thin` | Thin p-values for faster QQ-plotting | off |
161
162
  | `--logp` | Plot -log10(p) | off |
162
163
  | `-sig, --signif_threshold` | Genome-wide significance threshold | off (auto 0.05/N) |
163
164
  | `-sigl, --signif_line` | Value for genome-wide significance line if different from `-sig` | 5e-8 |
@@ -12,7 +12,7 @@ sys.path.insert(0, os.path.abspath(".."))
12
12
  project = "pycmplot"
13
13
  copyright = "2026, Kevin Esoh"
14
14
  author = "Kevin Esoh"
15
- release = "0.2.1" # update to match PyPI version
15
+ release = "0.2.3" # update to match PyPI version
16
16
 
17
17
  # -- General configuration -----------------------------------------------------
18
18
  extensions = [
@@ -42,4 +42,4 @@ __all__ = [
42
42
  "ResourceConfig",
43
43
  ]
44
44
 
45
- __version__ = "0.2.1"
45
+ __version__ = "0.2.3"
@@ -92,6 +92,7 @@ def main() -> None:
92
92
  )
93
93
  from pycmplot.plotting.linear import plot_linear
94
94
  from pycmplot.plotting.circular import plot_circular
95
+ from pycmplot.plotting.qq import plot_qq_combined, plot_qq_separate, plot_qq_overlay
95
96
  from pycmplot.resources import ResourceConfig
96
97
 
97
98
  # ------------------------------------------------------------------
@@ -110,6 +111,13 @@ def main() -> None:
110
111
  labels_raw = args.labels
111
112
  pcol_arg = args.pval_column
112
113
  logp = args.logp
114
+ qq = args.qq_plot
115
+ qq_separate = args.qq_separate
116
+ qq_ncols = args.qq_ncols
117
+ qq_thin = args.qq_thin
118
+ thin_below = args.thin_below
119
+ qq_max_points = args.qq_max_points
120
+ qq_overlay = args.qq_overlay
113
121
  chrom_label_size = args.chrom_label_size
114
122
  chrom_label_side = args.chrom_label_side
115
123
  track_label_size = args.track_label_size
@@ -164,7 +172,8 @@ def main() -> None:
164
172
  # ------------------------------------------------------------------
165
173
  (
166
174
  plt_name,
167
- table_out
175
+ table_out,
176
+ plt_base,
168
177
  ) = get_output_paths(
169
178
  labels,
170
179
  mode = mode,
@@ -202,6 +211,7 @@ def main() -> None:
202
211
  sumstats_loaded,
203
212
  hits_table,
204
213
  signif_lines,
214
+ pval_dict,
205
215
  ) = get_sumstats_and_merged_sector_list(
206
216
  sum_stats=sum_stats,
207
217
  labels=labels,
@@ -219,13 +229,13 @@ def main() -> None:
219
229
  # ------------------------------------------------------------------
220
230
  # ANNOTATE BY
221
231
  # ------------------------------------------------------------------
232
+ label_col = 'SNP'
222
233
  if annotate:
223
- if str(annotate).upper() == "GENE":
234
+ if str(annotate).upper() == "GENE" and 'top_gene' in hits_table.columns:
224
235
  label_col = 'top_gene'
225
- elif str(annotate).upper() == "SNP":
226
- label_col = 'SNP'
227
- else:
236
+ elif label_col in hits_table.columns:
228
237
  label_col = annotate
238
+
229
239
 
230
240
  logger.info(f"Anotate by: {label_col}")
231
241
 
@@ -296,6 +306,52 @@ def main() -> None:
296
306
  figsize=(15, 9)
297
307
  )
298
308
 
309
+ # ------------------------------------------------------------------
310
+ # QQ PLOT
311
+ # ------------------------------------------------------------------
312
+ if qq and sumstats_loaded:
313
+ logger.info("Generating QQ Plot(s) ...")
314
+ qq_stem = f"{plt_base}_qq"
315
+
316
+ if qq_separate:
317
+ plot_qq_separate(
318
+ pval_dict=pval_dict,
319
+ thin=qq_thin,
320
+ thin_below=thin_below,
321
+ max_points=qq_max_points,
322
+ output_path=qq_stem,
323
+ colors=colors,
324
+ signif_threshold=signif_threshold or 5e-8,
325
+ dpi=dpi,
326
+ fig_format=output_format,
327
+ )
328
+ elif qq_overlay:
329
+ plot_qq_overlay(
330
+ pval_dict=pval_dict,
331
+ thin=qq_thin,
332
+ thin_below=thin_below,
333
+ max_points=qq_max_points,
334
+ colors=colors,
335
+ signif_threshold=signif_threshold or 5e-8,
336
+ dpi=dpi,
337
+ title=plot_title,
338
+ output_path=f"{qq_stem}_overlay",
339
+ fig_format=output_format,
340
+ )
341
+ else:
342
+ plot_qq_combined(
343
+ pval_dict=pval_dict,
344
+ thin=qq_thin,
345
+ thin_below=thin_below,
346
+ max_points=qq_max_points,
347
+ colors=colors,
348
+ ncols=qq_ncols,
349
+ signif_threshold=signif_threshold or 5e-8,
350
+ dpi=dpi,
351
+ title=plot_title,
352
+ output_path=f"{qq_stem}_combined",
353
+ fig_format=output_format,
354
+ )
299
355
 
300
356
  if __name__ == "__main__":
301
357
  main()
@@ -329,29 +329,50 @@ def get_arguments(descmsg: str = DESCMSG) -> argparse.Namespace:
329
329
  )
330
330
  opt.add_argument(
331
331
  "-bc", "--build_column", required=False, type=str, metavar="str",
332
- help="Name of column containing genome build (hg18/hg19/hg38). Or use ``--build`` below to supply genome builds per summary stat file."
333
- )
332
+ help=("Name of column containing genome build (hg18/hg19/hg38)."
333
+ "Or use ``--build`` below to supply genome builds per summary stat file."
334
+ ))
334
335
  opt.add_argument(
335
- "-b","--build",
336
- help="""
337
- Comma-sperated list of genome build of summary stats file(s) listed in the same order as sumstats files.
338
- (e.g. hg19,hg38,hg38,hg19 means:
339
- file1.txt.gz --> hg19
340
- file2.txt.gz --> hg38
341
- file3.tsv --> hg38 ... etc)
342
- """,
343
- required=False,
344
- type=str,
345
- metavar='str'
336
+ "-b","--build", required=False, type=str, metavar='str',
337
+ help=
338
+ """Comma-sperated list of genome build of summary stats file(s) listed
339
+ in the same order as sumstats files. e.g. hg19,hg38,hg38,hg19 means:
340
+ file1.txt.gz --> hg19
341
+ file2.txt.gz --> hg38
342
+ file3.tsv --> hg38 ... etc
343
+ """
346
344
  )
347
345
  opt.add_argument(
348
346
  "--logp", action="store_true",
349
347
  help="Plot −log₁₀(p) instead of raw p-values."
350
348
  )
351
- opt.add_argument(
352
- "-qq", "--qq_plot", action="store_true",
353
- help="Also generate a QQ-plot."
354
- )
349
+ opt.add_argument("-qq", "--qq_plot", action="store_true",
350
+ help="Generate QQ-plot(s) alongside the Manhattan plot.")
351
+ opt.add_argument("-qq_sep", "--qq_separate", action="store_true",
352
+ help=(
353
+ "Save one QQ-plot file per sumstat instead of a "
354
+ "combined multi-panel figure. Only used when -qq is set."
355
+ ))
356
+ opt.add_argument("-qq_cols", "--qq_ncols", default=3, type=int, metavar="int",
357
+ help="Number of columns in the combined QQ-plot grid (default: 3).")
358
+ opt.add_argument("-qq_thin", "--qq_thin", action="store_true", default=False,
359
+ help=(
360
+ "Thin null-like p-values before QQ plotting for speed (default: off)."
361
+ "Include this flag to turn on for speed."
362
+ ))
363
+ opt.add_argument("-thin_below", "--thin_below", type=float, metavar="float", default=0.01,
364
+ help=(
365
+ "P-value threshold below which all points are always kept."
366
+ "Points above this threshold are downsampled (default: 0.01)."
367
+ ))
368
+ opt.add_argument("-qq_max_pts", "--qq_max_points", default=50000, type=int, metavar="int",
369
+ help="Max points to plot per QQ track after thinning (default: 50000).")
370
+ opt.add_argument("-qq_ov", "--qq_overlay", action="store_true",
371
+ help=(
372
+ "Plot all sumstats on a single overlaid QQ-plot, "
373
+ "each coloured by label with lambda in the legend. "
374
+ "Only used when -qq is set."
375
+ ))
355
376
  opt.add_argument(
356
377
  "-tp", "--trim_pval", type=float, metavar="float",
357
378
  help="Trim variants with p > this value before plotting."
@@ -39,6 +39,7 @@ import pandas as pd
39
39
  from pycmplot.stats import get_lead_snps, get_highlight_snps
40
40
  from pycmplot.annotation import get_hits_summary_table
41
41
  from pycmplot.resources import ResourceConfig, default_resources
42
+ from pycmplot.constants import hg38_chr_lengths
42
43
 
43
44
  logger = logging.getLogger(__name__)
44
45
 
@@ -291,7 +292,9 @@ def strip_comma_separated_input_streams(
291
292
 
292
293
  if builds:
293
294
  builds = [s.strip() for s in builds.strip().split(",")]
294
- if len(sum_stats) != len(labels) != len(builds):
295
+ if len(sum_stats) == len(labels) == len(builds):
296
+ pass
297
+ else:
295
298
  sys.exit(
296
299
  "Error: number of summary stats files, labels, and builds must match.\n"
297
300
  f" Files: {sum_stats}\n"
@@ -429,16 +432,16 @@ def get_output_paths(
429
432
 
430
433
  labels = [re.sub(r"[^a-zA-Z0-9\s]", "", x).replace(" ", "_") for x in labels]
431
434
 
432
- plt_base = str(out_path / f"{pltitle}_{'_'.join(labels)}_{mode.lower()}")
433
-
434
435
  suffix = "_logp" if logp else "_pval"
435
436
 
436
- plt_name = f"{plt_base}{suffix}.{output_format.lower()}"
437
+ plt_base = str(out_path / f"{pltitle}_{'_'.join(labels)}_{mode.lower()}{suffix}")
438
+
439
+ plt_name = f"{plt_base}.{output_format.lower()}"
437
440
 
438
- table_out = f"{plt_base}{suffix}_locus_summary_table.tsv"
441
+ table_out = f"{plt_base}_locus_summary_table.tsv"
439
442
 
440
443
 
441
- return plt_name, table_out
444
+ return plt_name, table_out, plt_base
442
445
 
443
446
 
444
447
 
@@ -561,11 +564,14 @@ def prep_pycmplot_input_info(
561
564
  snp_candidates = [c for c in snp_candidates if c]
562
565
  pvl_candidates = [c for c in pvl_candidates if c]
563
566
 
564
- bld_candidates = ["BUILD", "Genome", "Genome_Build", "Genome-build"]
565
- bld_candidates_l = [x.lower() for x in bld_candidates]
566
- bld_candidates_u = [x.upper() for x in bld_candidates]
567
- bld_candidates = [build] + bld_candidates + bld_candidates_l + bld_candidates_u
568
- bld_candidates = [c for c in bld_candidates if c]
567
+ #if buildc:
568
+ bld_candidates = buildc
569
+ if not bld_candidates:
570
+ bld_candidates = ["BUILD", "Genome", "Genome_Build", "Genome-build"]
571
+ bld_candidates_l = [x.lower() for x in bld_candidates]
572
+ bld_candidates_u = [x.upper() for x in bld_candidates]
573
+ bld_candidates = [buildc] + bld_candidates + bld_candidates_l + bld_candidates_u
574
+ bld_candidates = [c for c in bld_candidates if c]
569
575
 
570
576
  # ------------------------------------------------------------------
571
577
  # Resolve column names per file
@@ -631,7 +637,7 @@ def prep_pycmplot_input_info(
631
637
  }
632
638
  col_dtypes = {
633
639
  chrom_col: str,
634
- pos_col: int,
640
+ pos_col: object,
635
641
  snp_col: str,
636
642
  pcol: float,
637
643
  bcol: str,
@@ -678,6 +684,7 @@ def get_sumstats_and_merged_sector_list(
678
684
  signif_line: Optional[float] = None,
679
685
  suggest_threshold: Optional[float] = None,
680
686
  resources: Optional[ResourceConfig] = None,
687
+ hg38_chr_lengths = hg38_chr_lengths,
681
688
  ):
682
689
  GET_SUMSTATS = """Load summary statistics, run liftover, extract lead SNPs, and compute
683
690
  merged Circos sector sizes.
@@ -795,6 +802,7 @@ def get_sumstats_and_merged_sector_list(
795
802
  }
796
803
 
797
804
  sumstats_loaded: dict[str, list] = {}
805
+ pval_dict: dict[str, np.ndarray | pd.Series] = {}
798
806
  all_lead_snps: list[pd.DataFrame] = []
799
807
 
800
808
  for label in sumstats.keys() & (file_info or {}).keys():
@@ -818,13 +826,18 @@ def get_sumstats_and_merged_sector_list(
818
826
  dtype=sumstat_dtypes,
819
827
  ).rename(columns=sumstat_newcols)
820
828
 
829
+ # Get dict of p-values for qq-plotting before applying trim_pval
830
+ logger.info("Extracting raw p-values for QQ-plotting ...")
831
+ pval_dict[label] = df["P"].dropna().astype(float).values
832
+
833
+
821
834
  # Add build column if not exist and build supplied
822
835
  if build:
823
836
  df['BUILD'] = build
824
837
 
825
838
  # Trim insignificant variants for faster plotting
826
839
  if trim_pval:
827
- logger.info("Excluding variants with p-value less than %s ...", trim_pval)
840
+ logger.info("Excluding variants with p-value less than %s to speed up Manhattan plotting ...", trim_pval)
828
841
  df = df[df["P"].astype(float) <= float(trim_pval)]
829
842
  else:
830
843
  df = df[df["P"].astype(float) <= 1]
@@ -853,6 +866,7 @@ def get_sumstats_and_merged_sector_list(
853
866
  if "BUILD" in df.columns and "hg19" in df["BUILD"].unique():
854
867
  logger.info("Converting hg19 coordinates to hg38 ...")
855
868
  sumstats_loaded[label][0] = liftover_position(df, resources=resources)
869
+ liftover = True
856
870
 
857
871
  # Lead SNPs
858
872
  logger.info("Extracting variants to highlight ...")
@@ -917,7 +931,13 @@ def get_sumstats_and_merged_sector_list(
917
931
  for _ in sumstats
918
932
  ]
919
933
 
920
- # Optionally sort tracks
934
+
935
+ # sort dicts by user-supplied order
936
+ sumstats_loaded = {key: sumstats_loaded[key] for key in labels if key in sumstats_loaded}
937
+ pval_dict = {key: pval_dict[key] for key in labels if key in pval_dict}
938
+
939
+
940
+ # or sort by user option
921
941
  if sort_tracks is not None:
922
942
  if sort_tracks.lower() == "label":
923
943
  sumstats_loaded = dict(sorted(sumstats_loaded.items()))
@@ -928,6 +948,7 @@ def get_sumstats_and_merged_sector_list(
928
948
  key=lambda item: (item[0], natsort.natsort_keygen()(item[1][1])),
929
949
  )
930
950
  )
951
+
931
952
 
932
953
  # Compute per-sumstat sector sizes (chrom → [min_pos, max_pos])
933
954
  assoc_sector_sizes_list: list[dict] = []
@@ -941,7 +962,16 @@ def get_sumstats_and_merged_sector_list(
941
962
  for chrom in assoc["CHR"].unique():
942
963
  sub = assoc[assoc["CHR"] == chrom]
943
964
  lo_val = max(sub["POS"].min() - 1_000_000, 0)
944
- hi_val = sub["POS"].max() + 1_000_000
965
+ hi_val = sub["POS"].max()
966
+ chrom_max = hi_val
967
+
968
+ # Ensure sector sizes are within chrom ranges if liftover
969
+ if liftover:
970
+ logger.info("Limiting sector sizes to chromosome ranges for liftover sumstats ...")
971
+ hg38_chr_lengths = {k.replace("chr",""): v for k, v in hg38_chr_lengths.items()}
972
+ chrom_max = hg38_chr_lengths[chrom]
973
+
974
+ hi_val = min(hi_val, chrom_max)
945
975
  assoc_dic[str(chrom)] = [lo_val, hi_val]
946
976
 
947
977
  min_dic_val = min(assoc_dic.values())
@@ -955,9 +985,9 @@ def get_sumstats_and_merged_sector_list(
955
985
 
956
986
  # Add spacer sector for y-axis labelling
957
987
  if min_dic_val is not None:
958
- if len(labels) <= 5:
959
- merged["Spacer1"] = [x + x / 2 for x in min_dic_val]
960
- else:
961
- merged["Spacer1"] = [x * 2 for x in min_dic_val]
988
+ #if len(labels) <= 5:
989
+ # merged["Spacer1"] = [x + x / 2 for x in min_dic_val]
990
+ #else:
991
+ merged["Spacer1"] = [x * 2 for x in min_dic_val]
962
992
 
963
- return merged, sumstats_loaded, hits_table, signif_lines
993
+ return merged, sumstats_loaded, hits_table, signif_lines, pval_dict
@@ -380,6 +380,7 @@ def plot_circular(
380
380
  annotate: str = None,
381
381
  label_col: str = None,
382
382
  chrom_label_side: str = 'inside',
383
+ chrom_label_size: float = 6,
383
384
  signif_line: float = 5e-8,
384
385
  highlight: bool = False,
385
386
  highlight_thresh: float = 5e-8,
@@ -387,7 +388,6 @@ def plot_circular(
387
388
  highlight_line: bool = False,
388
389
  highlight_line_color: str = 'grey',
389
390
  colors: list[str] = ['steelblue', 'grey'],
390
- chrom_label_size: float = 6,
391
391
  track_label_size: float = 6,
392
392
  track_label_orientation: str = 'vertical',
393
393
  hits_table: pd.DataFrame = None,
@@ -526,7 +526,8 @@ def plot_circular(
526
526
  labels = list(sumstats_loaded.keys())
527
527
  (
528
528
  plt_name,
529
- table_out
529
+ table_out,
530
+ plt_base,
530
531
  ) = get_output_paths(
531
532
  labels,
532
533
  mode='cm',
@@ -558,14 +559,16 @@ def plot_circular(
558
559
  radii_reversed = dict(reversed(list(radii.items())))
559
560
 
560
561
  inside_loc = r_min - 3
561
- outside_loc = 105
562
- chrom_label_loc = outside_loc if chrom_label_side == "outside" else inside_loc
562
+ outside_loc = r_max + 4
563
563
 
564
564
  if annotate:
565
565
  annot_key = next(iter(radii_reversed))
566
566
  annot_r = radii_reversed.pop(annot_key)
567
+ outside_loc = max(list(radii_reversed.values())[0]) + 2
567
568
  radii_reversed["annot_track_r"] = annot_r
568
569
 
570
+ chrom_label_loc = outside_loc if chrom_label_side == "outside" else inside_loc
571
+
569
572
  for index, (sector_radius, sumstats_key, sumstats_value, signif_dict) in enumerate(
570
573
  zip(
571
574
  radii_reversed.values(),
@@ -647,7 +650,7 @@ def plot_circular(
647
650
  x=pos,
648
651
  label=str(label),
649
652
  min_r=r_low,
650
- max_r=r_low + 3,
653
+ max_r=r_low + 6,
651
654
  label_size=annotation_size,
652
655
  text_kws={
653
656
  "size": "large",
@@ -668,8 +671,9 @@ def plot_circular(
668
671
  r=[sector_min_r, r_low],
669
672
  start=pos,
670
673
  end=pos,
674
+ alpha=0.4,
671
675
  color=highlight_line_color,
672
- lw=0.5,
676
+ lw=0.4,
673
677
  ls="--",
674
678
  )
675
679
 
@@ -709,6 +713,6 @@ def plot_circular(
709
713
 
710
714
  if plt_name:
711
715
  fig.savefig(fname=plt_name.lower(), dpi=dpi)
712
- logger.info("Saved circular Manhattan plot: %s", plt_name)
716
+ logger.info("Saved circular Manhattan plot: %s", plt_name.lower())
713
717
 
714
718
  return fig
@@ -100,7 +100,7 @@ def _cluster_annotations_by_chr(
100
100
  return clusters
101
101
 
102
102
 
103
- def _draw_annotation_arrows_2(
103
+ def _draw_annotation_arrows(
104
104
  ax,
105
105
  annot_df,
106
106
  chr_col: str,
@@ -212,7 +212,7 @@ def _draw_annotation_arrows_2(
212
212
 
213
213
 
214
214
 
215
- def _draw_annotation_arrows(
215
+ def _draw_annotation_arrows_2(
216
216
  ax,
217
217
  annot_df,
218
218
  chr_col: str,
@@ -361,6 +361,118 @@ def _draw_annotation_arrows(
361
361
 
362
362
  last_xtext = max(x_texts)
363
363
 
364
+
365
+ # Using cumulative distance for anntations and separating clusters
366
+ def _draw_annotation_arrows_3(
367
+ ax,
368
+ annot_df,
369
+ chr_col: str,
370
+ label_col: str,
371
+ offsets: dict,
372
+ chr_max: dict,
373
+ spread_width: float = 60e6,
374
+ isolation_threshold: float = 80e6,
375
+ stack_threshold: float = 10e6,
376
+ y_text_base: float = 0.55,
377
+ y_stack_step: float = 0.02,
378
+ max_rad: float = 0.35,
379
+ y_tip: float = 0.0,
380
+ ) -> None:
381
+
382
+ annot_df = annot_df.sort_values(by=[chr_col, "x"], key=natsort_keygen())
383
+ last_xtext = 0 - spread_width
384
+
385
+ for chr_name, df_chr in annot_df.groupby(chr_col, sort=False):
386
+ df_chr = df_chr.sort_values("x")
387
+ chr_start = offsets[chr_name]
388
+ chr_end = offsets[chr_name] + chr_max[chr_name]
389
+ chr_range = chr_end - chr_start
390
+
391
+ x_signals = df_chr["x"].values
392
+ labels = df_chr[label_col].values
393
+ n = len(x_signals)
394
+
395
+ # ------------------------------------------------------------------
396
+ # Compute label x positions (spread or straight)
397
+ # ------------------------------------------------------------------
398
+ x_texts = []
399
+ for k, x_sig in enumerate(x_signals):
400
+ neighbours = np.delete(x_signals, k)
401
+ min_dist = np.min(np.abs(neighbours - x_sig)) if len(neighbours) else np.inf
402
+
403
+ if min_dist >= isolation_threshold:
404
+ x_texts.append(x_sig) # Tier 1: sit directly above
405
+ else:
406
+ x_texts.append(None) # Tier 2: needs spreading
407
+
408
+ spread_indices = [k for k, v in enumerate(x_texts) if v is None]
409
+ if spread_indices:
410
+ sw = spread_width
411
+ pad = sw / int(str(sw)[:2]) / 2
412
+ while sw > chr_range and sw > pad:
413
+ sw -= pad
414
+
415
+ sig_start = x_signals[spread_indices[0]]
416
+ xmin = sig_start - sw
417
+ positions = np.arange(xmin, xmin + len(spread_indices) * sw, sw)
418
+
419
+ while positions[0] <= last_xtext:
420
+ positions = positions + sw
421
+
422
+ for j, k in enumerate(spread_indices):
423
+ x_texts[k] = positions[j]
424
+
425
+ # ------------------------------------------------------------------
426
+ # Compute label y positions using cumulative x distance
427
+ # ------------------------------------------------------------------
428
+ y_texts = [y_text_base] * n
429
+
430
+ for k in range(1, n):
431
+ cum_dist = abs(x_texts[k] - x_texts[k - 1])
432
+ if cum_dist <= stack_threshold:
433
+ # too close to previous label — stack upward adaptively
434
+ y_texts[k] = y_texts[k - 1] + y_stack_step + (
435
+ y_stack_step * (1 - cum_dist / stack_threshold)
436
+ )
437
+ else:
438
+ y_texts[k] = y_text_base # far enough — reset to baseline
439
+
440
+ # ------------------------------------------------------------------
441
+ # Draw arrows and labels
442
+ # ------------------------------------------------------------------
443
+ for x_sig, x_txt, y_txt, label in zip(x_signals, x_texts, y_texts, labels):
444
+ dx = x_txt - x_sig
445
+ rad = np.clip(dx / (spread_width * 2), -max_rad, max_rad)
446
+
447
+ arrow = FancyArrowPatch(
448
+ (x_txt, y_txt),
449
+ (x_sig, y_tip - 0.05),
450
+ arrowstyle="-|>",
451
+ mutation_scale=12,
452
+ lw=0.6,
453
+ color="grey",
454
+ alpha=0.5,
455
+ connectionstyle=f"arc3,rad={rad}",
456
+ transform=ax.transData,
457
+ )
458
+ ax.add_patch(arrow)
459
+
460
+ ax.text(
461
+ x_txt,
462
+ y_txt + 0.02,
463
+ str(label),
464
+ rotation=45,
465
+ ha="left",
466
+ va="bottom",
467
+ fontsize=10,
468
+ clip_on=False,
469
+ color="black",
470
+ fontstyle="italic",
471
+ fontweight="regular",
472
+ )
473
+
474
+ last_xtext = max(x_texts)
475
+
364
476
  # ---------------------------------------------------------------------------
365
477
  # Public function
366
478
  # ---------------------------------------------------------------------------
@@ -647,7 +759,8 @@ def plot_linearm(
647
759
  # Annotation track
648
760
  # ------------------------------------------------------------------
649
761
  if annotate and annot_df is not None:
650
- """
762
+
763
+
651
764
  _draw_annotation_arrows(
652
765
  ax_annot,
653
766
  annot_df,
@@ -657,9 +770,10 @@ def plot_linearm(
657
770
  chr_max=chr_max,
658
771
  spread_width=60e6,
659
772
  )
660
- """
773
+
661
774
 
662
- _draw_annotation_arrows(
775
+ """
776
+ _draw_annotation_arrows_2(
663
777
  ax=ax_annot,
664
778
  annot_df=annot_df,
665
779
  chr_col=chr_col,
@@ -667,13 +781,31 @@ def plot_linearm(
667
781
  offsets=offsets,
668
782
  chr_max=chr_max,
669
783
  spread_width=60e6,
670
- isolation_threshold=80e6, # above this → straight (Tier 1)
784
+ isolation_threshold=40e6, # above this → straight (Tier 1)
671
785
  stack_threshold=10e6, # below this → stack (Tier 3)
672
786
  max_tilt=45, # max angleA departure from vertical
673
787
  y_tip=0.0,
674
788
  y_text=0.55,
675
789
  y_stack_step=0.12, # vertical gap between stacked labels
676
790
  )
791
+
792
+
793
+ _draw_annotation_arrows_3(
794
+ ax=ax_annot,
795
+ annot_df=annot_df,
796
+ chr_col=chr_col,
797
+ label_col=label_col,
798
+ offsets=offsets,
799
+ chr_max=chr_max,
800
+ spread_width=60e6,
801
+ isolation_threshold=80e6,
802
+ stack_threshold=90e6,
803
+ y_text_base=0.55,
804
+ y_stack_step=0.03,
805
+ max_rad=0.35,
806
+ y_tip=0.0,
807
+ )
808
+ """
677
809
 
678
810
  ax_annot.set_ylim(0, 1)
679
811
  ax_annot.axis("off")
@@ -859,12 +991,11 @@ def plot_linear(
859
991
  logger.info("'SNP' column is used for annotation since '%s' column could not be resolved in hits table.", label_col)
860
992
  pass
861
993
 
862
- logger.info(f"LABEL COL: {label}")
863
-
864
994
  # plot name
865
995
  (
866
996
  plt_name,
867
- table_out
997
+ table_out,
998
+ plt_base,
868
999
  ) = get_output_paths(
869
1000
  labels = t_labels,
870
1001
  mode='lm',