pycmplot 0.2.5__tar.gz → 0.2.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. pycmplot-0.2.6/LICENSE +21 -0
  2. {pycmplot-0.2.5 → pycmplot-0.2.6}/PKG-INFO +10 -2
  3. {pycmplot-0.2.5 → pycmplot-0.2.6}/pycmplot/__init__.py +1 -1
  4. {pycmplot-0.2.5 → pycmplot-0.2.6}/pycmplot/_core.py +9 -21
  5. {pycmplot-0.2.5 → pycmplot-0.2.6}/pycmplot/annotation.py +33 -0
  6. {pycmplot-0.2.5 → pycmplot-0.2.6}/pycmplot/cli.py +26 -6
  7. pycmplot-0.2.6/pycmplot/data/hg18ToHg38.over.chain.gz +0 -0
  8. pycmplot-0.2.6/pycmplot/data/hg19ToHg38.over.chain.gz +0 -0
  9. {pycmplot-0.2.5 → pycmplot-0.2.6}/pycmplot/io.py +50 -10
  10. {pycmplot-0.2.5 → pycmplot-0.2.6}/pycmplot/liftover.py +82 -15
  11. {pycmplot-0.2.5 → pycmplot-0.2.6}/pycmplot/plotting/circular.py +12 -18
  12. {pycmplot-0.2.5 → pycmplot-0.2.6}/pycmplot/plotting/linear.py +78 -30
  13. {pycmplot-0.2.5 → pycmplot-0.2.6}/pycmplot/resources.py +20 -7
  14. {pycmplot-0.2.5 → pycmplot-0.2.6}/pycmplot.egg-info/PKG-INFO +10 -2
  15. {pycmplot-0.2.5 → pycmplot-0.2.6}/pycmplot.egg-info/SOURCES.txt +2 -1
  16. {pycmplot-0.2.5 → pycmplot-0.2.6}/pyproject.toml +11 -3
  17. {pycmplot-0.2.5 → pycmplot-0.2.6}/setup.cfg +1 -1
  18. pycmplot-0.2.5/LICENSE +0 -441
  19. pycmplot-0.2.5/pycmplot/data/hg19ToHg38.over.chain +0 -56506
  20. {pycmplot-0.2.5 → pycmplot-0.2.6}/README.md +0 -0
  21. {pycmplot-0.2.5 → pycmplot-0.2.6}/pycmplot/__main__.py +0 -0
  22. {pycmplot-0.2.5 → pycmplot-0.2.6}/pycmplot/constants.py +0 -0
  23. {pycmplot-0.2.5 → pycmplot-0.2.6}/pycmplot/data/Homo_sapiens.GRCh37.geneinfo.tsv.gz +0 -0
  24. {pycmplot-0.2.5 → pycmplot-0.2.6}/pycmplot/data/Homo_sapiens.GRCh38.geneinfo.tsv.gz +0 -0
  25. {pycmplot-0.2.5 → pycmplot-0.2.6}/pycmplot/plotting/__init__.py +0 -0
  26. {pycmplot-0.2.5 → pycmplot-0.2.6}/pycmplot/plotting/qq.py +0 -0
  27. {pycmplot-0.2.5 → pycmplot-0.2.6}/pycmplot/stats.py +0 -0
  28. {pycmplot-0.2.5 → pycmplot-0.2.6}/pycmplot.egg-info/dependency_links.txt +0 -0
  29. {pycmplot-0.2.5 → pycmplot-0.2.6}/pycmplot.egg-info/entry_points.txt +0 -0
  30. {pycmplot-0.2.5 → pycmplot-0.2.6}/pycmplot.egg-info/requires.txt +0 -0
  31. {pycmplot-0.2.5 → pycmplot-0.2.6}/pycmplot.egg-info/top_level.txt +0 -0
  32. {pycmplot-0.2.5 → pycmplot-0.2.6}/setup.py +0 -0
pycmplot-0.2.6/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Kevin Esoh
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -1,15 +1,23 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pycmplot
3
- Version: 0.2.5
3
+ Version: 0.2.6
4
4
  Summary: Multi-track circular and linear Manhattan plot generation for GWAS summary statistics
5
5
  Author: Kevin Esoh
6
6
  Author-email: Kevin Esoh <kesohku1@jh.edu>
7
- License-Expression: CC-BY-NC-SA-4.0
7
+ License-Expression: MIT
8
8
  Project-URL: Homepage, https://github.com/esohkevin/pycmplot
9
9
  Project-URL: Issues, https://github.com/esohkevin/pycmplot/issues
10
10
  Project-URL: Docs, https://pycmplot.readthedocs.io/en/latest/
11
11
  Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.9
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
12
16
  Classifier: Operating System :: OS Independent
17
+ Classifier: Intended Audience :: Science/Research
18
+ Classifier: Natural Language :: English
19
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
20
+ Classifier: Topic :: Scientific/Engineering :: Visualization
13
21
  Requires-Python: >=3.9
14
22
  Description-Content-Type: text/markdown
15
23
  License-File: LICENSE
@@ -47,4 +47,4 @@ __all__ = [
47
47
  "ResourceConfig",
48
48
  ]
49
49
 
50
- __version__ = "0.2.5"
50
+ __version__ = "0.2.6"
@@ -94,6 +94,7 @@ def main() -> None:
94
94
  from pycmplot.plotting.circular import plot_circular
95
95
  from pycmplot.plotting.qq import plot_qq_combined, plot_qq_separate, plot_qq_overlay
96
96
  from pycmplot.resources import ResourceConfig
97
+ from pycmplot.annotation import get_annotation_column
97
98
 
98
99
  # ------------------------------------------------------------------
99
100
  # Parse CLI
@@ -147,7 +148,9 @@ def main() -> None:
147
148
  track_heights = args.track_heights
148
149
  linear_track_spacing = args.linear_track_spacing
149
150
  no_track_labels = args.no_track_labels
151
+ ylabel = args.ylabel
150
152
  chr_spacing = args.chr_spacing
153
+ figure_size = args.figure_size
151
154
 
152
155
 
153
156
  # ------------------------------------------------------------------
@@ -226,23 +229,6 @@ def main() -> None:
226
229
  signif_lines = pycmplot_dict["lines"]
227
230
  pval_dict = pycmplot_dict["pvals"]
228
231
 
229
- # ------------------------------------------------------------------
230
- # ANNOTATE BY
231
- # ------------------------------------------------------------------
232
- label_col = 'SNP'
233
- if annotate and not hits_table.empty:
234
- if str(annotate).upper() == "GENE" and 'top_gene' in hits_table.columns:
235
- label_col = 'top_gene'
236
- elif annotate in hits_table.columns:
237
- label_col = annotate
238
- else:
239
- logger.warning(
240
- "Annotation column '%s' not found in hits table; "
241
- "falling back to 'SNP'.", annotate,
242
- )
243
-
244
- logger.info("Annotate by: %s", label_col)
245
-
246
232
  # ------------------------------------------------------------------
247
233
  # CIRCULAR MANHATTAN
248
234
  # ------------------------------------------------------------------
@@ -264,7 +250,6 @@ def main() -> None:
264
250
  track_label_size = track_label_size,
265
251
  track_label_orientation = track_label_orientation,
266
252
  annotate = annotate,
267
- label_col = label_col if annotate else None,
268
253
  annotation_size = annotation_size,
269
254
  hits_table = hits_table,
270
255
  sector_sizes = merged_assoc_sector_sizes,
@@ -284,6 +269,9 @@ def main() -> None:
284
269
  # ------------------------------------------------------------------
285
270
  else:
286
271
  logger.info("Generating LINEAR MANHATTAN Plot ...")
272
+ fsize = figure_size.strip(" ").split(",")
273
+ fsize = [int(v) for v in fsize]
274
+ logger.info(f"FIGURE SIZE: {fsize}")
287
275
  plot_linear(
288
276
  sumstats_loaded=sumstats_loaded,
289
277
  track_heights=t_heights,
@@ -295,19 +283,19 @@ def main() -> None:
295
283
  highlight_color=highlight_color,
296
284
  highlight_line=highlight_line,
297
285
  highlight_line_color=highlight_line_color,
298
- annotate=annotate,
286
+ annotate=annotate,
299
287
  hits_table=hits_table if not hits_table.empty else None,
300
- label_col=label_col if annotate else None,
301
288
  chr_spacing=chr_spacing,
302
289
  linear_track_spacing=linear_track_spacing,
303
290
  colors=colors,
304
291
  signif_lines=signif_lines,
305
292
  plot_title=plot_title,
306
293
  no_track_labels=no_track_labels,
294
+ ylabel=ylabel,
307
295
  dpi=dpi,
308
296
  output_format=output_format,
309
297
  output_dir=output_dir,
310
- figsize=(15, 9)
298
+ figsize=fsize
311
299
  )
312
300
 
313
301
  # ------------------------------------------------------------------
@@ -570,3 +570,36 @@ def get_hits_summary_table(
570
570
  logger.info("Locus summary written to: %s", outpath)
571
571
 
572
572
  return _clump_by_distance(locus_table, window_kb=window_kb)
573
+
574
+
575
def get_annotation_column(
    annotate: str = None,
    hits_table: pd.DataFrame = None,
    label_col: str = None,
):
    """Resolve the hits-table column used to label annotated loci.

    Resolution order:

    1. ``label_col`` when it is given and present in ``hits_table``.
    2. ``annotate`` when it names a column of ``hits_table``.
    3. When ``annotate`` is ``"GENE"`` (case-insensitive), the first gene
       column present in the table, trying ``'top_gene'`` and then
       ``'nearest_upstream_gene'``.
    4. ``'SNP'`` otherwise, with a warning.

    A single column name is returned for the whole table, so the choice is
    made from the table's columns rather than from the content of any
    individual row.

    Parameters
    ----------
    annotate : str, optional
        Requested annotation mode or column name. When falsy, no
        resolution is attempted and ``'SNP'`` is returned.
    hits_table : pandas.DataFrame, optional
        Table of hits whose columns are searched. ``None`` or an empty
        table yields ``'SNP'``.
    label_col : str, optional
        Explicit column name that takes precedence over ``annotate``.

    Returns
    -------
    str
        Name of the column to read labels from.
    """
    fallback = "SNP"

    # Nothing to resolve: always return a usable name instead of falling
    # through to an unbound local.
    if not annotate or hits_table is None or hits_table.empty:
        return fallback

    columns = hits_table.columns
    if label_col is not None and label_col in columns:
        resolved = label_col
    elif annotate in columns:
        resolved = annotate
    else:
        resolved = None
        if str(annotate).upper() == "GENE":
            # Choose by column availability; a per-row decision cannot be
            # expressed as one column name.
            gene_columns = ("top_gene", "nearest_upstream_gene")
            resolved = next((c for c in gene_columns if c in columns), None)
        if resolved is None:
            logger.warning(
                "Annotation columns '%s' and '%s' not found in hits table: %s; "
                "falling back to 'SNP'.", annotate, label_col, hits_table.columns.values,
            )
            resolved = fallback

    logger.info("Annotating by: %s", resolved)

    return resolved
@@ -63,7 +63,7 @@ def get_arguments(descmsg: str = DESCMSG) -> argparse.Namespace:
63
63
  File delimiter name; auto-detected when ``None``.
64
64
  ``build_column`` : str or None
65
65
  Column name containing per-variant genome-build values
66
- (``hg19`` / ``hg38``).
66
+ (``hg18`` / ``hg19`` / ``hg38``).
67
67
  ``build`` : str or None
68
68
  Comma-separated list of genome builds per summary statistics file,
69
69
  in the same order as ``sum_stats``. Alternative to ``build_column``.
@@ -138,6 +138,10 @@ def get_arguments(descmsg: str = DESCMSG) -> argparse.Namespace:
138
138
  Track sort order.
139
139
  ``no_track_labels`` : bool
140
140
  Suppress track label rendering when ``True``.
141
+ ``ylabel`` : str or None
142
+ Shared y-axis label for linear Manhattan plots. Override the
143
+ default (``"-log₁₀(p-value)"`` or the p-value column name) for
144
+ non-p-value statistics such as ``"iHS"`` or ``"F_ST"``.
141
145
  ``plot_title`` : str
142
146
  Plot title and output file stem. Default ``'MyCMplot'``.
143
147
  ``plot_title_size`` : float
@@ -252,11 +256,14 @@ def get_arguments(descmsg: str = DESCMSG) -> argparse.Namespace:
252
256
  opt.add_argument(
253
257
  "-b","--build", default=None, required=False, type=str, metavar='str',
254
258
  help=
255
- """Comma-sperated list of genome build of summary stats file(s) listed
256
- in the same order as sumstats files. e.g. hg19,hg38,hg38,hg19 means:
259
+ """Comma-separated list of genome build of summary stats file(s) listed
260
+ in the same order as sumstats files. Accepted values: hg18, hg19, hg38.
261
+ E.g. hg19,hg38,hg38,hg18 means:
257
262
  file1.txt.gz --> hg19
258
263
  file2.txt.gz --> hg38
259
- file3.tsv --> hg38 ... etc
264
+ file3.tsv --> hg38
265
+ file4.tsv --> hg18 ... etc
266
+ hg18 and hg19 coordinates are lifted to hg38 before plotting.
260
267
  """
261
268
  )
262
269
  opt.add_argument(
@@ -365,7 +372,17 @@ def get_arguments(descmsg: str = DESCMSG) -> argparse.Namespace:
365
372
  "Exclude track labels from plot. (default: False)"
366
373
  ),
367
374
  action="store_true"
368
- )
375
+ )
376
+ opt.add_argument(
377
+ "-yl", "--ylabel",
378
+ default=None, type=str, metavar="str",
379
+ help=(
380
+ "Shared y-axis label for linear Manhattan plots (left margin). "
381
+ "Useful for non-p-value statistics such as iHS, F_ST or "
382
+ "XP-EHH (e.g. --ylabel 'iHS'). Defaults to '-log10(p-value)' "
383
+ "when --logp is set, otherwise the p-value column name."
384
+ )
385
+ )
369
386
  opt.add_argument(
370
387
  "-plt", "--plot_title", default="MyCMplot", type=str, metavar="str",
371
388
  help="Plot plot_title / output file stem."
@@ -439,7 +456,10 @@ def get_arguments(descmsg: str = DESCMSG) -> argparse.Namespace:
439
456
  "-t_space", "--linear_track_spacing", default=0.10, type=float, metavar="float",
440
457
  help="Space between linear tracks (default: 0.10)."
441
458
  )
442
-
459
+ lio.add_argument(
460
+ "-figsize", "--figure_size", default='10,4', required=False, type=str, metavar="str",
461
+ help="Linear plot figure size (default: 10,4 for width,height)."
462
+ )
443
463
  opt.add_argument(
444
464
  "-h", "--help", action="help",
445
465
  help="Show this help message and exit."
@@ -644,11 +644,11 @@ def prep_pycmplot_input_info(
644
644
  bcol: "BUILD",
645
645
  }
646
646
  col_dtypes = {
647
- chrom_col: str,
647
+ chrom_col: 'category',
648
648
  pos_col: object,
649
649
  snp_col: str,
650
650
  pcol_col: float,
651
- bcol: str,
651
+ bcol: 'category',
652
652
  }
653
653
  sumstats_hdr_dic[name] = [old_cols, col_dtypes, new_cols, file_sep]
654
654
 
@@ -662,7 +662,7 @@ def prep_pycmplot_input_info(
662
662
  pcol_col: "P",
663
663
  }
664
664
  col_dtypes = {
665
- chrom_col: str,
665
+ chrom_col: 'category',
666
666
  pos_col: object,
667
667
  snp_col: str,
668
668
  pcol_col: float,
@@ -681,18 +681,26 @@ def prep_pycmplot_input_info(
681
681
  pcol_col: "P",
682
682
  }
683
683
  col_dtypes = {
684
- chrom_col: str,
684
+ chrom_col: 'category',
685
685
  pos_col: object,
686
686
  snp_col: str,
687
687
  pcol_col: float,
688
688
  }
689
689
  sumstats_hdr_dic[name] = [old_cols, col_dtypes, new_cols, file_sep]
690
690
 
691
- if not any(len(info) == 5 for info in sumstats_hdr_dic.values()):
691
+ def _has_build_info(info: list) -> bool:
692
+ """A file has build info when either (a) its header had a build
693
+ column (which is stored as a fifth entry in ``old_cols``), or
694
+ (b) a per-file build was supplied via ``--build`` (stored as a
695
+ fifth entry in the top-level list)."""
696
+ old_cols = info[0]
697
+ return len(old_cols) == 5 or len(info) == 5
698
+
699
+ if not any(_has_build_info(info) for info in sumstats_hdr_dic.values()):
692
700
  # Neither build column nor --build was available for any file
693
701
  logger.warning(
694
702
  "No build column or --build values detected. Summary stats will "
695
- "be plotted in their respective coordinate systems. If your data "
703
+ "be plotted in their native coordinate systems. If your data "
696
704
  "are in different coordinate systems, combining them in one plot "
697
705
  "is not advisable, especially if ``--annotate`` is set!"
698
706
  )
@@ -713,6 +721,23 @@ def _merge_min_max_lists(dicts: list[dict]) -> dict:
713
721
  return {k: [min(v), max(v)] for k, v in temp.items()}
714
722
 
715
723
 
724
+ # ---------------------------------------------------------------------------
725
+ # Memory usage
726
+ # ---------------------------------------------------------------------------
727
+ def _get_memory_usage(mem_df: int):
728
+ if mem_df > 1e6:
729
+ df_mem = mem_df / 1e9
730
+ unit = 'GB'
731
+ else:
732
+ df_mem = mem_df / 1e6
733
+ unit = 'MB'
734
+ if df_mem < 1:
735
+ df_mem = df_mem * 100
736
+ unit = 'MB'
737
+
738
+ return f"{df_mem:.3g} {unit}"
739
+
740
+
716
741
  # ---------------------------------------------------------------------------
717
742
  # Main loader
718
743
  # ---------------------------------------------------------------------------
@@ -874,20 +899,27 @@ def get_sumstats_and_merged_sector_list(
874
899
  ).rename(columns=sumstat_newcols)
875
900
 
876
901
  df["POS"] = pd.to_numeric(df["POS"], errors="coerce").astype("Int64").dropna()
902
+ pre_trim_mem = _get_memory_usage(df.memory_usage(deep=True).sum())
903
+ pre_trim_vars = len(df.index)
904
+ logger.info("Loaded %s variants from summary stat file, using %s of memory", pre_trim_vars, pre_trim_mem)
877
905
 
878
906
  # Get dict of p-values for qq-plotting before applying trim_pval
879
907
  logger.info("Extracting raw p-values for QQ-plotting ...")
880
- pval_dict[label] = df["P"].dropna().astype("float").values
908
+ pval_dict[label] = df["P"].dropna().astype(float).values
881
909
 
882
910
 
883
911
  # Add build column if not exist and build supplied
884
912
  if build:
885
913
  df['BUILD'] = build
914
+ df['BUILD'] = df['BUILD'].astype('category')
886
915
 
887
916
  # Trim insignificant variants for faster plotting
888
917
  if trim_pval:
889
918
  logger.info("Excluding variants with p-value less than %s to speed up Manhattan plotting ...", trim_pval)
890
919
  df = df[df["P"].astype(float) <= float(trim_pval)]
920
+ post_trim_mem = _get_memory_usage(df.memory_usage(deep=True).sum())
921
+ post_trim_vars = len(df.index)
922
+ logger.info("%s variants remain after trimming, using %s of memory", post_trim_vars, post_trim_mem)
891
923
  else:
892
924
  df = df[df["P"].astype(float) <= 1]
893
925
 
@@ -911,9 +943,16 @@ def get_sumstats_and_merged_sector_list(
911
943
  n_chroms = len(df["CHR"].unique()) - 1
912
944
  sumstats_loaded[label] = [df, n_chroms]
913
945
 
914
- # Liftover hg19 data if needed
915
- if "BUILD" in df.columns and "hg19" in df["BUILD"].unique():
916
- logger.info("Converting hg19 coordinates to hg38 ...")
946
+ # Liftover hg18/hg19 data if needed
947
+ if "BUILD" in df.columns and (
948
+ "hg19" in df["BUILD"].unique() or "hg18" in df["BUILD"].unique()
949
+ ):
950
+ builds_present = sorted(
951
+ b for b in df["BUILD"].unique() if b in {"hg18", "hg19"}
952
+ )
953
+ logger.info(
954
+ "Converting %s coordinates to hg38 ...", "/".join(builds_present)
955
+ )
917
956
  sumstats_loaded[label][0] = liftover_position(df, resources=resources)
918
957
 
919
958
  # Lead SNPs
@@ -1002,6 +1041,7 @@ def get_sumstats_and_merged_sector_list(
1002
1041
  assoc_sector_sizes_list: list[dict] = []
1003
1042
  min_dic_val = None
1004
1043
 
1044
+ logger.info("Computing per-sumstat sector sizes (chrom → [min_pos, max_pos])")
1005
1045
  for df, _n in sumstats_loaded.values():
1006
1046
  assoc = df[~(df["CHR"].str.len() > 2)].copy()
1007
1047
  assoc["POS"] = assoc["POS"].fillna(0).astype(int)
@@ -2,23 +2,34 @@
2
2
  pycmplot.liftover
3
3
  =================
4
4
 
5
- Genome coordinate liftover utilities (hg19 → hg38).
5
+ Genome coordinate liftover utilities (hg18 → hg38 and hg19 → hg38).
6
6
 
7
- The :class:`pyliftover.LiftOver` object is initialised **lazily** — it is
8
- created on first use and cached in a module-level dictionary, so importing
9
- this module never triggers a file-not-found error even if the chain file has
10
- not been configured yet.
7
+ The :class:`pyliftover.LiftOver` objects are initialised **lazily** — they
8
+ are created on first use and cached in a module-level dictionary, so
9
+ importing this module never triggers a file-not-found error even if the
10
+ chain files have not been configured yet.
11
+
12
+ Supported conversions
13
+ ---------------------
14
+ pycmplot harmonises input coordinates to GRCh38. Two source assemblies are
15
+ supported:
16
+
17
+ * ``hg19`` / GRCh37 → GRCh38 (default, bundled chain file)
18
+ * ``hg18`` / NCBI36 → GRCh38 (bundled chain file; used when input rows
19
+ carry a ``hg18`` build label)
11
20
 
12
21
  Resource configuration
13
22
  ----------------------
14
- The chain file path is resolved through
15
- :class:`~pycmplot.resources.ResourceConfig`. By default, a bundled chain
16
- file is used (``pycmplot/data/hg19ToHg38.over.chain``). This can be
17
- overridden by setting the environment variable:
23
+ Chain file paths are resolved through
24
+ :class:`~pycmplot.resources.ResourceConfig`. By default, bundled chain
25
+ files are used (``pycmplot/data/hg19ToHg38.over.chain.gz`` and
26
+ ``pycmplot/data/hg18ToHg38.over.chain.gz``). They can be overridden by
27
+ setting the environment variables:
18
28
 
19
29
  .. code-block:: bash
20
30
 
21
- export PYCMPLOT_CHAIN_HG19_HG38=/path/to/hg19ToHg38.over.chain
31
+ export PYCMPLOT_CHAIN_HG19_HG38=/path/to/hg19ToHg38.over.chain.gz
32
+ export PYCMPLOT_CHAIN_HG18_HG38=/path/to/hg18ToHg38.over.chain.gz
22
33
  """
23
34
 
24
35
  from __future__ import annotations
@@ -135,17 +146,71 @@ def liftover_hg19_to_hg38(
135
146
  return new_pos
136
147
 
137
148
 
149
def liftover_hg18_to_hg38(
    chrom: str,
    pos: int,
    resources: Optional[ResourceConfig] = None,
) -> Optional[int]:
    """Map one hg18 (NCBI36) position onto hg38.

    The chain file registered under ``"chain_hg18_hg38"`` in *resources*
    is resolved, the matching liftover object is fetched through
    :func:`_get_liftover`, and the position is converted. The position of
    the first conversion returned is used.

    Parameters
    ----------
    chrom : str
        Chromosome name without the ``'chr'`` prefix (e.g. ``'1'``,
        ``'X'``); the prefix is prepended before the lookup.
    pos : int
        hg18 position passed unchanged to ``convert_coordinate``.
        NOTE(review): pyliftover is understood to expect 0-based
        coordinates; confirm against the caller.
    resources : ResourceConfig, optional
        Resource configuration to resolve the chain file from.
        ``default_resources`` is used when ``None``.

    Returns
    -------
    int or None
        hg38 position taken from the first conversion, or ``None`` when
        the lookup yields no result.

    See Also
    --------
    liftover_hg19_to_hg38 :
        Equivalent helper for hg19 coordinates.
    liftover_position :
        Applies the per-row liftover to a full DataFrame.
    """
    config = default_resources if resources is None else resources
    converter = _get_liftover(config.require("chain_hg18_hg38"))

    hits = converter.convert_coordinate(f"chr{chrom}", pos)
    if hits:
        # NOTE(review): relies on pyliftover listing the best-scoring
        # candidate first; confirm.
        _target_chrom, lifted_pos, _target_strand, _chain_score = hits[0]
        return lifted_pos
    return None
199
+
200
+
138
201
  def liftover_position(
139
202
  df: pd.DataFrame,
140
203
  hg38_chr_limits: dict = None,
141
204
  resources: Optional[ResourceConfig] = None,
142
205
  ) -> pd.DataFrame:
143
- """Liftover all hg19 rows in *df* from hg19 to hg38 coordinates.
206
+ """Liftover all hg18/hg19 rows in *df* to hg38 coordinates.
144
207
 
145
- Iterates over every row in *df* and calls :func:`liftover_hg19_to_hg38`
146
- for rows whose ``BUILD`` column equals ``'hg19'``. Rows with other build
147
- values are passed through unchanged. Rows for which liftover returns
148
- ``None`` or ``0`` (unmappable positions) are silently dropped.
208
+ Iterates over every row in *df* and dispatches to
209
+ :func:`liftover_hg19_to_hg38` for rows whose ``BUILD`` column equals
210
+ ``'hg19'`` or to :func:`liftover_hg18_to_hg38` for rows whose ``BUILD``
211
+ column equals ``'hg18'``. Rows with any other build value are passed
212
+ through unchanged. Rows for which liftover returns ``None`` or ``0``
213
+ (unmappable positions) are silently dropped.
149
214
 
150
215
  Two provenance columns are added to the returned DataFrame so that the
151
216
  original coordinates remain accessible:
@@ -207,6 +272,8 @@ def liftover_position(
207
272
  for chrom, pos, build in zip(df["CHR"], df["POS"], df["BUILD"]):
208
273
  if build == "hg19":
209
274
  new_positions.append(liftover_hg19_to_hg38(chrom, pos, resources))
275
+ elif build == "hg18":
276
+ new_positions.append(liftover_hg18_to_hg38(chrom, pos, resources))
210
277
  else:
211
278
  new_positions.append(pos)
212
279
 
@@ -29,6 +29,7 @@ import pandas as pd
29
29
 
30
30
  from pycmplot.io import get_output_paths
31
31
  from pycmplot.stats import get_highlight_snps
32
+ from pycmplot.annotation import get_annotation_column
32
33
 
33
34
  logger = logging.getLogger(__name__)
34
35
 
@@ -622,25 +623,18 @@ def plot_circular(
622
623
  # Circular: gene/SNP annotations
623
624
  # ------------------------------------------------------------------
624
625
  if annotate and not hits_table.empty:
626
+ label_col = get_annotation_column(
627
+ annotate = annotate,
628
+ hits_table=hits_table,
629
+ label_col=label_col,
630
+ )
631
+ if label_col == 'SNP':
632
+ fstyle = "normal"
633
+ else:
634
+ fstyle = "italic"
635
+
625
636
  for i, (_, row) in enumerate(hits_table.iterrows()):
626
- label = row['SNP']
627
- fstyle = "normal"
628
- if label_col:
629
- label_col = str(label_col)
630
- try:
631
- if label_col == "GENE":
632
- if row["genic"]:
633
- label = row["nearest_upstream_gene"]
634
- else:
635
- label = row["top_gene"]
636
- fstyle = "italic"
637
- elif label_col != "SNP":
638
- label = row[label_col]
639
- fstyle = "italic"
640
- except Exception:
641
- logger.info("'SNP' column is used for annotation since '%s' column could not be resolved in hits table.", label_col)
642
- pass
643
-
637
+ label = row[label_col]
644
638
  for sector in circos.sectors:
645
639
  if str(row["CHR"]) == sector.name:
646
640
  a_track = sector.add_track(annotation_track_radius)