PyPI - pycmplot - Versions diffs - 0.1.6__tar.gz → 0.1.7__tar.gz - Mend

pycmplot 0.1.6tar.gz → 0.1.7tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

{pycmplot-0.1.6/pycmplot.egg-info → pycmplot-0.1.7}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pycmplot
-Version: 0.1.6
+Version: 0.1.7
 Summary: Multi-track circular and linear Manhattan plot generation for GWAS summary statistics
 Author: Kevin Esoh
 Author-email: Kevin Esoh <kesohku1@jh.edu>

{pycmplot-0.1.6 → pycmplot-0.1.7}/pycmplot/__init__.py RENAMED Viewed

@@ -11,6 +11,7 @@ Command-line::
 Python API::
+    from pycmplot.io import prep_pycmplot_input_info, get_sumstats_and_merged_sector_list
     from pycmplot.plotting import plot_linear, plot_circular
     from pycmplot.stats import get_lead_snps
     from pycmplot.annotation import get_hits_summary_table
@@ -22,7 +23,7 @@ Public surface
 from pycmplot.plotting.linear import plot_linear
 from pycmplot.plotting.circular import plot_circular, compute_track_radii_dict
 from pycmplot.stats import get_lead_snps, get_highlight_snps
-from pycmplot.io import get_sumstats_and_merged_sector_list
+from pycmplot.io import prep_pycmplot_input_info, get_sumstats_and_merged_sector_list
 from pycmplot.annotation import get_hits_summary_table
 from pycmplot.constants import hg38_chr_lengths, BIOTYPE_WEIGHTS
 from pycmplot.resources import ResourceConfig
@@ -33,6 +34,7 @@ __all__ = [
     "compute_track_radii_dict",
     "get_lead_snps",
     "get_highlight_snps",
+    "prep_pycmplot_input_info",
     "get_sumstats_and_merged_sector_list",
     "get_hits_summary_table",
     "hg38_chr_lengths",
@@ -40,4 +42,4 @@ __all__ = [
     "ResourceConfig",
 ]
-__version__ = "0.1.0"
+__version__ = "0.1.7"

pycmplot-0.1.7/pycmplot/_core.py ADDED Viewed

@@ -0,0 +1,218 @@
+"""
+pycmplot._core
+==============
+Main entry point — orchestrates CLI parsing, data loading, and plotting.
+"""
+from __future__ import annotations
+import logging
+import warnings
+# Quieten matplotlib's font-manager logger; this runs before any matplotlib
+# import (the plotting modules are only imported inside ``main``).
+logging.getLogger("matplotlib.font_manager").setLevel(logging.ERROR)
+# NOTE(review): this filter ignores *every* Python warning for the whole
+# process, not only font-related ones.
+warnings.filterwarnings("ignore")
+# Configure the root logger: INFO level, "[LEVEL] message" line format.
+logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s")
+# Module-level logger used by ``main`` for progress messages.
+logger = logging.getLogger(__name__)
+def main() -> None:
+    """Run the ``pycmplot`` console script.
+
+    Parses the command line, splits the comma-separated inputs into lists,
+    resolves per-file column names, loads the summary statistics and then
+    draws a circular Manhattan plot when ``--mode`` is ``CM`` (compared
+    case-insensitively) or a linear Manhattan plot otherwise.
+    """
+    # Imports are deferred to call time so ``import pycmplot`` remains fast.
+    from pycmplot.cli import get_arguments, DESCMSG
+    from pycmplot.io import (
+        get_sumstats_and_merged_sector_list,
+        prep_pycmplot_input_info,
+        get_output_paths,
+        strip_comma_separated_input_streams,
+    )
+    from pycmplot.plotting.linear import plot_linear
+    from pycmplot.plotting.circular import plot_circular
+    from pycmplot.resources import ResourceConfig
+    # --- Command line -------------------------------------------------
+    args = get_arguments(DESCMSG)
+    print(DESCMSG)
+    # --- Comma-separated strings -> lists -----------------------------
+    sum_stats, labels, colors, t_heights = strip_comma_separated_input_streams(
+        sum_stats=args.sum_stats,
+        labels=args.labels,
+        colors_raw=args.colors,
+        track_heights=args.track_heights,
+    )
+    # --- Output paths -------------------------------------------------
+    # Only the summary-table path is used below; the plot path returned
+    # first is not consumed by this function.
+    _plot_path, table_out = get_output_paths(
+        labels,
+        mode=args.mode,
+        logp=args.logp,
+        output_dir=args.output_dir,
+        plot_title=args.plot_title,
+        output_format=args.output_format,
+    )
+    # --- Column names / delimiter per input file ----------------------
+    file_info = prep_pycmplot_input_info(
+        sum_stats=sum_stats,
+        labels=labels,
+        delim=args.delim,
+        chrom=args.chrom_column,
+        pos=args.pos_column,
+        snp=args.snp_column,
+        pcol=args.pval_column,
+        build=args.build_column,
+    )
+    # --- Resources (constructed with defaults) ------------------------
+    resources = ResourceConfig()
+    # --- Load data, compute sectors, build the hits table -------------
+    (
+        sector_sizes,
+        sumstats_loaded,
+        hits_table,
+        signif_lines,
+    ) = get_sumstats_and_merged_sector_list(
+        sum_stats=sum_stats,
+        labels=labels,
+        trim_pval=args.trim_pval,
+        logp=args.logp,
+        file_info=file_info,
+        sort_tracks=args.sort_track,
+        table_out=table_out,
+        signif_threshold=args.signif_threshold,
+        signif_line=args.signif_line,
+        suggest_threshold=args.suggest_threshold,
+        resources=resources,
+    )
+    # Keyword arguments that both plotting back-ends receive unchanged.
+    shared = {
+        "sumstats_loaded": sumstats_loaded,
+        "highlight": args.highlight,
+        "highlight_thresh": args.highlight_thresh,
+        "colors": colors,
+        "signif_lines": signif_lines,
+        "plot_title": args.plot_title,
+        "no_track_labels": args.no_track_labels,
+        "dpi": args.dpi,
+        "output_format": args.output_format,
+        "output_dir": args.output_dir,
+    }
+    # --- LINEAR MANHATTAN (any mode other than "CM") ------------------
+    if args.mode.upper() != "CM":
+        logger.info("Generating LINEAR MANHATTAN Plot ...")
+        plot_linear(
+            track_heights=t_heights,
+            trim_pval=args.trim_pval,
+            logp=bool(args.logp),
+            point_size=args.point_size,
+            # An empty hits table is passed on as "no annotations".
+            annot_df=None if hits_table.empty else hits_table,
+            label_col="top_gene",
+            chr_spacing=args.chr_spacing,
+            track_spacing=args.track_spacing,
+            figsize=(15, 9),
+            **shared,
+        )
+        return
+    # --- CIRCULAR MANHATTAN -------------------------------------------
+    logger.info("Generating CIRCULAR MANHATTAN Plot ...")
+    plot_circular(
+        logp=args.logp,
+        signif_line=args.signif_line,
+        highlight_line=args.highlight_line,
+        chrom_label_side=args.chrom_label_side,
+        chrom_label_size=args.chrom_label_size,
+        track_label_size=args.track_label_size,
+        track_label_orientation=args.track_label_orientation,
+        annotate=args.annotate,
+        annotation_size=args.annotation_size,
+        hits_table=hits_table,
+        sector_sizes=sector_sizes,
+        pad=args.pad,
+        r_min=args.r_min,
+        r_max=args.r_max,
+        plot_title_size=args.plot_title_size,
+        **shared,
+    )
+if __name__ == "__main__":
+    main()

{pycmplot-0.1.6 → pycmplot-0.1.7}/pycmplot/cli.py RENAMED Viewed

@@ -147,6 +147,13 @@ def get_arguments(descmsg: str = DESCMSG) -> argparse.Namespace:
         const="chrom_len", default=None, type=str, #metavar="str",
         help="Sort tracks by chromosome count or label."
     )
+    opt.add_argument(
+        "-ntl", "--no_track_labels",
+        help=(
+            "Exclude track labels from plot. (default: False)"
+        ),
+        action="store_true"
+    )
     opt.add_argument(
         "-plt", "--plot_title", default="MyCMplot", type=str, metavar="str",
         help="Plot plot_title / output file stem."

{pycmplot-0.1.6 → pycmplot-0.1.7}/pycmplot/io.py RENAMED Viewed

@@ -8,6 +8,8 @@ from __future__ import annotations
 import csv
 import gzip
+import sys
+import re
 import logging
 from collections import defaultdict
 from pathlib import Path
@@ -109,6 +111,198 @@ def get_file_header(
     return list(hdr)
+def strip_comma_separated_input_streams(
+    sum_stats,
+    labels,
+    colors_raw = 'steelblue,grey',
+    track_heights = None,
+):
+    """Split the comma-separated CLI strings into Python lists.
+
+    Parameters
+    ----------
+    sum_stats:
+        Comma-separated summary-statistics file paths.
+    labels:
+        Comma-separated track labels, one per entry in *sum_stats*.
+    colors_raw:
+        Comma-separated colour names.
+    track_heights:
+        Comma-separated numeric track heights, or ``None``.
+
+    Returns
+    -------
+    ``(sum_stats, labels, colors, t_heights)`` where the first three are
+    lists of whitespace-stripped strings and ``t_heights`` is a list of
+    floats, or ``None`` when *track_heights* is ``None``.
+
+    Raises
+    ------
+    SystemExit
+        If the number of files differs from the number of labels.
+    """
+    # ------------------------------------------------------------------
+    # Sumstat, labels str to list
+    # ------------------------------------------------------------------
+    labels     = [lbl.strip() for lbl in labels.strip().split(",")]
+    sum_stats  = [s.strip() for s in sum_stats.strip().split(",")]
+    # Validate AFTER splitting: the entry counts must match, not the
+    # character lengths of the raw comma-separated strings.
+    if len(sum_stats) != len(labels):
+        sys.exit(
+            "Error: number of summary stats files and labels must match.\n"
+            f"  Files:  {sum_stats}\n"
+            f"  Labels: {labels}"
+        )
+    # ------------------------------------------------------------------
+    # Colours str to list
+    # ------------------------------------------------------------------
+    colors = [c.strip() for c in colors_raw.strip().split(",")]
+    # ------------------------------------------------------------------
+    # Linear track heights str to list (None is passed through untouched
+    # instead of crashing on ``None.strip()``)
+    # ------------------------------------------------------------------
+    if track_heights is None:
+        t_heights = None
+    else:
+        t_heights = [float(x) for x in track_heights.strip().split(",")]
+    return sum_stats, labels, colors, t_heights
+# ------------------------------------------------------------------
+# Random string for output paths
+# ------------------------------------------------------------------
+def generate_random_string(length):
+    """Return *length* random characters drawn from ASCII letters and digits."""
+    from random import choices
+    from string import ascii_letters, digits
+    # ``choices`` samples with replacement, so characters may repeat.
+    return ''.join(choices(ascii_letters + digits, k=length))
+# ------------------------------------------------------------------
+# Output paths
+# ------------------------------------------------------------------
+def get_output_paths(
+    labels,
+    mode: Optional[str] = 'lm',
+    logp: bool = False,
+    output_dir: Optional[str] = '.',
+    plot_title: Optional[str] = None,
+    output_format: Optional[str] = 'png'
+):
+    """Build the plot and locus-summary-table paths, creating *output_dir*.
+
+    Parameters
+    ----------
+    labels:
+        Track labels; joined with ``_`` into the file stem.
+    mode:
+        Plot mode; lower-cased into the file stem.
+    logp:
+        Selects the ``_logp`` (truthy) or ``_pval`` (falsy) suffix.
+    output_dir:
+        Directory for the outputs; resolved and created if missing.
+    plot_title:
+        Title used as the leading part of the stem. When empty or ``None``
+        a random 10-character string is used instead.
+    output_format:
+        Plot file extension; lower-cased.
+
+    Returns
+    -------
+    ``(plt_name, table_out)`` as strings.
+    """
+    target_dir = Path(output_dir).resolve()
+    target_dir.mkdir(parents=True, exist_ok=True)
+    # Title -> stem: drop everything except ASCII letters, digits and
+    # whitespace, then turn spaces into underscores.
+    stem = (
+        re.sub(r"[^a-zA-Z0-9\s]", "", plot_title).replace(" ", "_")
+        if plot_title
+        else generate_random_string(10)
+    )
+    scale_tag = "_logp" if logp else "_pval"
+    prefix = str(target_dir / f"{stem}_{'_'.join(labels)}_{mode.lower()}") + scale_tag
+    return (
+        f"{prefix}.{output_format.lower()}",
+        f"{prefix}_locus_summary_table.tsv",
+    )
+# ---------------------------------------------------------------------------
+# input formatter
+# ---------------------------------------------------------------------------
+def _resolve_column(hdr, preferred, fallbacks, role, fpath):
+    """Pick the header column for *role*, or exit naming the missing role."""
+    # An explicitly requested column always wins when the file has it.
+    if preferred and preferred in hdr:
+        return preferred
+    # Otherwise take the first header column matching a known alias.
+    known = set(fallbacks)
+    for col in hdr:
+        if col in known:
+            return col
+    tried = ([preferred] if preferred else []) + list(fallbacks)
+    sys.exit(
+        f"Error: could not find a required column in {fpath}.\n"
+        f"  Header: {hdr}\n"
+        f"  Details: no {role} column found (tried: {', '.join(tried)})"
+    )
+def prep_pycmplot_input_info(
+    sum_stats: list[str],
+    labels: list[str],
+    delim: Optional[str] = None,
+    chrom: Optional[str] = None,
+    pos: Optional[str] = None,
+    snp: Optional[str] = None,
+    pcol: Optional[str] = None,
+    build: Optional[str] = None
+):
+    """Resolve column names and delimiter for each summary-statistics file.
+
+    Parameters
+    ----------
+    sum_stats:
+        List of file paths to GWAS summary statistics (possibly gzip-compressed).
+    labels:
+        Track labels in the same order as *sum_stats*.
+    delim:
+        File delimiter (autodetected per file if omitted)
+    chrom:
+        Chromosome column
+    pos:
+        Position column
+    snp:
+        SNP or Marker ID column
+    pcol:
+        P-value column
+    build:
+        Build version column
+
+    An explicitly supplied column name is used whenever it is present in
+    the file header; otherwise the first header column matching one of the
+    built-in aliases for that role is used.
+
+    Returns
+    -------
+    dict mapping each label to the list
+    ``[old_columns, column_dtypes, new_columns, delim]``.
+
+    Raises
+    ------
+    SystemExit
+        If any of the five columns cannot be found in a file header; the
+        message names the missing role and the names that were tried.
+    """
+    # ------------------------------------------------------------------
+    # Resolve delimiter (None -> autodetect per file)
+    # ------------------------------------------------------------------
+    sep = resolve_delimiter(delim) if delim else None
+    # ------------------------------------------------------------------
+    # Per role: (description, user-supplied name, aliases, new name, dtype)
+    # ------------------------------------------------------------------
+    column_roles = [
+        ("chromosome", chrom,
+         ["CHR", "CHROM", "Chromosome", "#CHROM", "#CHR",
+          "Chrom", "chrom", "chr", "chromosome", "#chr", "#chrom"],
+         "CHR", str),
+        ("position", pos,
+         ["BP", "POS", "bp", "pos", "Basepair"],
+         "POS", object),
+        ("SNP/marker ID", snp,
+         ["SNP", "RSID", "rsID", "MarkerName", "MarkerID",
+          "Predictor", "Marker", "SNPID", "ID"],
+         "SNP", str),
+        ("p-value", pcol,
+         ["P", "P-value", "Wald_P", "pvalue", "p_val", "pval"],
+         "P", float),
+        ("genome build", build,
+         ["BUILD", "Genome", "Genome_Build", "Genome-build"],
+         "BUILD", str),
+    ]
+    # ------------------------------------------------------------------
+    # Resolve column names per file
+    # ------------------------------------------------------------------
+    sumstats_hdr_dic: dict = {}
+    for name, fpath in zip(labels, sum_stats):
+        if sep:
+            file_sep, dialect = sep, None
+        else:
+            file_sep, dialect = detect_delimiter(fpath, sample_size=5_000)
+        hdr = get_file_header(fpath, delim=file_sep, dialect=dialect)
+        old_cols = []
+        new_cols = {}
+        col_dtypes = {}
+        for role, preferred, fallbacks, new_name, dtype in column_roles:
+            col = _resolve_column(hdr, preferred, fallbacks, role, fpath)
+            old_cols.append(col)
+            new_cols[col] = new_name
+            col_dtypes[col] = dtype
+        sumstats_hdr_dic[name] = [old_cols, col_dtypes, new_cols, file_sep]
+    return sumstats_hdr_dic
 # ---------------------------------------------------------------------------
 # Sector-size helpers
 # ---------------------------------------------------------------------------
@@ -134,8 +328,6 @@ def get_sumstats_and_merged_sector_list(
     file_info: Optional[dict] = None,
     sort_tracks: Optional[str] = "chrom_len",
     table_out: Optional[str] = None,
-    highlight: bool = False,
-    highlight_thresh: float = 5e-8,
     signif_threshold: Optional[float] = None,
     signif_line: Optional[float] = None,
     suggest_threshold: Optional[float] = None,
@@ -156,8 +348,8 @@ def get_sumstats_and_merged_sector_list(
         ``'label'`` — sort tracks alphabetically by label.
         ``'chrom_len'`` — sort by number of chromosomes (default).
         ``None`` — preserve input order.
-    highlight:
-        Whether to flag loci for highlighting.
+    signif_threshold:
+        Threshold of significance to create hits table.
     resources:
         :class:`~pycmplot.resources.ResourceConfig` instance.
@@ -225,21 +417,13 @@ def get_sumstats_and_merged_sector_list(
             logger.info("Converting hg19 coordinates to hg38 ...")
             sumstats_loaded[label][0] = liftover_position(df, resources=resources)
-        # Lead SNPs / highlight SNPs
-        if highlight:
-            logger.info("Extracting variants to highlight ...")
-            sumstats_loaded[label][0], leads = get_highlight_snps(
-                df=sumstats_loaded[label][0],
-                window=2_000_000,
-                highlight_thresh=highlight_thresh,
-                logp=True,
-            )
-        else:
-            leads = get_lead_snps(
-                df=sumstats_loaded[label][0],
-                highlight_thresh=signif_threshold or 5e-8,
-                logp=True,
-            )
+        # Lead SNPs
+        logger.info("Extracting variants to highlight ...")
+        leads = get_lead_snps(
+            df=sumstats_loaded[label][0],
+            signif_threshold=signif_threshold or 5e-8,
+            logp=True,
+        )
         all_lead_snps.append(leads)

pycmplot 0.1.6__tar.gz → 0.1.7__tar.gz

pycmplot 0.1.6tar.gz → 0.1.7tar.gz