PyPI - pycmplot - Versions diffs - 0.2.0__tar.gz → 0.2.2__tar.gz - Mend

pycmplot 0.2.0.tar.gz → 0.2.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

{pycmplot-0.2.0 → pycmplot-0.2.2}/LICENSE RENAMED Viewed

@@ -1,4 +1,4 @@
-CC BY-NC-SA 4.0 License
+CC-BY-NC-SA-4.0 License
 Copyright (c) 2026 Kevin Esoh

{pycmplot-0.2.0/pycmplot.egg-info → pycmplot-0.2.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pycmplot
-Version: 0.2.0
+Version: 0.2.2
 Summary: Multi-track circular and linear Manhattan plot generation for GWAS summary statistics
 Author: Kevin Esoh
 Author-email: Kevin Esoh <kesohku1@jh.edu>
@@ -85,6 +85,8 @@ certain threshold, e.g. `0.01 (1e-2)` or `0.001 (1e-3)`.
 A potential useful application is **comparative visualization** of results from multiple imputation panels,
 multiple populations, or multiple traits to observe shared genetic architecture.
+Read more in the package documentation page: https://pycmplot.readthedocs.io/en/latest/
 ---
 ## Installation
@@ -178,15 +180,17 @@ pycmplot \
 |------|-------------|---------|
 | `-s, --sum_stats` | Comma-separated sumstats files | **required** |
 | `-l, --labels` | Comma-separated track labels | **required** |
-| `-b, --build_column` | Genome build column name (containing hg18/hg19/hg38) | **required** |
+| `-b, --build` | Comma-separated genome builds of sumstats  | off |
+| `-bc, --build_column` | Genome build column name (containing hg18/hg19/hg38) | off |
 | `-m, --mode` | `lm` linear or `cm` circular | `lm` |
-| `-qq, --qq_plot` | Also generate a QQ-plot | off (coming soon...) |
+| `-qq, --qq_plot` | Also generate a QQ-plot | off |
+| `-qq_thin, --qq_thin` | Thin p-values for faster QQ-plotting | off |
 | `--logp` | Plot -log10(p) | off |
 | `-sig, --signif_threshold` | Genome-wide significance threshold | off (auto 0.05/N) |
 | `-sigl, --signif_line` | Value for genome-wide significance line if different from `-sig` | 5e-8 |
 | `-sug, --suggest_threshold` | Threshold for suggestive signals | off |
 | `-hl, --highlight` | Highlight significant loci | off |
-| `-a, --annotate` | Annotate with `SNP` or `GENE` | `SNP` |
+| `-a, --annotate` | Annotate with `snp`, `gene`, or any column in `hits_table` | `snp` |
 | `-tp, --trim_pval` | Trim variants above this p-value for speed | off |
 | `-st, --sort_track` | Sort tracks by `label` or `chrom_len` | input order |
 | `-od, --output_dir` | Output directory | `.` |

{pycmplot-0.2.0 → pycmplot-0.2.2}/README.md RENAMED Viewed

@@ -59,6 +59,8 @@ certain threshold, e.g. `0.01 (1e-2)` or `0.001 (1e-3)`.
 A potential useful application is **comparative visualization** of results from multiple imputation panels,
 multiple populations, or multiple traits to observe shared genetic architecture.
+Read more in the package documentation page: https://pycmplot.readthedocs.io/en/latest/
 ---
 ## Installation
@@ -152,15 +154,17 @@ pycmplot \
 |------|-------------|---------|
 | `-s, --sum_stats` | Comma-separated sumstats files | **required** |
 | `-l, --labels` | Comma-separated track labels | **required** |
-| `-b, --build_column` | Genome build column name (containing hg18/hg19/hg38) | **required** |
+| `-b, --build` | Comma-separated genome builds of sumstats  | off |
+| `-bc, --build_column` | Genome build column name (containing hg18/hg19/hg38) | off |
 | `-m, --mode` | `lm` linear or `cm` circular | `lm` |
-| `-qq, --qq_plot` | Also generate a QQ-plot | off (coming soon...) |
+| `-qq, --qq_plot` | Also generate a QQ-plot | off |
+| `-qq_thin, --qq_thin` | Thin p-values for faster QQ-plotting | off |
 | `--logp` | Plot -log10(p) | off |
 | `-sig, --signif_threshold` | Genome-wide significance threshold | off (auto 0.05/N) |
 | `-sigl, --signif_line` | Value for genome-wide significance line if different from `-sig` | 5e-8 |
 | `-sug, --suggest_threshold` | Threshold for suggestive signals | off |
 | `-hl, --highlight` | Highlight significant loci | off |
-| `-a, --annotate` | Annotate with `SNP` or `GENE` | `SNP` |
+| `-a, --annotate` | Annotate with `snp`, `gene`, or any column in `hits_table` | `snp` |
 | `-tp, --trim_pval` | Trim variants above this p-value for speed | off |
 | `-st, --sort_track` | Sort tracks by `label` or `chrom_len` | input order |
 | `-od, --output_dir` | Output directory | `.` |

{pycmplot-0.2.0 → pycmplot-0.2.2}/docs/conf.py RENAMED Viewed

@@ -12,7 +12,7 @@ sys.path.insert(0, os.path.abspath(".."))
 project = "pycmplot"
 copyright = "2026, Kevin Esoh"
 author = "Kevin Esoh"
-release = "0.2.0"  # update to match your PyPI version
+release = "0.2.2"  # update to match PyPI version
 # -- General configuration -----------------------------------------------------
 extensions = [

{pycmplot-0.2.0 → pycmplot-0.2.2}/pycmplot/__init__.py RENAMED Viewed

@@ -42,4 +42,4 @@ __all__ = [
     "ResourceConfig",
 ]
-__version__ = "0.1.9"
+__version__ = "0.2.1"

{pycmplot-0.2.0 → pycmplot-0.2.2}/pycmplot/_core.py RENAMED Viewed

@@ -1,6 +1,6 @@
 from __future__ import annotations
-CORE_MODULE = '''"""
+CORE_MODULE = """
 pycmplot._core
 ==============
@@ -12,7 +12,7 @@ work to :mod:`pycmplot.io`, :mod:`pycmplot.plotting.linear`, and
 All imports are deferred inside :func:`main` so that
 ``import pycmplot`` remains fast regardless of the size of the dependency
 tree.
-"""'''
+"""
 import logging
 import warnings
@@ -26,7 +26,7 @@ logger = logging.getLogger(__name__)
 def main() -> None:
-    MAIN = '''"""Orchestrate the full pycmplot pipeline from the command line.
+    MAIN = """Orchestrate the full pycmplot pipeline from the command line.
     This function is registered as the ``pycmplot`` console-script entry point
     in ``pyproject.toml`` / ``setup.cfg``.  It performs the following steps in
@@ -75,7 +75,7 @@ def main() -> None:
         Linear Manhattan plotter called for ``--mode lm`` (default).
     pycmplot.plotting.circular.plot_circular :
         Circular Manhattan plotter called for ``--mode cm``.
-    """'''
+    """
     # ------------------------------------------------------------------
     # Deferred imports so ``import pycmplot`` remains fast
@@ -92,6 +92,7 @@ def main() -> None:
     )
     from pycmplot.plotting.linear import plot_linear
     from pycmplot.plotting.circular import plot_circular
+    from pycmplot.plotting.qq import plot_qq_combined, plot_qq_separate, plot_qq_overlay
     from pycmplot.resources import ResourceConfig
     # ------------------------------------------------------------------
@@ -105,10 +106,18 @@ def main() -> None:
     chrom_arg        = args.chrom_column
     pos_arg          = args.pos_column
     snp_arg          = args.snp_column
-    build_arg        = args.build_column
+    build_arg        = args.build
+    buildc_arg       = args.build_column
     labels_raw       = args.labels
     pcol_arg         = args.pval_column
     logp             = args.logp
+    qq               = args.qq_plot
+    qq_separate      = args.qq_separate
+    qq_ncols         = args.qq_ncols
+    qq_thin          = args.qq_thin
+    thin_below       = args.thin_below
+    qq_max_points    = args.qq_max_points
+    qq_overlay       = args.qq_overlay
     chrom_label_size = args.chrom_label_size
     chrom_label_side = args.chrom_label_side
     track_label_size = args.track_label_size
@@ -123,13 +132,13 @@ def main() -> None:
     point_size       = args.point_size
     highlight        = args.highlight
     highlight_thresh = args.highlight_thresh
-    highight_color   = args.highight_color
+    highlight_color   = args.highlight_color
     highlight_line   = args.highlight_line
-    highight_line_color = args.highight_line_color
+    highlight_line_color = args.highlight_line_color
     colors_raw       = args.colors
-    r_min            = args.r_min
-    r_max            = args.r_max
-    pad              = args.pad
+    r_min            = args.min_radius
+    r_max            = args.max_radius
+    pad              = args.circular_track_spacing
     output_format    = args.output_format
     output_dir       = args.output_dir
     dpi              = args.dpi
@@ -142,18 +151,20 @@ def main() -> None:
     # ------------------------------------------------------------------
-    # Sumstat, labels, colours, track heights str to list
+    # Sumstat, labels, colours, track heights [build] str to list
     # ------------------------------------------------------------------
     (
         sum_stats,
         labels,
         colors,
-        t_heights
+        t_heights,
+        builds
     ) = strip_comma_separated_input_streams(
         sum_stats = sum_stats_raw,
         labels = labels_raw,
         colors_raw = colors_raw,
         track_heights = track_heights,
+        builds = build_arg if build_arg else None,
     )
     # ------------------------------------------------------------------
@@ -161,7 +172,8 @@ def main() -> None:
     # ------------------------------------------------------------------
     (
         plt_name,
-        table_out
+        table_out,
+        plt_base,
     ) = get_output_paths(
         labels,
         mode = mode,
@@ -182,7 +194,8 @@ def main() -> None:
         pos = pos_arg,
         snp = snp_arg,
         pcol = pcol_arg,
-        build = build_arg
+        buildc = buildc_arg,
+        build = builds
     )
     # ------------------------------------------------------------------
@@ -198,6 +211,7 @@ def main() -> None:
         sumstats_loaded,
         hits_table,
         signif_lines,
+        pval_dict,
     ) = get_sumstats_and_merged_sector_list(
         sum_stats=sum_stats,
         labels=labels,
@@ -212,6 +226,19 @@ def main() -> None:
         resources=resources,
     )
+    # ------------------------------------------------------------------
+    # ANNOTATE BY
+    # ------------------------------------------------------------------
+    if annotate:
+        if str(annotate).upper() == "GENE":
+            label_col = 'top_gene'
+        elif str(annotate).upper() == "SNP":
+            label_col = 'SNP'
+        else:
+            label_col = annotate
+        logger.info(f"Anotate by: {label_col}")
     # ------------------------------------------------------------------
     # CIRCULAR MANHATTAN
     # ------------------------------------------------------------------
@@ -224,15 +251,16 @@ def main() -> None:
             signif_lines = signif_lines,
             highlight = highlight,
             highlight_thresh = highlight_thresh,
-            highight_color = highight_color,
+            highlight_color = highlight_color,
             highlight_line = highlight_line,
-            highight_line_color = highight_line_color,
+            highlight_line_color = highlight_line_color,
             colors = colors,
             chrom_label_side = chrom_label_side,
             chrom_label_size = chrom_label_size,
             track_label_size = track_label_size,
             track_label_orientation = track_label_orientation,
             annotate = annotate,
+            label_col = label_col if annotate else None,
             annotation_size = annotation_size,
             hits_table = hits_table,
             sector_sizes = merged_assoc_sector_sizes,
@@ -253,30 +281,77 @@ def main() -> None:
     else:
         logger.info("Generating LINEAR MANHATTAN Plot ...")
         plot_linear(
-            sumstats_loaded = sumstats_loaded,
-            track_heights = t_heights,
+            sumstats_loaded=sumstats_loaded,
+            track_heights=t_heights,
             trim_pval=trim_pval,
             logp=True if logp else False,
             point_size=point_size,
             highlight=highlight,
             highlight_thresh=highlight_thresh,
-            highight_color = highight_color,
-            highlight_line = highlight_line,
-            highight_line_color = highight_line_color,
-            annot_df=hits_table if not hits_table.empty else None,
-            label_col="top_gene",
+            highlight_color=highlight_color,
+            highlight_line=highlight_line,
+            highlight_line_color=highlight_line_color,
+            annotate=annotate,
+            hits_table=hits_table if not hits_table.empty else None,
+            label_col=label_col if annotate else None,
             chr_spacing=chr_spacing,
             linear_track_spacing=linear_track_spacing,
             colors=colors,
             signif_lines=signif_lines,
             plot_title=plot_title,
-            no_track_labels = no_track_labels,
+            no_track_labels=no_track_labels,
             dpi=dpi,
             output_format=output_format,
             output_dir=output_dir,
             figsize=(15, 9)
         )
+    # ------------------------------------------------------------------
+    # QQ PLOT
+    # ------------------------------------------------------------------
+    if qq and sumstats_loaded:
+        logger.info("Generating QQ Plot(s) ...")
+        qq_stem = f"{plt_base}_qq"
+        if qq_separate:
+            plot_qq_separate(
+                pval_dict=pval_dict,
+                thin=qq_thin,
+                thin_below=thin_below,
+                max_points=qq_max_points,
+                output_path=qq_stem,
+                colors=colors,
+                signif_threshold=signif_threshold or 5e-8,
+                dpi=dpi,
+                fig_format=output_format,
+            )
+        elif qq_overlay:
+            plot_qq_overlay(
+                pval_dict=pval_dict,
+                thin=qq_thin,
+                thin_below=thin_below,
+                max_points=qq_max_points,
+                colors=colors,
+                signif_threshold=signif_threshold or 5e-8,
+                dpi=dpi,
+                title=plot_title,
+                output_path=f"{qq_stem}_overlay",
+                fig_format=output_format,
+            )
+        else:
+            plot_qq_combined(
+                pval_dict=pval_dict,
+                thin=qq_thin,
+                thin_below=thin_below,
+                max_points=qq_max_points,
+                colors=colors,
+                ncols=qq_ncols,
+                signif_threshold=signif_threshold or 5e-8,
+                dpi=dpi,
+                title=plot_title,
+                output_path=f"{qq_stem}_combined",
+                fig_format=output_format,
+            )
 if __name__ == "__main__":
     main()

{pycmplot-0.2.0 → pycmplot-0.2.2}/pycmplot/annotation.py RENAMED Viewed

@@ -1,6 +1,6 @@
 from __future__ import annotations
-MODULE_DOCSTRING = '''"""
+MODULE_DOCSTRING = """
 pycmplot.annotation
 ====================
@@ -20,7 +20,7 @@ Annotation relies on a bundled Ensembl gene-info TSV (hg38 or hg19).  The
 file is resolved through :class:`~pycmplot.resources.ResourceConfig`; custom
 paths can be supplied via the ``PYCMPLOT_GENEINFO_HG38`` /
 ``PYCMPLOT_GENEINFO_HG19`` environment variables.
-"""'''
+"""
 import bisect
 import logging
@@ -41,7 +41,7 @@ logger = logging.getLogger(__name__)
 # ---------------------------------------------------------------------------
 def _build_genes_dict(genes_df: pd.DataFrame) -> dict:
-    BUILD_GENES_DICT = '''"""Build a chromosome-keyed interval dictionary with sorted start positions.
+    BUILD_GENES_DICT = """Build a chromosome-keyed interval dictionary with sorted start positions.
     Pre-processes the gene reference DataFrame into a structure that supports
     efficient O(log N) binary-search lookup of genes near a query position.
@@ -67,7 +67,7 @@ def _build_genes_dict(genes_df: pd.DataFrame) -> dict:
     -----
     This function is called once per :func:`get_hits_summary_table` invocation;
     the result is passed to :func:`_annotate_variant` for each lead SNP.
-    """'''
+    """
     genes_df = genes_df.sort_values(["CHR", "START"])
     genes_dict: dict = {}
@@ -98,7 +98,7 @@ def _annotate_variant(
     window: int = 500_000,
     promoter_window: int = 2_000,
 ) -> dict:
-    ANNOTATE_VARIANT = '''"""Return strand-aware nearest-gene annotation for a single variant.
+    ANNOTATE_VARIANT = """Return strand-aware nearest-gene annotation for a single variant.
     Searches the pre-built *genes_dict* within *window* bp of *pos* on
     *chrom*.  Reports the nearest upstream and downstream genes (relative to
@@ -138,7 +138,7 @@ def _annotate_variant(
         within *promoter_window* bp upstream of any TSS.
         * ``gene_density`` (int) – number of genes with any overlap in the
         search window.
-    """'''
+    """
     _empty = {
         "genic": False,
@@ -238,7 +238,7 @@ def _annotate_and_prioritize_variant(
     promoter_window: int = 2_000,
     biotype_weights: Optional[dict] = None,
 ) -> Optional[dict]:
-    ANNOTATE_PRIORITIZE = '''"""Score and rank candidate genes for a single variant using a composite
+    ANNOTATE_PRIORITIZE = """Score and rank candidate genes for a single variant using a composite
     priority metric.
     Builds a candidate gene set within *window* bp of *pos* on *chrom*, then
@@ -287,7 +287,7 @@ def _annotate_and_prioritize_variant(
         For intergenic variants, ``top_gene`` contains the two nearest flanking
         gene symbols joined by ``'-'`` (e.g. ``'HBB-HBD'``) and ``biotype``
         is set to ``'intergenic'``.
-    """'''
+    """
     if biotype_weights is None:
         biotype_weights = BIOTYPE_WEIGHTS
@@ -386,7 +386,7 @@ def _annotate_and_prioritize_variant(
 # ---------------------------------------------------------------------------
 def _clump_by_distance(df: pd.DataFrame, window_kb: int = 500) -> pd.DataFrame:
-    CLUMP_BY_DISTANCE = '''"""Reduce a lead-SNP table to one representative SNP per locus.
+    CLUMP_BY_DISTANCE = """Reduce a lead-SNP table to one representative SNP per locus.
     Applies greedy distance-based clumping within each chromosome group,
     starting from the most significant SNP (lowest ``P`` or highest ``logP``).
@@ -406,7 +406,7 @@ def _clump_by_distance(df: pd.DataFrame, window_kb: int = 500) -> pd.DataFrame:
     pandas.DataFrame
         Deduplicated locus representatives sorted by chromosome and position
         (natural sort order).
-    """'''
+    """
     window = window_kb * 1000
     clumped: list[pd.Series] = []
@@ -438,7 +438,7 @@ def get_hits_summary_table(
     table_out: Optional[str] = None,
     resources: Optional[ResourceConfig] = None,
 ) -> pd.DataFrame:
-    GET_HITS_SUMMARY_TABLE = '''"""Annotate lead SNPs with nearest genes and write the locus summary table.
+    GET_HITS_SUMMARY_TABLE = """Annotate lead SNPs with nearest genes and write the locus summary table.
     For each lead SNP in *leads_df*, runs two complementary annotation passes:
@@ -528,51 +528,54 @@ def get_hits_summary_table(
             SNP CHR       POS  top_gene           biotype
     0  rs123456   2  60718043    BCL11A    protein_coding
     1  rs789012  11   5246696       HBB    protein_coding
-    """'''
+    """
     if resources is None:
         resources = default_resources
     # Choose gene info file based on build
-    if "OLD_POS" not in leads_df.columns and list(set(leads_df["BUILD"])) == ["hg19"]:
-        geneinfo_path = resources.require("geneinfo_hg19")
-    else:
-        geneinfo_path = resources.require("geneinfo_hg38")
+    if 'BUILD' in leads_df.columns:
+        if "OLD_POS" not in leads_df.columns and list(set(leads_df["BUILD"])) == ["hg19"]:
+            geneinfo_path = resources.require("geneinfo_hg19")
+        else:
+            geneinfo_path = resources.require("geneinfo_hg38")
-    logger.info("Loading gene info from: %s", geneinfo_path)
-    geneinfo = pd.read_csv(geneinfo_path, header=0, sep="\t")
-    genes_dict = _build_genes_dict(geneinfo)
+        logger.info("Loading gene info from: %s", geneinfo_path)
+        geneinfo = pd.read_csv(geneinfo_path, header=0, sep="\t")
+        genes_dict = _build_genes_dict(geneinfo)
-    window = window_kb * 1_000
-    records: list[dict] = []
+        window = window_kb * 1_000
+        records: list[dict] = []
-    logger.info("Annotating lead variants and generating hits summary table ...")
-    for _, row in leads_df.iterrows():
-        annotation = _annotate_variant(
-            chrom=row["CHR"],
-            pos=row["POS"],
-            genes_dict=genes_dict,
-            window=window,
-        )
-        prioritized = _annotate_and_prioritize_variant(
-            chrom=row["CHR"],
-            pos=row["POS"],
-            genes_df=geneinfo,
-            lead_snps_df=leads_df,
-            window=window,
-        )
+        logger.info("Annotating lead variants and generating hits summary table ...")
+        for _, row in leads_df.iterrows():
+            annotation = _annotate_variant(
+                chrom=row["CHR"],
+                pos=row["POS"],
+                genes_dict=genes_dict,
+                window=window,
+            )
+            prioritized = _annotate_and_prioritize_variant(
+                chrom=row["CHR"],
+                pos=row["POS"],
+                genes_df=geneinfo,
+                lead_snps_df=leads_df,
+                window=window,
+            )
-        record = {
-            **(row.to_dict()),
-            **(annotation if annotation is not None else {}),
-            **(prioritized if prioritized is not None else {}),
-        }
-        records.append(record)
+            record = {
+                **(row.to_dict()),
+                **(annotation if annotation is not None else {}),
+                **(prioritized if prioritized is not None else {}),
+            }
+            records.append(record)
-    locus_table = pd.DataFrame(records).sort_values(
-        ["CHR", "POS"], key=natsort.natsort_keygen()
-    )
+        locus_table = pd.DataFrame(records).sort_values(
+            ["CHR", "POS"], key=natsort.natsort_keygen()
+        )
+    else:
+        locus_table = leads_df
     if table_out is not None:
         locus_table.to_csv(table_out, index=False, sep="\t", na_rep="None")

pycmplot 0.2.0.tar.gz → 0.2.2.tar.gz

pycmplot 0.2.0.tar.gz → 0.2.2.tar.gz