PyPI - pycmplot - Versions diffs - 0.2.4__tar.gz → 0.2.6__tar.gz - Mend

pycmplot 0.2.4__tar.gz → 0.2.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41) hide show

pycmplot-0.2.6/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 Kevin Esoh
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

{pycmplot-0.2.4/pycmplot.egg-info → pycmplot-0.2.6}/PKG-INFO RENAMED Viewed

@@ -1,15 +1,23 @@
 Metadata-Version: 2.4
 Name: pycmplot
-Version: 0.2.4
+Version: 0.2.6
 Summary: Multi-track circular and linear Manhattan plot generation for GWAS summary statistics
 Author: Kevin Esoh
 Author-email: Kevin Esoh <kesohku1@jh.edu>
-License-Expression: CC-BY-NC-SA-4.0
+License-Expression: MIT
 Project-URL: Homepage, https://github.com/esohkevin/pycmplot
 Project-URL: Issues, https://github.com/esohkevin/pycmplot/issues
 Project-URL: Docs, https://pycmplot.readthedocs.io/en/latest/
 Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
 Classifier: Operating System :: OS Independent
+Classifier: Intended Audience :: Science/Research
+Classifier: Natural Language :: English
+Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
+Classifier: Topic :: Scientific/Engineering :: Visualization
 Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE

{pycmplot-0.2.4 → pycmplot-0.2.6}/pycmplot/__init__.py RENAMED Viewed

@@ -12,7 +12,7 @@ Command-line::
 Python API::
     from pycmplot.io import prep_pycmplot_input_info, get_sumstats_and_merged_sector_list
-    from pycmplot.plotting import plot_linear, plot_circular
+    from pycmplot.plotting import plot_linear, plot_circular, plot_qq_single, plot_qq_separate, plot_qq_overlay, plot_qq_combined
     from pycmplot.stats import get_lead_snps
     from pycmplot.annotation import get_hits_summary_table
@@ -22,6 +22,7 @@ Public surface
 from pycmplot.plotting.linear import plot_linear
 from pycmplot.plotting.circular import plot_circular, compute_track_radii_dict
+from pycmplot.plotting.qq import plot_qq_single, plot_qq_separate, plot_qq_overlay, plot_qq_combined
 from pycmplot.stats import get_lead_snps, get_highlight_snps
 from pycmplot.io import prep_pycmplot_input_info, get_sumstats_and_merged_sector_list
 from pycmplot.annotation import get_hits_summary_table
@@ -31,6 +32,10 @@ from pycmplot.resources import ResourceConfig
 __all__ = [
     "plot_linear",
     "plot_circular",
+    "plot_qq_single",
+    "plot_qq_separate",
+    "plot_qq_overlay",
+    "plot_qq_combined",
     "compute_track_radii_dict",
     "get_lead_snps",
     "get_highlight_snps",
@@ -42,4 +47,4 @@ __all__ = [
     "ResourceConfig",
 ]
-__version__ = "0.2.4"
+__version__ = "0.2.6"

pycmplot-0.2.6/pycmplot/__main__.py ADDED Viewed

@@ -0,0 +1,6 @@
+"""Entry point for ``python -m pycmplot`` invocation."""
+from pycmplot._core import main
+if __name__ == "__main__":
+    main()

{pycmplot-0.2.4 → pycmplot-0.2.6}/pycmplot/_core.py RENAMED Viewed

@@ -1,62 +1,62 @@
-from __future__ import annotations
-CORE_MODULE = """
+"""
 pycmplot._core
 ==============
 Main entry point that orchestrates CLI argument parsing, data loading, and
 plot dispatch.  This module is intentionally thin: it delegates all heavy
-work to :mod:`pycmplot.io`, :mod:`pycmplot.plotting.linear`, and
-:mod:`pycmplot.plotting.circular`.
+work to :mod:`pycmplot.io`, :mod:`pycmplot.plotting.linear`,
+:mod:`pycmplot.plotting.circular`, and :mod:`pycmplot.plotting.qq`.
 All imports are deferred inside :func:`main` so that
 ``import pycmplot`` remains fast regardless of the size of the dependency
 tree.
 """
+from __future__ import annotations
 import logging
 import warnings
+import sys
 # Suppress noisy font-manager warnings before any matplotlib import
 logging.getLogger("matplotlib.font_manager").setLevel(logging.ERROR)
 warnings.filterwarnings("ignore")
-logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s")
+logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s", stream=sys.stdout)
 logger = logging.getLogger(__name__)
 def main() -> None:
-    MAIN = """Orchestrate the full pycmplot pipeline from the command line.
+    """Orchestrate the full pycmplot pipeline from the command line.
     This function is registered as the ``pycmplot`` console-script entry point
     in ``pyproject.toml`` / ``setup.cfg``.  It performs the following steps in
     order:
     1. **Parse CLI arguments** via :func:`~pycmplot.cli.get_arguments`.
-    2. **Parse comma-separated inputs** (files, labels, colours, track heights)
-    into Python lists via
-    :func:`~pycmplot.io.strip_comma_separated_input_streams`.
+    2. **Parse comma-separated inputs** (files, labels, colours, track heights,
+       builds) into Python lists via
+       :func:`~pycmplot.io.strip_comma_separated_input_streams`.
     3. **Construct output paths** (plot image and locus summary table TSV) via
-    :func:`~pycmplot.io.get_output_paths`.
+       :func:`~pycmplot.io.get_output_paths`.
     4. **Resolve column names** for every input file via
-    :func:`~pycmplot.io.prep_pycmplot_input_info`.
+       :func:`~pycmplot.io.prep_pycmplot_input_info`.
     5. **Load data** — reads summary statistics, normalises chromosome names,
-    runs hg19 → hg38 liftover if needed, extracts lead SNPs, generates the
-    hits summary table, and computes merged Circos sector sizes via
-    :func:`~pycmplot.io.get_sumstats_and_merged_sector_list`.
-    6. **Dispatch plotting** — calls
-    :func:`~pycmplot.plotting.circular.plot_circular` when ``--mode cm``,
-    or :func:`~pycmplot.plotting.linear.plot_linear` otherwise.
-    Parameters
-    ----------
-    None
-        All input is taken from ``sys.argv`` via :mod:`argparse`.
+       runs hg19 → hg38 liftover if needed, extracts lead SNPs, generates the
+       hits summary table, and computes merged Circos sector sizes via
+       :func:`~pycmplot.io.get_sumstats_and_merged_sector_list`.
+    6. **Dispatch Manhattan plot** — calls
+       :func:`~pycmplot.plotting.circular.plot_circular` when ``--mode cm``,
+       or :func:`~pycmplot.plotting.linear.plot_linear` otherwise.
+    7. **Optional QQ plot** — when ``--qq_plot`` is set, dispatches to one of
+       :func:`~pycmplot.plotting.qq.plot_qq_combined` (default),
+       :func:`~pycmplot.plotting.qq.plot_qq_separate` (``--qq_separate``), or
+       :func:`~pycmplot.plotting.qq.plot_qq_overlay` (``--qq_overlay``).
     Returns
     -------
     None
-        Saves the plot image and locus summary table to the directory
+        Saves the plot image(s) and locus summary table to the directory
         specified by ``--output_dir``.
     Raises
@@ -94,6 +94,7 @@ def main() -> None:
     from pycmplot.plotting.circular import plot_circular
     from pycmplot.plotting.qq import plot_qq_combined, plot_qq_separate, plot_qq_overlay
     from pycmplot.resources import ResourceConfig
+    from pycmplot.annotation import get_annotation_column
     # ------------------------------------------------------------------
     # Parse CLI
@@ -147,7 +148,9 @@ def main() -> None:
     track_heights    = args.track_heights
     linear_track_spacing    = args.linear_track_spacing
     no_track_labels  = args.no_track_labels
+    ylabel           = args.ylabel
     chr_spacing      = args.chr_spacing
+    figure_size      = args.figure_size
     # ------------------------------------------------------------------
@@ -194,8 +197,8 @@ def main() -> None:
         pos = pos_arg,
         snp = snp_arg,
         pcol = pcol_arg,
-        buildc = buildc_arg,
-        build = builds
+        build_column = buildc_arg,
+        build_list = builds
     )
     # ------------------------------------------------------------------
@@ -206,38 +209,25 @@ def main() -> None:
     # ------------------------------------------------------------------
     # Load data, compute sectors, get hits table
     # ------------------------------------------------------------------
-    (
-        merged_assoc_sector_sizes,
-        sumstats_loaded,
-        hits_table,
-        signif_lines,
-        pval_dict,
-    ) = get_sumstats_and_merged_sector_list(
+    pycmplot_dict = get_sumstats_and_merged_sector_list(
         sum_stats=sum_stats,
         labels=labels,
         trim_pval=trim_pval,
         logp=logp,
         file_info=sumstats_hdr_dic,
         sort_tracks=sort_track,
-        table_out=table_out,
+        table_out=plt_base,
         signif_threshold=signif_threshold,
         signif_line=signif_line,
         suggest_threshold=suggest_threshold,
         resources=resources,
     )
-    # ------------------------------------------------------------------
-    # ANNOTATE BY
-    # ------------------------------------------------------------------
-    label_col = 'SNP'
-    if annotate:
-        if str(annotate).upper() == "GENE" and 'top_gene' in hits_table.columns:
-            label_col = 'top_gene'
-        elif label_col in hits_table.columns:
-            label_col = annotate
-        logger.info(f"Anotate by: {label_col}")
+    merged_assoc_sector_sizes = pycmplot_dict["sectors"]
+    sumstats_loaded = pycmplot_dict["dfs"]
+    hits_table = pycmplot_dict["annot"]
+    signif_lines = pycmplot_dict["lines"]
+    pval_dict = pycmplot_dict["pvals"]
     # ------------------------------------------------------------------
     # CIRCULAR MANHATTAN
@@ -260,7 +250,6 @@ def main() -> None:
             track_label_size = track_label_size,
             track_label_orientation = track_label_orientation,
             annotate = annotate,
-            label_col = label_col if annotate else None,
             annotation_size = annotation_size,
             hits_table = hits_table,
             sector_sizes = merged_assoc_sector_sizes,
@@ -280,6 +269,9 @@ def main() -> None:
     # ------------------------------------------------------------------
     else:
         logger.info("Generating LINEAR MANHATTAN Plot ...")
+        fsize = figure_size.strip(" ").split(",")
+        fsize = [int(v) for v in fsize]
+        logger.info(f"FIGURE SIZE: {fsize}")
         plot_linear(
             sumstats_loaded=sumstats_loaded,
             track_heights=t_heights,
@@ -291,19 +283,19 @@ def main() -> None:
             highlight_color=highlight_color,
             highlight_line=highlight_line,
             highlight_line_color=highlight_line_color,
-            annotate=annotate,
+            annotate=annotate,
             hits_table=hits_table if not hits_table.empty else None,
-            label_col=label_col if annotate else None,
             chr_spacing=chr_spacing,
             linear_track_spacing=linear_track_spacing,
             colors=colors,
             signif_lines=signif_lines,
             plot_title=plot_title,
             no_track_labels=no_track_labels,
+            ylabel=ylabel,
             dpi=dpi,
             output_format=output_format,
             output_dir=output_dir,
-            figsize=(15, 9)
+            figsize=fsize
         )
     # ------------------------------------------------------------------
@@ -316,6 +308,7 @@ def main() -> None:
         if qq_separate:
             plot_qq_separate(
                 pval_dict=pval_dict,
+                base_name=plot_title,
                 thin=qq_thin,
                 thin_below=thin_below,
                 max_points=qq_max_points,

{pycmplot-0.2.4 → pycmplot-0.2.6}/pycmplot/annotation.py RENAMED Viewed

@@ -1,6 +1,4 @@
-from __future__ import annotations
-MODULE_DOCSTRING = """
+"""
 pycmplot.annotation
 ====================
@@ -22,6 +20,8 @@ paths can be supplied via the ``PYCMPLOT_GENEINFO_HG38`` /
 ``PYCMPLOT_GENEINFO_HG19`` environment variables.
 """
+from __future__ import annotations
 import bisect
 import logging
 from typing import Optional
@@ -41,7 +41,7 @@ logger = logging.getLogger(__name__)
 # ---------------------------------------------------------------------------
 def _build_genes_dict(genes_df: pd.DataFrame) -> dict:
-    BUILD_GENES_DICT = """Build a chromosome-keyed interval dictionary with sorted start positions.
+    """Build a chromosome-keyed interval dictionary with sorted start positions.
     Pre-processes the gene reference DataFrame into a structure that supports
     efficient O(log N) binary-search lookup of genes near a query position.
@@ -98,7 +98,7 @@ def _annotate_variant(
     window: int = 500_000,
     promoter_window: int = 2_000,
 ) -> dict:
-    ANNOTATE_VARIANT = """Return strand-aware nearest-gene annotation for a single variant.
+    """Return strand-aware nearest-gene annotation for a single variant.
     Searches the pre-built *genes_dict* within *window* bp of *pos* on
     *chrom*.  Reports the nearest upstream and downstream genes (relative to
@@ -238,8 +238,7 @@ def _annotate_and_prioritize_variant(
     promoter_window: int = 2_000,
     biotype_weights: Optional[dict] = None,
 ) -> Optional[dict]:
-    ANNOTATE_PRIORITIZE = """Score and rank candidate genes for a single variant using a composite
-    priority metric.
+    """Score and rank candidate genes for a single variant using a composite priority metric.
     Builds a candidate gene set within *window* bp of *pos* on *chrom*, then
     scores each candidate on four additive components:
@@ -386,7 +385,7 @@ def _annotate_and_prioritize_variant(
 # ---------------------------------------------------------------------------
 def _clump_by_distance(df: pd.DataFrame, window_kb: int = 500) -> pd.DataFrame:
-    CLUMP_BY_DISTANCE = """Reduce a lead-SNP table to one representative SNP per locus.
+    """Reduce a lead-SNP table to one representative SNP per locus.
     Applies greedy distance-based clumping within each chromosome group,
     starting from the most significant SNP (lowest ``P`` or highest ``logP``).
@@ -438,7 +437,7 @@ def get_hits_summary_table(
     table_out: Optional[str] = None,
     resources: Optional[ResourceConfig] = None,
 ) -> pd.DataFrame:
-    GET_HITS_SUMMARY_TABLE = """Annotate lead SNPs with nearest genes and write the locus summary table.
+    """Annotate lead SNPs with nearest genes and write the locus summary table.
     For each lead SNP in *leads_df*, runs two complementary annotation passes:
@@ -475,33 +474,21 @@ def get_hits_summary_table(
         Clumped locus summary table.  Contains all columns from *leads_df*
         plus annotation fields from both passes, including:
-        .. list-table::
-        :widths: 30 70
-        :header-rows: 1
-        * - Column
-            - Description
-        * - ``genic``
-            - ``True`` when the lead SNP overlaps a gene body
-        * - ``nearest_upstream_gene``
-            - Nearest upstream gene symbol (strand-aware)
-        * - ``upstream_distance``
-            - Distance to ``nearest_upstream_gene`` in bp
-        * - ``nearest_downstream_gene``
-            - Nearest downstream gene symbol (strand-aware)
-        * - ``downstream_distance``
-            - Distance to ``nearest_downstream_gene`` in bp
-        * - ``promoter_upstream_flag``
-            - ``True`` when the SNP is within 2 kb upstream of a TSS
-        * - ``gene_density``
-            - Number of genes within the search window
-        * - ``top_gene``
-            - Top-priority gene from the scoring pass
-        * - ``biotype``
-            - Ensembl biotype of ``top_gene`` (``'intergenic'`` when no
-            genic overlap)
-        * - ``priority_score``
-            - Composite priority score (genic hits only)
+        - ``genic`` — ``True`` when the lead SNP overlaps a gene body.
+        - ``nearest_upstream_gene`` — nearest upstream gene symbol
+          (strand-aware).
+        - ``upstream_distance`` — distance to ``nearest_upstream_gene`` in bp.
+        - ``nearest_downstream_gene`` — nearest downstream gene symbol
+          (strand-aware).
+        - ``downstream_distance`` — distance to ``nearest_downstream_gene`` in
+          bp.
+        - ``promoter_upstream_flag`` — ``True`` when the SNP is within 2 kb
+          upstream of a TSS.
+        - ``gene_density`` — number of genes within the search window.
+        - ``top_gene`` — top-priority gene from the scoring pass.
+        - ``biotype`` — Ensembl biotype of ``top_gene`` (``'intergenic'`` when
+          no genic overlap).
+        - ``priority_score`` — composite priority score (genic hits only).
     Notes
     -----
@@ -578,7 +565,41 @@ def get_hits_summary_table(
         locus_table = leads_df
     if table_out is not None:
-        locus_table.to_csv(table_out, index=False, sep="\t", na_rep="None")
-        logger.info("Locus summary written to: %s", table_out)
+        outpath = table_out.replace(" ", "_").lower() + '.tsv'
+        locus_table.to_csv(outpath, index=False, sep="\t", na_rep="None")
+        logger.info("Locus summary written to: %s", outpath)
     return _clump_by_distance(locus_table, window_kb=window_kb)
+def get_annotation_column(
+    annotate: str = None,
+    hits_table: pd.DataFrame = None,
+    label_col: str = None,
+):
+    if annotate and not hits_table.empty:
+        if label_col is not None and label_col in hits_table.columns:
+            label_clm = label_col
+        elif annotate in hits_table.columns:
+            label_clm = annotate
+        else:
+            if str(annotate).upper() == "GENE":
+                for i, (_, row) in enumerate(hits_table.iterrows()):
+                    try:
+                        if row["genic"]:
+                            label_clm = "nearest_upstream_gene"
+                            label_msg = "'POS' is genic"
+                        else:
+                            label_clm = "top_gene"
+                            label_msg = "'POS' is not genic"
+                        logger.info("%s", label_msg)
+                    except Exception:
+                        logger.warning(
+                            "Annotation columns '%s' and '%s' not found in hits table: %s; "
+                            "falling back to 'SNP'.", annotate, label_col, hits_table.columns.values,
+                        )
+                        label_clm = 'SNP'
+    logger.info("Annotating by: %s", label_clm)
+    return label_clm

pycmplot 0.2.4__tar.gz → 0.2.6__tar.gz

pycmplot 0.2.4__tar.gz → 0.2.6__tar.gz