PyPI - pylocuszoom - Versions diffs - 0.8.0__py3-none-any.whl → 1.1.0__py3-none-any.whl - Mend

pylocuszoom: 0.8.0 (py3-none-any.whl) → 1.1.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28)

pylocuszoom/__init__.py +27 -7
pylocuszoom/_plotter_utils.py +66 -0
pylocuszoom/backends/base.py +56 -0
pylocuszoom/backends/bokeh_backend.py +141 -29
pylocuszoom/backends/matplotlib_backend.py +60 -0
pylocuszoom/backends/plotly_backend.py +297 -88
pylocuszoom/config.py +365 -0
pylocuszoom/ensembl.py +6 -11
pylocuszoom/eqtl.py +3 -7
pylocuszoom/exceptions.py +33 -0
pylocuszoom/finemapping.py +2 -7
pylocuszoom/forest.py +1 -0
pylocuszoom/gene_track.py +10 -31
pylocuszoom/labels.py +6 -2
pylocuszoom/manhattan.py +246 -0
pylocuszoom/manhattan_plotter.py +760 -0
pylocuszoom/plotter.py +401 -327
pylocuszoom/qq.py +123 -0
pylocuszoom/recombination.py +7 -7
pylocuszoom/schemas.py +1 -6
pylocuszoom/stats_plotter.py +319 -0
pylocuszoom/utils.py +2 -4
pylocuszoom/validation.py +51 -0
{pylocuszoom-0.8.0.dist-info → pylocuszoom-1.1.0.dist-info}/METADATA +159 -25
pylocuszoom-1.1.0.dist-info/RECORD +36 -0
pylocuszoom-0.8.0.dist-info/RECORD +0 -29
{pylocuszoom-0.8.0.dist-info → pylocuszoom-1.1.0.dist-info}/WHEEL +0 -0
{pylocuszoom-0.8.0.dist-info → pylocuszoom-1.1.0.dist-info}/licenses/LICENSE.md +0 -0

pylocuszoom/plotter.py CHANGED Viewed

@@ -15,7 +15,9 @@ from typing import Any, List, Optional, Tuple
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
+import requests
+from ._plotter_utils import DEFAULT_GENOMEWIDE_THRESHOLD
 from .backends import BackendType, get_backend
 from .backends.hover import HoverConfig, HoverDataBuilder
 from .colors import (
@@ -28,32 +30,31 @@ from .colors import (
     get_eqtl_color,
     get_ld_bin,
     get_ld_color_palette,
-    get_phewas_category_palette,
 )
+from .config import PlotConfig, StackedPlotConfig
 from .ensembl import get_genes_for_region
 from .eqtl import validate_eqtl_df
 from .finemapping import (
     get_credible_sets,
     prepare_finemapping_for_plotting,
 )
-from .forest import validate_forest_df
 from .gene_track import (
     assign_gene_positions,
     plot_gene_track_generic,
 )
 from .ld import calculate_ld, find_plink
 from .logging import enable_logging, logger
-from .phewas import validate_phewas_df
+from .manhattan_plotter import ManhattanPlotter
 from .recombination import (
     RECOMB_COLOR,
     download_canine_recombination_maps,
     get_default_data_dir,
     get_recombination_rate_for_region,
 )
+from .stats_plotter import StatsPlotter
 from .utils import normalize_chrom, validate_genes_df, validate_gwas_df
-# Default significance threshold: 5e-8 (genome-wide significance)
-DEFAULT_GENOMEWIDE_THRESHOLD = 5e-8
+# Precomputed significance line value (used for plotting)
 DEFAULT_GENOMEWIDE_LINE = -np.log10(DEFAULT_GENOMEWIDE_THRESHOLD)
@@ -138,6 +139,7 @@ class LocusZoomPlotter:
             genome_build if genome_build else self._default_build(species)
         )
         self._backend = get_backend(backend)
+        self._backend_name = backend  # Store for delegation to child plotters
         self.plink_path = plink_path or find_plink()
         self.recomb_data_dir = recomb_data_dir
         self.genomewide_threshold = genomewide_threshold
@@ -147,6 +149,27 @@ class LocusZoomPlotter:
         # Cache for loaded data
         self._recomb_cache = {}
+    @property
+    def _manhattan_plotter(self) -> ManhattanPlotter:
+        """Lazy-load ManhattanPlotter with shared configuration."""
+        if not hasattr(self, "_manhattan_plotter_instance"):
+            self._manhattan_plotter_instance = ManhattanPlotter(
+                species=self.species,
+                backend=self._backend_name,
+                genomewide_threshold=self.genomewide_threshold,
+            )
+        return self._manhattan_plotter_instance
+    @property
+    def _stats_plotter(self) -> StatsPlotter:
+        """Lazy-load StatsPlotter with shared configuration."""
+        if not hasattr(self, "_stats_plotter_instance"):
+            self._stats_plotter_instance = StatsPlotter(
+                backend=self._backend_name,
+                genomewide_threshold=self.genomewide_threshold,
+            )
+        return self._stats_plotter_instance
     @staticmethod
     def _default_build(species: str) -> Optional[str]:
         """Get default genome build for species."""
@@ -171,9 +194,17 @@ class LocusZoomPlotter:
             # Download
             try:
                 return download_canine_recombination_maps()
-            except Exception as e:
+            except (requests.RequestException, OSError, IOError) as e:
+                # Expected network/file errors - graceful fallback
                 logger.warning(f"Could not download recombination maps: {e}")
                 return None
+            except Exception as e:
+                # JUSTIFICATION: Download failure should not prevent plotting.
+                # We catch broadly here because graceful degradation is acceptable
+                # for optional recombination map downloads. Error-level logging
+                # ensures the issue is visible.
+                logger.error(f"Unexpected error downloading recombination maps: {e}")
+                return None
         elif self.recomb_data_dir:
             return Path(self.recomb_data_dir)
         return None
@@ -204,56 +235,96 @@ class LocusZoomPlotter:
         except FileNotFoundError:
             return None
+    def _transform_pvalues(self, df: pd.DataFrame, p_col: str) -> pd.DataFrame:
+        """Add neglog10p column with -log10 transformed p-values.
+        Delegates to shared utility function. Assumes df is already a copy.
+        Args:
+            df: DataFrame with p-value column (should be a copy).
+            p_col: Name of p-value column.
+        Returns:
+            DataFrame with neglog10p column added.
+        """
+        # Use shared utility - note: df should already be a copy at call sites
+        df["neglog10p"] = -np.log10(df[p_col].clip(lower=1e-300))
+        return df
     def plot(
         self,
         gwas_df: pd.DataFrame,
+        *,
         chrom: int,
         start: int,
         end: int,
+        pos_col: str = "ps",
+        p_col: str = "p_wald",
+        rs_col: str = "rs",
+        snp_labels: bool = True,
+        label_top_n: int = 5,
+        show_recombination: bool = True,
+        figsize: Tuple[float, float] = (12.0, 8.0),
         lead_pos: Optional[int] = None,
         ld_reference_file: Optional[str] = None,
         ld_col: Optional[str] = None,
         genes_df: Optional[pd.DataFrame] = None,
         exons_df: Optional[pd.DataFrame] = None,
         recomb_df: Optional[pd.DataFrame] = None,
-        show_recombination: bool = True,
-        snp_labels: bool = True,
-        label_top_n: int = 5,
-        pos_col: str = "ps",
-        p_col: str = "p_wald",
-        rs_col: str = "rs",
-        figsize: Tuple[int, int] = (12, 8),
     ) -> Any:
         """Create a regional association plot.
         Args:
             gwas_df: GWAS results DataFrame.
             chrom: Chromosome number.
-            start: Start position of the region.
-            end: End position of the region.
-            lead_pos: Position of the lead/index SNP to highlight.
-            ld_reference_file: PLINK binary fileset for LD calculation.
-                If provided with lead_pos, calculates LD on the fly.
-            ld_col: Column name for pre-computed LD (R²) values.
-                Use this if LD was calculated externally.
+            start: Start position in base pairs.
+            end: End position in base pairs.
+            pos_col: Column name for genomic position.
+            p_col: Column name for p-value.
+            rs_col: Column name for SNP identifier.
+            snp_labels: Whether to show SNP labels on plot.
+            label_top_n: Number of top SNPs to label.
+            show_recombination: Whether to show recombination rate overlay.
+            figsize: Figure size as (width, height) in inches.
+            lead_pos: Position of lead SNP to highlight. For stacked plots with
+                multiple regions, use plot_stacked() with lead_positions (plural).
+            ld_reference_file: Path to PLINK binary fileset for LD calculation.
+            ld_col: Column name for pre-computed LD (R^2) values.
             genes_df: Gene annotations with chr, start, end, gene_name.
             exons_df: Exon annotations with chr, start, end, gene_name.
             recomb_df: Pre-loaded recombination rate data.
                 If None and show_recombination=True, loads from species default.
-            show_recombination: Whether to show recombination rate overlay.
-            snp_labels: Whether to label top SNPs.
-            label_top_n: Number of top SNPs to label.
-            pos_col: Column name for position.
-            p_col: Column name for p-value.
-            rs_col: Column name for SNP ID.
-            figsize: Figure size.
         Returns:
-            Matplotlib Figure object.
+            Figure object (type depends on backend).
         Raises:
-            ValidationError: If required DataFrame columns are missing.
+            ValidationError: If parameters or DataFrame columns are invalid.
+        Example:
+            >>> fig = plotter.plot(
+            ...     gwas_df,
+            ...     chrom=1, start=1000000, end=2000000,
+            ...     lead_pos=1500000, snp_labels=True,
+            ... )
         """
+        # Validate parameters via Pydantic
+        PlotConfig.from_kwargs(
+            chrom=chrom,
+            start=start,
+            end=end,
+            pos_col=pos_col,
+            p_col=p_col,
+            rs_col=rs_col,
+            snp_labels=snp_labels,
+            label_top_n=label_top_n,
+            show_recombination=show_recombination,
+            figsize=figsize,
+            lead_pos=lead_pos,
+            ld_reference_file=ld_reference_file,
+            ld_col=ld_col,
+        )
         # Validate inputs
         validate_gwas_df(gwas_df, pos_col=pos_col, p_col=p_col)
@@ -282,7 +353,24 @@ class LocusZoomPlotter:
         # Prepare data
         df = gwas_df.copy()
-        df["neglog10p"] = -np.log10(df[p_col].clip(lower=1e-300))
+        # Validate p-values and warn about issues
+        p_values = df[p_col]
+        nan_count = p_values.isna().sum()
+        if nan_count > 0:
+            logger.warning(
+                f"GWAS data contains {nan_count} NaN p-values which will be excluded"
+            )
+        invalid_count = ((p_values < 0) | (p_values > 1)).sum()
+        if invalid_count > 0:
+            logger.warning(
+                f"GWAS data contains {invalid_count} p-values outside [0, 1] range"
+            )
+        clipped_count = (p_values < 1e-300).sum()
+        if clipped_count > 0:
+            logger.debug(f"Clipping {clipped_count} p-values below 1e-300 to 1e-300")
+        df = self._transform_pvalues(df, p_col)
         # Calculate LD if reference file provided
         if ld_reference_file and lead_pos and ld_col is None:
@@ -351,7 +439,12 @@ class LocusZoomPlotter:
         # Format axes
         self._backend.set_ylabel(ax, r"$-\log_{10}$ P")
         self._backend.set_xlim(ax, start, end)
-        self._backend.hide_spines(ax, ["top", "right"])
+        # When recombination overlay is present, keep right spine for secondary y-axis
+        has_recomb = recomb_df is not None and not recomb_df.empty
+        if has_recomb and self._backend.supports_secondary_axis:
+            self._backend.hide_spines(ax, ["top"])
+        else:
+            self._backend.hide_spines(ax, ["top", "right"])
         # Add LD legend (all backends)
         if ld_col is not None and ld_col in df.columns:
@@ -364,10 +457,12 @@ class LocusZoomPlotter:
             )
             self._backend.set_xlabel(gene_ax, f"Chromosome {chrom} (Mb)")
             self._backend.hide_spines(gene_ax, ["top", "right", "left"])
+            # Format both axes for interactive backends (they don't share x-axis)
+            self._backend.format_xaxis_mb(gene_ax)
         else:
             self._backend.set_xlabel(ax, f"Chromosome {chrom} (Mb)")
-        # Format x-axis with Mb labels
+        # Format x-axis with Mb labels (association axis always needs formatting)
         self._backend.format_xaxis_mb(ax)
         # Adjust layout
@@ -516,18 +611,29 @@ class LocusZoomPlotter:
             return
         # Create secondary y-axis
-        yaxis_name = self._backend.create_twin_axis(ax)
-        # For plotly, yaxis_name is a tuple (fig, row, secondary_y)
-        # For bokeh, yaxis_name is just a string
-        if isinstance(yaxis_name, tuple):
-            _, _, secondary_y = yaxis_name
+        twin_result = self._backend.create_twin_axis(ax)
+        # Matplotlib returns the twin Axes object itself - use it for drawing
+        # Plotly returns tuple (fig, row, secondary_y_name)
+        # Bokeh returns string "secondary"
+        from matplotlib.axes import Axes
+        if isinstance(twin_result, Axes):
+            # Matplotlib: use the twin axis for all secondary axis operations
+            secondary_ax = twin_result
+            secondary_y = None  # Not used for matplotlib
+        elif isinstance(twin_result, tuple):
+            # Plotly: use original ax, specify y-axis via yaxis_name
+            secondary_ax = ax
+            _, _, secondary_y = twin_result
         else:
-            secondary_y = yaxis_name
+            # Bokeh: use original ax, specify y-axis via yaxis_name
+            secondary_ax = ax
+            secondary_y = twin_result
         # Plot fill under curve
         self._backend.fill_between_secondary(
-            ax,
+            secondary_ax,
             region_recomb["pos"],
             0,
             region_recomb["rate"],
@@ -538,28 +644,32 @@ class LocusZoomPlotter:
         # Plot recombination rate line
         self._backend.line_secondary(
-            ax,
+            secondary_ax,
             region_recomb["pos"],
             region_recomb["rate"],
             color=RECOMB_COLOR,
-            linewidth=1.5,
-            alpha=0.7,
+            linewidth=2.5,
+            alpha=0.8,
             yaxis_name=secondary_y,
         )
-        # Set y-axis limits and label
+        # Set y-axis limits and label - scale to fit data with headroom
         max_rate = region_recomb["rate"].max()
         self._backend.set_secondary_ylim(
-            ax, 0, max(max_rate * 1.2, 20), yaxis_name=secondary_y
+            secondary_ax, 0, max(max_rate * 1.3, 10), yaxis_name=secondary_y
         )
         self._backend.set_secondary_ylabel(
-            ax,
+            secondary_ax,
             "Recombination rate (cM/Mb)",
-            color=RECOMB_COLOR,
+            color="black",  # Use black for readability (line/fill color remains light blue)
             fontsize=9,
             yaxis_name=secondary_y,
         )
+        # Hide top spine on the secondary axis (matplotlib twin axis has its own frame)
+        if isinstance(twin_result, Axes):
+            secondary_ax.spines["top"].set_visible(False)
     def _plot_finemapping(
         self,
         ax: Any,
@@ -664,14 +774,22 @@ class LocusZoomPlotter:
     def plot_stacked(
         self,
         gwas_dfs: List[pd.DataFrame],
+        *,
         chrom: int,
         start: int,
         end: int,
+        pos_col: str = "ps",
+        p_col: str = "p_wald",
+        rs_col: str = "rs",
+        snp_labels: bool = True,
+        label_top_n: int = 3,
+        show_recombination: bool = True,
+        figsize: Tuple[float, float] = (12.0, 8.0),
+        ld_reference_file: Optional[str] = None,
+        ld_col: Optional[str] = None,
         lead_positions: Optional[List[int]] = None,
         panel_labels: Optional[List[str]] = None,
-        ld_reference_file: Optional[str] = None,
         ld_reference_files: Optional[List[str]] = None,
-        ld_col: Optional[str] = None,
         genes_df: Optional[pd.DataFrame] = None,
         exons_df: Optional[pd.DataFrame] = None,
         eqtl_df: Optional[pd.DataFrame] = None,
@@ -679,13 +797,6 @@ class LocusZoomPlotter:
         finemapping_df: Optional[pd.DataFrame] = None,
         finemapping_cs_col: Optional[str] = "cs",
         recomb_df: Optional[pd.DataFrame] = None,
-        show_recombination: bool = True,
-        snp_labels: bool = True,
-        label_top_n: int = 3,
-        pos_col: str = "ps",
-        p_col: str = "p_wald",
-        rs_col: str = "rs",
-        figsize: Tuple[float, Optional[float]] = (12, None),
     ) -> Any:
         """Create stacked regional association plots for multiple GWAS.
@@ -695,30 +806,29 @@ class LocusZoomPlotter:
         Args:
             gwas_dfs: List of GWAS results DataFrames to stack.
             chrom: Chromosome number.
-            start: Start position of the region.
-            end: End position of the region.
-            lead_positions: List of lead SNP positions (one per GWAS).
-                If None, auto-detects from lowest p-value.
-            panel_labels: Labels for each panel (e.g., phenotype names).
-            ld_reference_file: Single PLINK fileset for all panels.
+            start: Start position in base pairs.
+            end: End position in base pairs.
+            pos_col: Column name for genomic position.
+            p_col: Column name for p-value.
+            rs_col: Column name for SNP identifier.
+            snp_labels: Whether to show SNP labels on plot.
+            label_top_n: Number of top SNPs to label (default 3 for stacked).
+            show_recombination: Whether to show recombination rate overlay.
+            figsize: Figure size as (width, height) in inches.
+            ld_reference_file: Single PLINK fileset (broadcast to all panels).
+            ld_col: Column name for pre-computed LD (R^2) values.
+            lead_positions: List of lead SNP positions, one per region. For single
+                region plots, use plot() with lead_pos (singular).
+            panel_labels: List of panel labels (one per panel).
             ld_reference_files: List of PLINK filesets (one per panel).
-            ld_col: Column name for pre-computed LD (R²) values in each DataFrame.
-                Use this if LD was calculated externally.
             genes_df: Gene annotations for bottom track.
             exons_df: Exon annotations for gene track.
             eqtl_df: eQTL data to display as additional panel.
             eqtl_gene: Filter eQTL data to this target gene.
             finemapping_df: Fine-mapping/SuSiE results with pos and pip columns.
                 Displayed as PIP line with optional credible set coloring.
-            finemapping_cs_col: Column name for credible set assignment in finemapping_df.
+            finemapping_cs_col: Column name for credible set assignment.
             recomb_df: Pre-loaded recombination rate data.
-            show_recombination: Whether to show recombination overlay.
-            snp_labels: Whether to label top SNPs.
-            label_top_n: Number of top SNPs to label per panel.
-            pos_col: Column name for position.
-            p_col: Column name for p-value.
-            rs_col: Column name for SNP ID.
-            figsize: Figure size (width, height). If height is None, auto-calculates.
         Returns:
             Figure object (type depends on backend).
@@ -728,9 +838,27 @@ class LocusZoomPlotter:
             ...     [gwas_height, gwas_bmi, gwas_whr],
             ...     chrom=1, start=1000000, end=2000000,
             ...     panel_labels=["Height", "BMI", "WHR"],
-            ...     genes_df=genes_df,
             ... )
         """
+        # Validate parameters via Pydantic
+        StackedPlotConfig.from_kwargs(
+            chrom=chrom,
+            start=start,
+            end=end,
+            pos_col=pos_col,
+            p_col=p_col,
+            rs_col=rs_col,
+            snp_labels=snp_labels,
+            label_top_n=label_top_n,
+            show_recombination=show_recombination,
+            figsize=figsize,
+            ld_reference_file=ld_reference_file,
+            ld_col=ld_col,
+            lead_positions=lead_positions,
+            panel_labels=panel_labels,
+            ld_reference_files=ld_reference_files,
+        )
         n_gwas = len(gwas_dfs)
         if n_gwas == 0:
             raise ValueError("At least one GWAS DataFrame required")
@@ -766,8 +894,16 @@ class LocusZoomPlotter:
             for df in gwas_dfs:
                 region_df = df[(df[pos_col] >= start) & (df[pos_col] <= end)]
                 if not region_df.empty:
-                    lead_idx = region_df[p_col].idxmin()
-                    lead_positions.append(int(region_df.loc[lead_idx, pos_col]))
+                    # Filter out NaN p-values for lead SNP detection
+                    valid_p = region_df[p_col].dropna()
+                    if valid_p.empty:
+                        logger.warning(
+                            "All p-values in region are NaN, cannot determine lead SNP"
+                        )
+                        lead_positions.append(None)
+                    else:
+                        lead_idx = valid_p.idxmin()
+                        lead_positions.append(int(region_df.loc[lead_idx, pos_col]))
                 else:
                     lead_positions.append(None)
@@ -841,24 +977,34 @@ class LocusZoomPlotter:
         for i, (gwas_df, lead_pos) in enumerate(zip(gwas_dfs, lead_positions)):
             ax = axes[i]
             df = gwas_df.copy()
-            df["neglog10p"] = -np.log10(df[p_col].clip(lower=1e-300))
+            df = self._transform_pvalues(df, p_col)
             # Use pre-computed LD or calculate from reference
             panel_ld_col = ld_col
             if ld_reference_files and ld_reference_files[i] and lead_pos and not ld_col:
-                lead_snp_row = df[df[pos_col] == lead_pos]
-                if not lead_snp_row.empty and rs_col in df.columns:
-                    lead_snp_id = lead_snp_row[rs_col].iloc[0]
-                    ld_df = calculate_ld(
-                        bfile_path=ld_reference_files[i],
-                        lead_snp=lead_snp_id,
-                        window_kb=max((end - start) // 1000, 500),
-                        plink_path=self.plink_path,
-                        species=self.species,
+                # Check if rs_col exists before attempting LD calculation
+                if rs_col not in df.columns:
+                    logger.warning(
+                        f"Cannot calculate LD for panel {i + 1}: column '{rs_col}' "
+                        f"not found in GWAS data. "
+                        f"Provide rs_col parameter or add SNP IDs to DataFrame."
                     )
-                    if not ld_df.empty:
-                        df = df.merge(ld_df, left_on=rs_col, right_on="SNP", how="left")
-                        panel_ld_col = "R2"
+                else:
+                    lead_snp_row = df[df[pos_col] == lead_pos]
+                    if not lead_snp_row.empty:
+                        lead_snp_id = lead_snp_row[rs_col].iloc[0]
+                        ld_df = calculate_ld(
+                            bfile_path=ld_reference_files[i],
+                            lead_snp=lead_snp_id,
+                            window_kb=max((end - start) // 1000, 500),
+                            plink_path=self.plink_path,
+                            species=self.species,
+                        )
+                        if not ld_df.empty:
+                            df = df.merge(
+                                ld_df, left_on=rs_col, right_on="SNP", how="left"
+                            )
+                            panel_ld_col = "R2"
             # Plot association
             self._plot_association(
@@ -953,8 +1099,16 @@ class LocusZoomPlotter:
             eqtl_data = eqtl_df.copy()
             # Filter by gene if specified
-            if eqtl_gene and "gene" in eqtl_data.columns:
-                eqtl_data = eqtl_data[eqtl_data["gene"] == eqtl_gene]
+            eqtl_gene_filtered = False
+            if eqtl_gene:
+                if "gene" in eqtl_data.columns:
+                    eqtl_data = eqtl_data[eqtl_data["gene"] == eqtl_gene]
+                    eqtl_gene_filtered = True
+                else:
+                    logger.warning(
+                        f"eqtl_gene='{eqtl_gene}' specified but eQTL data has no 'gene' column; "
+                        "showing all eQTL data unfiltered"
+                    )
             # Filter by region (position and chromosome)
             if "pos" in eqtl_data.columns:
@@ -969,9 +1123,7 @@ class LocusZoomPlotter:
                 eqtl_data = eqtl_data[mask]
             if not eqtl_data.empty:
-                eqtl_data["neglog10p"] = -np.log10(
-                    eqtl_data["p_value"].clip(lower=1e-300)
-                )
+                eqtl_data = self._transform_pvalues(eqtl_data, "p_value")
                 # Build hover data using HoverDataBuilder
                 eqtl_extra_cols = {}
@@ -990,47 +1142,49 @@ class LocusZoomPlotter:
                 has_effect = "effect_size" in eqtl_data.columns
                 if has_effect:
-                    # Plot triangles by effect direction (batch by sign for efficiency)
+                    # Vectorized plotting: split by sign, assign colors in bulk
                     pos_effects = eqtl_data[eqtl_data["effect_size"] >= 0]
                     neg_effects = eqtl_data[eqtl_data["effect_size"] < 0]
-                    # Plot positive effects (up triangles)
-                    for _, row in pos_effects.iterrows():
-                        row_df = pd.DataFrame([row])
+                    # Vectorized color assignment using apply
+                    if not pos_effects.empty:
+                        pos_colors = pos_effects["effect_size"].apply(get_eqtl_color)
                         self._backend.scatter(
                             ax,
-                            pd.Series([row["pos"]]),
-                            pd.Series([row["neglog10p"]]),
-                            colors=get_eqtl_color(row["effect_size"]),
+                            pos_effects["pos"],
+                            pos_effects["neglog10p"],
+                            colors=pos_colors.tolist(),
                             sizes=50,
                             marker="^",
                             edgecolor="black",
                             linewidth=0.5,
                             zorder=2,
-                            hover_data=eqtl_hover_builder.build_dataframe(row_df),
+                            hover_data=eqtl_hover_builder.build_dataframe(pos_effects),
                         )
-                    # Plot negative effects (down triangles)
-                    for _, row in neg_effects.iterrows():
-                        row_df = pd.DataFrame([row])
+                    if not neg_effects.empty:
+                        neg_colors = neg_effects["effect_size"].apply(get_eqtl_color)
                         self._backend.scatter(
                             ax,
-                            pd.Series([row["pos"]]),
-                            pd.Series([row["neglog10p"]]),
-                            colors=get_eqtl_color(row["effect_size"]),
+                            neg_effects["pos"],
+                            neg_effects["neglog10p"],
+                            colors=neg_colors.tolist(),
                             sizes=50,
                             marker="v",
                             edgecolor="black",
                             linewidth=0.5,
                             zorder=2,
-                            hover_data=eqtl_hover_builder.build_dataframe(row_df),
+                            hover_data=eqtl_hover_builder.build_dataframe(neg_effects),
                         )
                     # Add eQTL effect legend (all backends)
                     self._backend.add_eqtl_legend(
                         ax, EQTL_POSITIVE_BINS, EQTL_NEGATIVE_BINS
                     )
                 else:
                     # No effect sizes - plot as diamonds
-                    label = f"eQTL ({eqtl_gene})" if eqtl_gene else "eQTL"
+                    # Only show gene in label if filtering was actually applied
+                    label = f"eQTL ({eqtl_gene})" if eqtl_gene_filtered else "eQTL"
                     self._backend.scatter(
                         ax,
                         eqtl_data["pos"],
@@ -1090,124 +1244,17 @@ class LocusZoomPlotter:
         significance_threshold: float = 5e-8,
         figsize: Tuple[float, float] = (10, 8),
     ) -> Any:
-        """Create a PheWAS (Phenome-Wide Association Study) plot.
-        Shows associations of a single variant across multiple phenotypes,
-        with phenotypes grouped by category and colored accordingly.
-        Args:
-            phewas_df: DataFrame with phenotype associations.
-            variant_id: Variant identifier (e.g., "rs12345") for plot title.
-            phenotype_col: Column name for phenotype names.
-            p_col: Column name for p-values.
-            category_col: Column name for phenotype categories.
-            effect_col: Optional column name for effect direction (beta/OR).
-            significance_threshold: P-value threshold for significance line.
-            figsize: Figure size as (width, height).
-        Returns:
-            Figure object (type depends on backend).
-        Example:
-            >>> fig = plotter.plot_phewas(
-            ...     phewas_df,
-            ...     variant_id="rs12345",
-            ...     category_col="category",
-            ... )
-        """
-        validate_phewas_df(phewas_df, phenotype_col, p_col, category_col)
-        df = phewas_df.copy()
-        df["neglog10p"] = -np.log10(df[p_col].clip(lower=1e-300))
-        # Sort by category then by p-value for consistent ordering
-        if category_col in df.columns:
-            df = df.sort_values([category_col, p_col])
-            categories = df[category_col].unique().tolist()
-            palette = get_phewas_category_palette(categories)
-        else:
-            df = df.sort_values(p_col)
-            categories = []
-            palette = {}
-        # Create figure
-        fig, axes = self._backend.create_figure(
-            n_panels=1,
-            height_ratios=[1.0],
+        """Create a PheWAS plot. See StatsPlotter.plot_phewas for docs."""
+        return self._stats_plotter.plot_phewas(
+            phewas_df=phewas_df,
+            variant_id=variant_id,
+            phenotype_col=phenotype_col,
+            p_col=p_col,
+            category_col=category_col,
+            effect_col=effect_col,
+            significance_threshold=significance_threshold,
             figsize=figsize,
         )
-        ax = axes[0]
-        # Assign y-positions (one per phenotype)
-        df["y_pos"] = range(len(df))
-        # Plot points by category
-        if categories:
-            for cat in categories:
-                cat_data = df[df[category_col] == cat]
-                # Use upward triangles for positive effects, circles otherwise
-                if effect_col and effect_col in cat_data.columns:
-                    for _, row in cat_data.iterrows():
-                        marker = "^" if row[effect_col] >= 0 else "v"
-                        self._backend.scatter(
-                            ax,
-                            pd.Series([row["neglog10p"]]),
-                            pd.Series([row["y_pos"]]),
-                            colors=palette[cat],
-                            sizes=60,
-                            marker=marker,
-                            edgecolor="black",
-                            linewidth=0.5,
-                            zorder=2,
-                        )
-                else:
-                    self._backend.scatter(
-                        ax,
-                        cat_data["neglog10p"],
-                        cat_data["y_pos"],
-                        colors=palette[cat],
-                        sizes=60,
-                        marker="o",
-                        edgecolor="black",
-                        linewidth=0.5,
-                        zorder=2,
-                    )
-        else:
-            self._backend.scatter(
-                ax,
-                df["neglog10p"],
-                df["y_pos"],
-                colors="#4169E1",
-                sizes=60,
-                edgecolor="black",
-                linewidth=0.5,
-                zorder=2,
-            )
-        # Add significance threshold line
-        sig_line = -np.log10(significance_threshold)
-        self._backend.axvline(
-            ax, x=sig_line, color="red", linestyle="--", linewidth=1, alpha=0.7
-        )
-        # Set axis labels and limits
-        self._backend.set_xlabel(ax, r"$-\log_{10}$ P")
-        self._backend.set_ylabel(ax, "Phenotype")
-        self._backend.set_ylim(ax, -0.5, len(df) - 0.5)
-        # Set y-tick labels to phenotype names
-        self._backend.set_yticks(
-            ax,
-            positions=df["y_pos"].tolist(),
-            labels=df[phenotype_col].tolist(),
-            fontsize=8,
-        )
-        self._backend.set_title(ax, f"PheWAS: {variant_id}")
-        self._backend.hide_spines(ax, ["top", "right"])
-        self._backend.finalize_layout(fig)
-        return fig
     def plot_forest(
         self,
@@ -1222,116 +1269,143 @@ class LocusZoomPlotter:
         effect_label: str = "Effect Size",
         figsize: Tuple[float, float] = (8, 6),
     ) -> Any:
-        """Create a forest plot showing effect sizes with confidence intervals.
-        Args:
-            forest_df: DataFrame with effect sizes and confidence intervals.
-            variant_id: Variant identifier for plot title.
-            study_col: Column name for study/phenotype names.
-            effect_col: Column name for effect sizes.
-            ci_lower_col: Column name for lower confidence interval.
-            ci_upper_col: Column name for upper confidence interval.
-            weight_col: Optional column for study weights (affects marker size).
-            null_value: Reference value for null effect (0 for beta, 1 for OR).
-            effect_label: X-axis label.
-            figsize: Figure size as (width, height).
-        Returns:
-            Figure object (type depends on backend).
-        Example:
-            >>> fig = plotter.plot_forest(
-            ...     forest_df,
-            ...     variant_id="rs12345",
-            ...     effect_label="Odds Ratio",
-            ...     null_value=1.0,
-            ... )
-        """
-        validate_forest_df(forest_df, study_col, effect_col, ci_lower_col, ci_upper_col)
-        df = forest_df.copy()
-        # Create figure
-        fig, axes = self._backend.create_figure(
-            n_panels=1,
-            height_ratios=[1.0],
+        """Create a forest plot. See StatsPlotter.plot_forest for docs."""
+        return self._stats_plotter.plot_forest(
+            forest_df=forest_df,
+            variant_id=variant_id,
+            study_col=study_col,
+            effect_col=effect_col,
+            ci_lower_col=ci_lower_col,
+            ci_upper_col=ci_upper_col,
+            weight_col=weight_col,
+            null_value=null_value,
+            effect_label=effect_label,
             figsize=figsize,
         )
-        ax = axes[0]
-        # Assign y-positions (reverse so first study is at top)
-        df["y_pos"] = range(len(df) - 1, -1, -1)
-        # Calculate marker sizes from weights
-        if weight_col and weight_col in df.columns:
-            # Scale weights to marker sizes (min 40, max 200)
-            weights = df[weight_col]
-            min_size, max_size = 40, 200
-            weight_range = weights.max() - weights.min()
-            if weight_range > 0:
-                sizes = min_size + (weights - weights.min()) / weight_range * (
-                    max_size - min_size
-                )
-            else:
-                sizes = (min_size + max_size) / 2
-        else:
-            sizes = 80
-        # Calculate error bar extents
-        xerr_lower = df[effect_col] - df[ci_lower_col]
-        xerr_upper = df[ci_upper_col] - df[effect_col]
-        # Plot error bars (confidence intervals)
-        self._backend.errorbar_h(
-            ax,
-            x=df[effect_col],
-            y=df["y_pos"],
-            xerr_lower=xerr_lower,
-            xerr_upper=xerr_upper,
-            color="black",
-            linewidth=1.5,
-            capsize=3,
-            zorder=2,
+    def plot_manhattan(
+        self,
+        df: pd.DataFrame,
+        chrom_col: str = "chrom",
+        pos_col: str = "pos",
+        p_col: str = "p",
+        custom_chrom_order: Optional[List[str]] = None,
+        category_col: Optional[str] = None,
+        category_order: Optional[List[str]] = None,
+        significance_threshold: Optional[float] = DEFAULT_GENOMEWIDE_THRESHOLD,
+        figsize: Tuple[float, float] = (12, 5),
+        title: Optional[str] = None,
+    ) -> Any:
+        """Create a Manhattan plot. See ManhattanPlotter.plot_manhattan for docs."""
+        return self._manhattan_plotter.plot_manhattan(
+            df=df,
+            chrom_col=chrom_col,
+            pos_col=pos_col,
+            p_col=p_col,
+            custom_chrom_order=custom_chrom_order,
+            category_col=category_col,
+            category_order=category_order,
+            significance_threshold=significance_threshold,
+            figsize=figsize,
+            title=title,
         )
-        # Plot effect size markers
-        self._backend.scatter(
-            ax,
-            df[effect_col],
-            df["y_pos"],
-            colors="#4169E1",
-            sizes=sizes,
-            marker="s",  # square markers typical for forest plots
-            edgecolor="black",
-            linewidth=0.5,
-            zorder=3,
+    def plot_qq(
+        self,
+        df: pd.DataFrame,
+        p_col: str = "p",
+        show_confidence_band: bool = True,
+        show_lambda: bool = True,
+        figsize: Tuple[float, float] = (6, 6),
+        title: Optional[str] = None,
+    ) -> Any:
+        """Create a QQ plot. See ManhattanPlotter.plot_qq for docs."""
+        return self._manhattan_plotter.plot_qq(
+            df=df,
+            p_col=p_col,
+            show_confidence_band=show_confidence_band,
+            show_lambda=show_lambda,
+            figsize=figsize,
+            title=title,
         )
-        # Add null effect line
-        self._backend.axvline(
-            ax, x=null_value, color="grey", linestyle="--", linewidth=1, alpha=0.7
+    def plot_manhattan_stacked(
+        self,
+        gwas_dfs: List[pd.DataFrame],
+        chrom_col: str = "chrom",
+        pos_col: str = "pos",
+        p_col: str = "p",
+        custom_chrom_order: Optional[List[str]] = None,
+        significance_threshold: Optional[float] = DEFAULT_GENOMEWIDE_THRESHOLD,
+        panel_labels: Optional[List[str]] = None,
+        figsize: Tuple[float, float] = (12, 8),
+        title: Optional[str] = None,
+    ) -> Any:
+        """Create stacked Manhattan plots. See ManhattanPlotter.plot_manhattan_stacked for docs."""
+        return self._manhattan_plotter.plot_manhattan_stacked(
+            gwas_dfs=gwas_dfs,
+            chrom_col=chrom_col,
+            pos_col=pos_col,
+            p_col=p_col,
+            custom_chrom_order=custom_chrom_order,
+            significance_threshold=significance_threshold,
+            panel_labels=panel_labels,
+            figsize=figsize,
+            title=title,
         )
-        # Set axis labels and limits
-        self._backend.set_xlabel(ax, effect_label)
-        self._backend.set_ylim(ax, -0.5, len(df) - 0.5)
-        # Ensure x-axis includes the null value with some padding
-        x_min = min(df[ci_lower_col].min(), null_value)
-        x_max = max(df[ci_upper_col].max(), null_value)
-        x_padding = (x_max - x_min) * 0.1
-        self._backend.set_xlim(ax, x_min - x_padding, x_max + x_padding)
-        # Set y-tick labels to study names
-        self._backend.set_yticks(
-            ax,
-            positions=df["y_pos"].tolist(),
-            labels=df[study_col].tolist(),
-            fontsize=10,
+    def plot_manhattan_qq(
+        self,
+        df: pd.DataFrame,
+        chrom_col: str = "chrom",
+        pos_col: str = "pos",
+        p_col: str = "p",
+        custom_chrom_order: Optional[List[str]] = None,
+        significance_threshold: Optional[float] = DEFAULT_GENOMEWIDE_THRESHOLD,
+        show_confidence_band: bool = True,
+        show_lambda: bool = True,
+        figsize: Tuple[float, float] = (14, 5),
+        title: Optional[str] = None,
+    ) -> Any:
+        """Create side-by-side Manhattan and QQ plots. See ManhattanPlotter.plot_manhattan_qq for docs."""
+        return self._manhattan_plotter.plot_manhattan_qq(
+            df=df,
+            chrom_col=chrom_col,
+            pos_col=pos_col,
+            p_col=p_col,
+            custom_chrom_order=custom_chrom_order,
+            significance_threshold=significance_threshold,
+            show_confidence_band=show_confidence_band,
+            show_lambda=show_lambda,
+            figsize=figsize,
+            title=title,
         )
-        self._backend.set_title(ax, f"Forest Plot: {variant_id}")
-        self._backend.hide_spines(ax, ["top", "right"])
-        self._backend.finalize_layout(fig)
-        return fig
+    def plot_manhattan_qq_stacked(
+        self,
+        gwas_dfs: List[pd.DataFrame],
+        chrom_col: str = "chrom",
+        pos_col: str = "pos",
+        p_col: str = "p",
+        custom_chrom_order: Optional[List[str]] = None,
+        significance_threshold: Optional[float] = DEFAULT_GENOMEWIDE_THRESHOLD,
+        show_confidence_band: bool = True,
+        show_lambda: bool = True,
+        panel_labels: Optional[List[str]] = None,
+        figsize: Tuple[float, float] = (14, 8),
+        title: Optional[str] = None,
+    ) -> Any:
+        """Create stacked Manhattan+QQ plots. See ManhattanPlotter.plot_manhattan_qq_stacked for docs."""
+        return self._manhattan_plotter.plot_manhattan_qq_stacked(
+            gwas_dfs=gwas_dfs,
+            chrom_col=chrom_col,
+            pos_col=pos_col,
+            p_col=p_col,
+            custom_chrom_order=custom_chrom_order,
+            significance_threshold=significance_threshold,
+            show_confidence_band=show_confidence_band,
+            show_lambda=show_lambda,
+            panel_labels=panel_labels,
+            figsize=figsize,
+            title=title,
+        )

pylocuszoom 0.8.0__py3-none-any.whl → 1.1.0__py3-none-any.whl

pylocuszoom 0.8.0__py3-none-any.whl → 1.1.0__py3-none-any.whl