PyPI - pylocuszoom - Versions diffs - 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl - Mend

pylocuszoom 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

pylocuszoom/__init__.py +39 -20
pylocuszoom/backends/__init__.py +1 -5
pylocuszoom/backends/base.py +1 -1
pylocuszoom/backends/bokeh_backend.py +4 -7
pylocuszoom/backends/matplotlib_backend.py +6 -1
pylocuszoom/backends/plotly_backend.py +11 -12
pylocuszoom/colors.py +132 -0
pylocuszoom/eqtl.py +3 -2
pylocuszoom/finemapping.py +224 -0
pylocuszoom/gene_track.py +44 -31
pylocuszoom/labels.py +32 -33
pylocuszoom/ld.py +8 -7
pylocuszoom/plotter.py +381 -66
pylocuszoom/recombination.py +14 -14
pylocuszoom/utils.py +3 -1
{pylocuszoom-0.1.0.dist-info → pylocuszoom-0.2.0.dist-info}/METADATA +20 -25
pylocuszoom-0.2.0.dist-info/RECORD +21 -0
pylocuszoom-0.1.0.dist-info/RECORD +0 -20
{pylocuszoom-0.1.0.dist-info → pylocuszoom-0.2.0.dist-info}/WHEEL +0 -0
{pylocuszoom-0.1.0.dist-info → pylocuszoom-0.2.0.dist-info}/licenses/LICENSE.md +0 -0

pylocuszoom/__init__.py CHANGED Viewed

@@ -3,20 +3,21 @@
 This package provides LocusZoom-style regional association plots with:
 - LD coloring based on R² with lead variant
 - Gene and exon tracks
-- Recombination rate overlays (dog built-in, or user-provided)
+- Recombination rate overlays (canine built-in, or user-provided)
 - Automatic SNP labeling
 - Multiple backends: matplotlib (static), plotly (interactive), bokeh (dashboards)
 - eQTL overlay support
+- Fine-mapping/SuSiE visualization (PIP line with credible set coloring)
 - PySpark DataFrame support for large-scale data
 Example:
     >>> from pylocuszoom import LocusZoomPlotter
-    >>> plotter = LocusZoomPlotter(species="dog")
+    >>> plotter = LocusZoomPlotter(species="canine")
     >>> fig = plotter.plot(gwas_df, chrom=1, start=1000000, end=2000000)
     >>> fig.savefig("regional_plot.png", dpi=150)
 Interactive example:
-    >>> plotter = LocusZoomPlotter(species="dog", backend="plotly")
+    >>> plotter = LocusZoomPlotter(species="canine", backend="plotly")
     >>> fig = plotter.plot(gwas_df, chrom=1, start=1000000, end=2000000)
     >>> fig.write_html("regional_plot.html")
@@ -28,22 +29,42 @@ Stacked plots:
     ... )
 Species Support:
-    - Dog (Canis lupus familiaris): Full features including built-in recombination maps
-    - Cat (Felis catus): LD coloring and gene tracks (user provides recombination data)
+    - Canine (Canis lupus familiaris): Full features including built-in recombination maps
+    - Feline (Felis catus): LD coloring and gene tracks (user provides recombination data)
     - Custom: User provides all reference data
 """
 __version__ = "0.1.0"
 # Main plotter class
-from .plotter import LocusZoomPlotter
 # Backend types
 from .backends import BackendType, get_backend
 # Colors and LD
 from .colors import LEAD_SNP_COLOR, get_ld_bin, get_ld_color, get_ld_color_palette
+# eQTL support
+from .eqtl import (
+    EQTLValidationError,
+    calculate_colocalization_overlap,
+    filter_eqtl_by_gene,
+    filter_eqtl_by_region,
+    get_eqtl_genes,
+    prepare_eqtl_for_plotting,
+    validate_eqtl_df,
+)
+# Fine-mapping/SuSiE support
+from .finemapping import (
+    FinemappingValidationError,
+    filter_by_credible_set,
+    filter_finemapping_by_region,
+    get_credible_sets,
+    get_top_pip_variants,
+    prepare_finemapping_for_plotting,
+    validate_finemapping_df,
+)
 # Gene track
 from .gene_track import get_nearest_gene, plot_gene_track
@@ -55,26 +76,16 @@ from .ld import calculate_ld
 # Logging configuration
 from .logging import disable_logging, enable_logging
+from .plotter import LocusZoomPlotter
 # Reference data management
 from .recombination import (
     add_recombination_overlay,
-    download_dog_recombination_maps,
+    download_canine_recombination_maps,
     get_recombination_rate_for_region,
     load_recombination_map,
 )
-# eQTL support
-from .eqtl import (
-    EQTLValidationError,
-    calculate_colocalization_overlap,
-    filter_eqtl_by_gene,
-    filter_eqtl_by_region,
-    get_eqtl_genes,
-    prepare_eqtl_for_plotting,
-    validate_eqtl_df,
-)
 # Validation utilities
 from .utils import ValidationError, to_pandas
@@ -86,7 +97,7 @@ __all__ = [
     "BackendType",
     "get_backend",
     # Reference data
-    "download_dog_recombination_maps",
+    "download_canine_recombination_maps",
     # Colors
     "get_ld_color",
     "get_ld_bin",
@@ -111,6 +122,14 @@ __all__ = [
     "get_eqtl_genes",
     "calculate_colocalization_overlap",
     "EQTLValidationError",
+    # Fine-mapping/SuSiE
+    "validate_finemapping_df",
+    "filter_finemapping_by_region",
+    "filter_by_credible_set",
+    "get_credible_sets",
+    "get_top_pip_variants",
+    "prepare_finemapping_for_plotting",
+    "FinemappingValidationError",
     # Logging
     "enable_logging",
     "disable_logging",

pylocuszoom/backends/__init__.py CHANGED Viewed

@@ -3,15 +3,11 @@
 Supports matplotlib (default), plotly, and bokeh backends.
 """
-from typing import TYPE_CHECKING, Literal
+from typing import Literal
 from .base import PlotBackend
 from .matplotlib_backend import MatplotlibBackend
-if TYPE_CHECKING:
-    from .bokeh_backend import BokehBackend
-    from .plotly_backend import PlotlyBackend
 BackendType = Literal["matplotlib", "plotly", "bokeh"]
 _BACKENDS: dict[str, type[PlotBackend]] = {

pylocuszoom/backends/base.py CHANGED Viewed

@@ -3,7 +3,7 @@
 Defines the interface that matplotlib, plotly, and bokeh backends must implement.
 """
-from typing import Any, Dict, List, Optional, Protocol, Tuple, Union
+from typing import Any, List, Optional, Protocol, Tuple, Union
 import pandas as pd

pylocuszoom/backends/bokeh_backend.py CHANGED Viewed

@@ -8,7 +8,7 @@ from typing import Any, List, Optional, Tuple, Union
 import pandas as pd
 from bokeh.io import export_png, export_svgs, output_file, save, show
 from bokeh.layouts import column
-from bokeh.models import ColumnDataSource, HoverTool, Legend, LegendItem, Span
+from bokeh.models import ColumnDataSource, HoverTool, Span
 from bokeh.plotting import figure
@@ -108,10 +108,10 @@ class BokehBackend:
         # Handle sizes (convert from area to diameter)
         if isinstance(sizes, (int, float)):
-            bokeh_size = max(6, sizes ** 0.5)
+            bokeh_size = max(6, sizes**0.5)
             data["size"] = [bokeh_size] * len(x)
         else:
-            data["size"] = [max(6, s ** 0.5) for s in sizes]
+            data["size"] = [max(6, s**0.5) for s in sizes]
         # Add hover data
         tooltips = []
@@ -289,7 +289,6 @@ class BokehBackend:
         zorder: int = 2,
     ) -> Any:
         """Add a rectangle to the figure."""
-        from bokeh.models import Rect
         x_center = xy[0] + width / 2
         y_center = xy[1] + height / 2
@@ -389,9 +388,7 @@ class BokehBackend:
         # For now, assume values are already in bp and need /1e6
         from bokeh.models import FuncTickFormatter
-        ax.xaxis.formatter = FuncTickFormatter(
-            code="return (tick / 1e6).toFixed(2);"
-        )
+        ax.xaxis.formatter = FuncTickFormatter(code="return (tick / 1e6).toFixed(2);")
     def save(
         self,

pylocuszoom/backends/matplotlib_backend.py CHANGED Viewed

@@ -205,7 +205,12 @@ class MatplotlibBackend:
     def set_title(self, ax: Axes, title: str, fontsize: int = 14) -> None:
         """Set panel title."""
-        ax.set_title(title, fontsize=fontsize)
+        ax.set_title(
+            title,
+            fontsize=fontsize,
+            fontweight="bold",
+            fontfamily="sans-serif",
+        )
     def create_twin_axis(self, ax: Axes) -> Axes:
         """Create a secondary y-axis sharing the same x-axis."""

pylocuszoom/backends/plotly_backend.py CHANGED Viewed

@@ -100,9 +100,9 @@ class PlotlyBackend:
         # Convert size (matplotlib uses area, plotly uses diameter)
         if isinstance(sizes, (int, float)):
-            size = max(6, sizes ** 0.5)  # Approximate conversion
+            size = max(6, sizes**0.5)  # Approximate conversion
         else:
-            size = [max(6, s ** 0.5) for s in sizes]
+            size = [max(6, s**0.5) for s in sizes]
         # Build hover template
         if hover_data is not None:
@@ -317,7 +317,9 @@ class PlotlyBackend:
         """Set x-axis label."""
         fig, row = ax
         xaxis = f"xaxis{row}" if row > 1 else "xaxis"
-        fig.update_layout(**{xaxis: dict(title=dict(text=label, font=dict(size=fontsize)))})
+        fig.update_layout(
+            **{xaxis: dict(title=dict(text=label, font=dict(size=fontsize)))}
+        )
     def set_ylabel(
         self, ax: Tuple[go.Figure, int], label: str, fontsize: int = 12
@@ -325,7 +327,9 @@ class PlotlyBackend:
         """Set y-axis label."""
         fig, row = ax
         yaxis = f"yaxis{row}" if row > 1 else "yaxis"
-        fig.update_layout(**{yaxis: dict(title=dict(text=label, font=dict(size=fontsize)))})
+        fig.update_layout(
+            **{yaxis: dict(title=dict(text=label, font=dict(size=fontsize)))}
+        )
     def set_title(
         self, ax: Tuple[go.Figure, int], title: str, fontsize: int = 14
@@ -395,14 +399,9 @@ class PlotlyBackend:
         Plotly doesn't have spines, but we can hide axis lines.
         """
-        fig, row = ax
-        xaxis = f"xaxis{row}" if row > 1 else "xaxis"
-        yaxis = f"yaxis{row}" if row > 1 else "yaxis"
-        if "top" in spines or "right" in spines:
-            # Plotly's template "plotly_white" already hides these
-            pass
+        # Plotly's template "plotly_white" already hides top/right lines
+        # No action needed - method exists for API compatibility
+        pass
     def format_xaxis_mb(self, ax: Tuple[go.Figure, int]) -> None:
         """Format x-axis to show megabase values."""

pylocuszoom/colors.py CHANGED Viewed

@@ -29,6 +29,101 @@ LD_NA_LABEL = "NA"
 # Lead SNP color (purple diamond)
 LEAD_SNP_COLOR = "#7D26CD"  # purple3
+# Fine-mapping/SuSiE credible set colors
+# Colors for up to 10 credible sets, matching locuszoomr style
+# List position i holds the color for credible set i + 1 (IDs are 1-indexed).
+CREDIBLE_SET_COLORS: List[str] = [
+    "#FF7F00",  # orange (CS1)
+    "#1F78B4",  # blue (CS2)
+    "#33A02C",  # green (CS3)
+    "#E31A1C",  # red (CS4)
+    "#6A3D9A",  # purple (CS5)
+    "#B15928",  # brown (CS6)
+    "#FB9A99",  # pink (CS7)
+    "#A6CEE3",  # light blue (CS8)
+    "#B2DF8A",  # light green (CS9)
+    "#FDBF6F",  # light orange (CS10)
+]
+# PIP line color (when not showing credible sets)
+# Same hex value as the first entry of CREDIBLE_SET_COLORS.
+PIP_LINE_COLOR = "#FF7F00"  # orange
+# eQTL effect size bins - matches locuszoomr color scheme
+# Format: (min_threshold, max_threshold, label, color)
+# Positive effects (upward triangles)
+# Listed from the largest effects (0.3 : 0.4) down to the smallest (0.1 : 0.2).
+EQTL_POSITIVE_BINS: List[Tuple[float, float, str, str]] = [
+    (0.3, 0.4, "0.3 : 0.4", "#8B1A1A"),  # dark red/maroon
+    (0.2, 0.3, "0.2 : 0.3", "#FF6600"),  # orange
+    (0.1, 0.2, "0.1 : 0.2", "#FFB347"),  # light orange
+]
+# Negative effects (downward triangles)
+# Listed from the smallest magnitude (-0.2 : -0.1) to the largest (-0.4 : -0.3),
+# i.e. the opposite strength order to EQTL_POSITIVE_BINS.
+EQTL_NEGATIVE_BINS: List[Tuple[float, float, str, str]] = [
+    (-0.2, -0.1, "-0.2 : -0.1", "#66CDAA"),  # medium aquamarine
+    (-0.3, -0.2, "-0.3 : -0.2", "#4682B4"),  # steel blue
+    (-0.4, -0.3, "-0.4 : -0.3", "#00008B"),  # dark blue
+]
+def get_eqtl_color(effect: Optional[float]) -> str:
+    """Get color based on eQTL effect size.
+    Non-negative effects are matched against ``EQTL_POSITIVE_BINS`` and
+    negative effects against ``EQTL_NEGATIVE_BINS``. Effects at or beyond
+    +/-0.4 are clamped into the outermost bin; effects that match no bin
+    (magnitude below 0.1) fall back to the weakest bin of the same sign.
+    Args:
+        effect: Effect size (beta coefficient).
+    Returns:
+        Hex color code string; ``LD_NA_COLOR`` when ``_is_missing(effect)`` is true.
+    """
+    if _is_missing(effect):
+        return LD_NA_COLOR
+    if effect >= 0:
+        for min_t, max_t, _, color in EQTL_POSITIVE_BINS:
+            if min_t <= effect < max_t or (max_t == 0.4 and effect >= max_t):
+                return color
+        # Positive bins are listed strongest-first, so the weakest bin is last
+        return EQTL_POSITIVE_BINS[-1][3]
+    else:
+        for min_t, max_t, _, color in EQTL_NEGATIVE_BINS:
+            if min_t < effect <= max_t or (min_t == -0.4 and effect <= min_t):
+                return color
+        # Negative bins are listed weakest-first, so the weakest bin is at
+        # index 0; index -1 would wrongly pick the strongest (-0.4 : -0.3) bin
+        return EQTL_NEGATIVE_BINS[0][3]
+def get_eqtl_bin(effect: Optional[float]) -> str:
+    """Get eQTL effect bin label.
+    Non-negative effects are matched against ``EQTL_POSITIVE_BINS`` and
+    negative effects against ``EQTL_NEGATIVE_BINS``. Effects at or beyond
+    +/-0.4 are clamped into the outermost bin; effects that match no bin
+    (magnitude below 0.1) fall back to the weakest bin of the same sign.
+    Args:
+        effect: Effect size (beta coefficient).
+    Returns:
+        Bin label string; ``LD_NA_LABEL`` when ``_is_missing(effect)`` is true.
+    """
+    if _is_missing(effect):
+        return LD_NA_LABEL
+    if effect >= 0:
+        for min_t, max_t, label, _ in EQTL_POSITIVE_BINS:
+            if min_t <= effect < max_t or (max_t == 0.4 and effect >= max_t):
+                return label
+        # Positive bins are listed strongest-first, so the weakest bin is last
+        return EQTL_POSITIVE_BINS[-1][2]
+    else:
+        for min_t, max_t, label, _ in EQTL_NEGATIVE_BINS:
+            if min_t < effect <= max_t or (min_t == -0.4 and effect <= min_t):
+                return label
+        # Negative bins are listed weakest-first, so the weakest bin is at
+        # index 0; index -1 would wrongly pick the strongest (-0.4 : -0.3) bin
+        return EQTL_NEGATIVE_BINS[0][2]
+def get_eqtl_color_palette() -> dict[str, str]:
+    """Build the label-to-color lookup for every eQTL effect bin.
+    Returns:
+        Dictionary keyed by bin label with hex color values. Entries from
+        ``EQTL_POSITIVE_BINS`` are inserted first, then ``EQTL_NEGATIVE_BINS``.
+    """
+    return {
+        bin_label: hex_color
+        for _, _, bin_label, hex_color in (*EQTL_POSITIVE_BINS, *EQTL_NEGATIVE_BINS)
+    }
 def get_ld_color(r2: Optional[float]) -> str:
     """Get LocusZoom-style color based on LD R² value.
@@ -105,3 +200,40 @@ def get_ld_color_palette() -> dict[str, str]:
     palette = {label: color for _, label, color in LD_BINS}
     palette[LD_NA_LABEL] = LD_NA_COLOR
     return palette
+def get_credible_set_color(cs_id: int) -> str:
+    """Get color for a credible set.
+    IDs larger than the palette wrap around. Integral floats such as ``1.0``
+    (what pandas produces when the credible-set column also holds NaN) are
+    accepted and treated as the corresponding integer ID.
+    Args:
+        cs_id: Credible set ID (1-indexed).
+    Returns:
+        Hex color code string. ``LD_NA_COLOR`` is returned for IDs below 1
+        and for values that cannot be converted to an integer (NaN, None).
+    Example:
+        >>> get_credible_set_color(1)
+        '#FF7F00'
+    """
+    try:
+        # Coerce first: a float ID would otherwise become a float list index
+        cs_index = int(cs_id)
+    except (TypeError, ValueError, OverflowError):
+        return LD_NA_COLOR
+    if cs_index < 1:
+        return LD_NA_COLOR
+    # Use modulo to cycle through colors if more than 10 credible sets
+    return CREDIBLE_SET_COLORS[(cs_index - 1) % len(CREDIBLE_SET_COLORS)]
+def get_credible_set_color_palette(n_sets: int = 10) -> dict[int, str]:
+    """Build a credible-set-ID to color mapping.
+    Args:
+        n_sets: Number of credible sets to include.
+    Returns:
+        Dictionary mapping credible set IDs (1-indexed) to hex colors.
+        Colors repeat once the IDs exceed the length of ``CREDIBLE_SET_COLORS``.
+    Example:
+        >>> palette = get_credible_set_color_palette(3)
+        >>> palette[1]
+        '#FF7F00'
+    """
+    n_colors = len(CREDIBLE_SET_COLORS)
+    palette = {}
+    for cs_id in range(1, n_sets + 1):
+        palette[cs_id] = CREDIBLE_SET_COLORS[(cs_id - 1) % n_colors]
+    return palette

pylocuszoom/eqtl.py CHANGED Viewed

@@ -11,7 +11,6 @@ import pandas as pd
 from .logging import logger
 REQUIRED_EQTL_COLS = ["pos", "p_value"]
 OPTIONAL_EQTL_COLS = ["gene", "effect_size", "rs", "se"]
@@ -109,7 +108,9 @@ def filter_eqtl_by_region(
         mask = mask & (df_chrom == chrom_str)
     filtered = df[mask].copy()
-    logger.debug(f"Filtered eQTL data to {len(filtered)} variants in region chr{chrom}:{start}-{end}")
+    logger.debug(
+        f"Filtered eQTL data to {len(filtered)} variants in region chr{chrom}:{start}-{end}"
+    )
     return filtered

pylocuszoom/finemapping.py ADDED Viewed

@@ -0,0 +1,224 @@
+"""Fine-mapping/SuSiE data handling for pyLocusZoom.
+Provides utilities for loading, validating, and preparing statistical
+fine-mapping results (SuSiE, FINEMAP, etc.) for visualization.
+"""
+from typing import List, Optional
+import numpy as np
+import pandas as pd
+from .logging import logger
+# Required columns for fine-mapping data
+# These names equal the default pos_col / pip_col arguments used in this module.
+REQUIRED_FINEMAPPING_COLS = ["pos", "pip"]
+# Optional columns for fine-mapping data
+# NOTE(review): neither list is referenced by the functions in this module,
+# which take column names as parameters - confirm whether other modules use them.
+OPTIONAL_FINEMAPPING_COLS = ["rs", "cs", "cs_id", "effect", "se"]
+class FinemappingValidationError(ValueError):
+    """Signal that a fine-mapping DataFrame failed validation."""
+def validate_finemapping_df(
+    df: pd.DataFrame,
+    pos_col: str = "pos",
+    pip_col: str = "pip",
+) -> None:
+    """Check that a fine-mapping DataFrame is usable.
+    Args:
+        df: Fine-mapping DataFrame to validate.
+        pos_col: Column name for genomic position.
+        pip_col: Column name for posterior inclusion probability.
+    Raises:
+        FinemappingValidationError: If a required column is missing, or if
+            any PIP value is not within [0, 1].
+    """
+    absent = [name for name in (pos_col, pip_col) if name not in df.columns]
+    if absent:
+        raise FinemappingValidationError(
+            f"Fine-mapping DataFrame missing required columns: {absent}. "
+            f"Required: {pos_col} (position), {pip_col} (posterior inclusion probability)"
+        )
+    # PIP is a probability, so every value has to lie in [0, 1]
+    in_range = df[pip_col].between(0, 1)
+    if in_range.all():
+        return
+    invalid_count = (~in_range).sum()
+    raise FinemappingValidationError(
+        f"PIP values must be between 0 and 1. Found {invalid_count} invalid values."
+    )
+def filter_finemapping_by_region(
+    df: pd.DataFrame,
+    chrom: int,
+    start: int,
+    end: int,
+    pos_col: str = "pos",
+    chrom_col: Optional[str] = "chr",
+) -> pd.DataFrame:
+    """Restrict fine-mapping data to one genomic window.
+    Args:
+        df: Fine-mapping DataFrame.
+        chrom: Chromosome number.
+        start: Start position (inclusive).
+        end: End position (inclusive).
+        pos_col: Column name for position.
+        chrom_col: Column name for chromosome; ignored when falsy or when
+            the column is not present in ``df``.
+    Returns:
+        Copy of the rows whose position lies in the window (and whose
+        chromosome matches, when a chromosome column is available).
+    """
+    positions = df[pos_col]
+    keep = (positions >= start) & (positions <= end)
+    has_chrom_column = bool(chrom_col) and chrom_col in df.columns
+    if has_chrom_column:
+        # Compare as strings with any "chr" text removed on both sides
+        wanted = str(chrom).replace("chr", "")
+        observed = df[chrom_col].astype(str).str.replace("chr", "", regex=False)
+        keep = keep & (observed == wanted)
+    region_df = df[keep].copy()
+    logger.debug(
+        f"Filtered fine-mapping data to {len(region_df)} variants in region "
+        f"chr{chrom}:{start}-{end}"
+    )
+    return region_df
+def get_credible_sets(
+    df: pd.DataFrame,
+    cs_col: str = "cs",
+) -> List[int]:
+    """Get list of unique credible set IDs.
+    Args:
+        df: Fine-mapping DataFrame.
+        cs_col: Column containing credible set assignments.
+    Returns:
+        Sorted list of unique credible set IDs (excluding 0/NA). Returns an
+        empty list when ``cs_col`` is not a column of ``df``. Whole-number
+        floats are returned as ``int``.
+    """
+    if cs_col not in df.columns:
+        return []
+    # Filter out variants not in a credible set (typically cs=0 or NA)
+    cs_values = df[cs_col].dropna()
+    cs_values = cs_values[cs_values != 0]
+    unique_ids = sorted(cs_values.unique().tolist())
+    # A column holding NA is stored as float by pandas, so IDs arrive as 1.0,
+    # 2.0, ...; convert them back so the result matches the List[int] contract
+    return [
+        int(value) if isinstance(value, float) and value.is_integer() else value
+        for value in unique_ids
+    ]
+def filter_by_credible_set(
+    df: pd.DataFrame,
+    cs_id: int,
+    cs_col: str = "cs",
+) -> pd.DataFrame:
+    """Select the variants assigned to one credible set.
+    Args:
+        df: Fine-mapping DataFrame.
+        cs_id: Credible set ID to filter for.
+        cs_col: Column containing credible set assignments.
+    Returns:
+        Copy of the rows whose ``cs_col`` value equals ``cs_id``.
+    Raises:
+        FinemappingValidationError: If ``cs_col`` is not a column of ``df``.
+    """
+    if cs_col in df.columns:
+        in_set = df[cs_col] == cs_id
+        return df[in_set].copy()
+    raise FinemappingValidationError(
+        f"Cannot filter by credible set: column '{cs_col}' not found. "
+        f"Available columns: {list(df.columns)}"
+    )
+def prepare_finemapping_for_plotting(
+    df: pd.DataFrame,
+    pos_col: str = "pos",
+    pip_col: str = "pip",
+    chrom: Optional[int] = None,
+    start: Optional[int] = None,
+    end: Optional[int] = None,
+    chrom_col: Optional[str] = "chr",
+) -> pd.DataFrame:
+    """Prepare fine-mapping data for plotting.
+    Validates, filters, and sorts data for plotting as a line or scatter.
+    Region filtering is applied only when ``chrom``, ``start`` and ``end``
+    are all given; otherwise the full table is kept.
+    Args:
+        df: Raw fine-mapping DataFrame.
+        pos_col: Column name for position.
+        pip_col: Column name for PIP.
+        chrom: Optional chromosome for region filtering.
+        start: Optional start position for region filtering.
+        end: Optional end position for region filtering.
+        chrom_col: Column name for chromosome, forwarded to
+            ``filter_finemapping_by_region``.
+    Returns:
+        New DataFrame sorted by position; the input is not modified.
+    Raises:
+        FinemappingValidationError: Propagated from ``validate_finemapping_df``.
+    """
+    validate_finemapping_df(df, pos_col=pos_col, pip_col=pip_col)
+    result = df
+    # Filter by region if specified
+    if chrom is not None and start is not None and end is not None:
+        result = filter_finemapping_by_region(
+            result, chrom, start, end, pos_col=pos_col, chrom_col=chrom_col
+        )
+    # Sort by position for line plotting; sort_values returns a new frame,
+    # so no defensive copy of the input is needed beforehand
+    return result.sort_values(pos_col)
+def get_top_pip_variants(
+    df: pd.DataFrame,
+    n: int = 5,
+    pip_col: str = "pip",
+    pip_threshold: float = 0.0,
+) -> pd.DataFrame:
+    """Return the highest-PIP variants.
+    Args:
+        df: Fine-mapping DataFrame.
+        n: Number of top variants to return.
+        pip_col: Column containing PIP values.
+        pip_threshold: Minimum PIP a variant needs to be considered.
+    Returns:
+        DataFrame with at most ``n`` rows, ordered by descending PIP.
+    """
+    meets_threshold = df[pip_col] >= pip_threshold
+    return df[meets_threshold].nlargest(n, pip_col)
+def calculate_credible_set_coverage(
+    df: pd.DataFrame,
+    cs_col: str = "cs",
+    pip_col: str = "pip",
+) -> dict:
+    """Sum the PIP of the variants in each credible set.
+    Args:
+        df: Fine-mapping DataFrame.
+        cs_col: Column containing credible set assignments.
+        pip_col: Column containing PIP values.
+    Returns:
+        Dictionary mapping credible set ID to cumulative PIP; empty when
+        ``cs_col`` is not a column of ``df``.
+    """
+    if cs_col not in df.columns:
+        return {}
+    return {
+        set_id: filter_by_credible_set(df, set_id, cs_col)[pip_col].sum()
+        for set_id in get_credible_sets(df, cs_col)
+    }

pylocuszoom 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

pylocuszoom 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl