PyPI - pylocuszoom - Versions diffs - 1.0.0__py3-none-any.whl → 1.1.0__py3-none-any.whl - Mend

pylocuszoom 1.0.0__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

pylocuszoom/__init__.py +9 -1
pylocuszoom/_plotter_utils.py +66 -0
pylocuszoom/backends/base.py +56 -0
pylocuszoom/backends/bokeh_backend.py +141 -29
pylocuszoom/backends/matplotlib_backend.py +60 -0
pylocuszoom/backends/plotly_backend.py +297 -88
pylocuszoom/ensembl.py +6 -11
pylocuszoom/gene_track.py +2 -24
pylocuszoom/labels.py +6 -2
pylocuszoom/manhattan.py +246 -0
pylocuszoom/manhattan_plotter.py +760 -0
pylocuszoom/plotter.py +236 -270
pylocuszoom/qq.py +123 -0
pylocuszoom/recombination.py +7 -7
pylocuszoom/stats_plotter.py +319 -0
{pylocuszoom-1.0.0.dist-info → pylocuszoom-1.1.0.dist-info}/METADATA +124 -14
pylocuszoom-1.1.0.dist-info/RECORD +36 -0
pylocuszoom-1.0.0.dist-info/RECORD +0 -31
{pylocuszoom-1.0.0.dist-info → pylocuszoom-1.1.0.dist-info}/WHEEL +0 -0
{pylocuszoom-1.0.0.dist-info → pylocuszoom-1.1.0.dist-info}/licenses/LICENSE.md +0 -0

pylocuszoom/ensembl.py CHANGED Viewed

@@ -18,7 +18,7 @@ import pandas as pd
 import requests
 from .logging import logger
-from .utils import ValidationError
+from .utils import ValidationError, normalize_chrom
 # Ensembl API limits regions to 5Mb
 ENSEMBL_MAX_REGION_SIZE = 5_000_000
@@ -47,11 +47,6 @@ ENSEMBL_MAX_RETRIES = 3
 ENSEMBL_RETRY_DELAY = 1.0  # seconds, doubles on each retry
-def _normalize_chrom(chrom: str | int) -> str:
-    """Normalize chromosome name by removing 'chr' prefix."""
-    return str(chrom).replace("chr", "")
 def _validate_region_size(start: int, end: int, context: str) -> None:
     """Validate region size is within Ensembl API limits.
@@ -129,7 +124,7 @@ def get_cached_genes(
         DataFrame if cache hit, None if cache miss.
     """
     ensembl_species = get_ensembl_species_name(species)
-    chrom_str = _normalize_chrom(chrom)
+    chrom_str = normalize_chrom(chrom)
     cache_key = _cache_key(ensembl_species, chrom_str, start, end)
     species_dir = cache_dir / ensembl_species
@@ -161,7 +156,7 @@ def save_cached_genes(
         end: Region end position.
     """
     ensembl_species = get_ensembl_species_name(species)
-    chrom_str = _normalize_chrom(chrom)
+    chrom_str = normalize_chrom(chrom)
     cache_key = _cache_key(ensembl_species, chrom_str, start, end)
     species_dir = cache_dir / ensembl_species
@@ -266,7 +261,7 @@ def fetch_genes_from_ensembl(
     _validate_region_size(start, end, "genes_df")
     ensembl_species = get_ensembl_species_name(species)
-    chrom_str = _normalize_chrom(chrom)
+    chrom_str = normalize_chrom(chrom)
     # Build region string
     region = f"{chrom_str}:{start}-{end}"
@@ -334,7 +329,7 @@ def fetch_exons_from_ensembl(
     _validate_region_size(start, end, "exons_df")
     ensembl_species = get_ensembl_species_name(species)
-    chrom_str = _normalize_chrom(chrom)
+    chrom_str = normalize_chrom(chrom)
     region = f"{chrom_str}:{start}-{end}"
     url = f"{ENSEMBL_REST_URL}/overlap/region/{ensembl_species}/{region}"
@@ -408,7 +403,7 @@ def get_genes_for_region(
     if cache_dir is None:
         cache_dir = get_ensembl_cache_dir()
-    chrom_str = _normalize_chrom(chrom)
+    chrom_str = normalize_chrom(chrom)
     # Check cache first
     if use_cache:

pylocuszoom/gene_track.py CHANGED Viewed

@@ -175,17 +175,6 @@ def _draw_strand_arrows_matplotlib(
         gene_start, gene_end, region_width, strand
     )
-    # Draw connecting line between arrow centers
-    if len(arrow_tip_positions) > 1:
-        ax.plot(
-            [arrow_tip_positions[0], arrow_tip_positions[-1]],
-            [y_gene, y_gene],
-            color=arrow_color,
-            linewidth=1.0,
-            zorder=4,
-            solid_capstyle="butt",
-        )
     for tip_x in arrow_tip_positions:
         if strand == "+":
             base_x = tip_x - tri_width
@@ -224,17 +213,6 @@ def _draw_strand_arrows_generic(
         gene_start, gene_end, region_width, strand
     )
-    # Draw connecting line between arrow centers
-    if len(arrow_tip_positions) > 1:
-        backend.line(
-            ax,
-            x=pd.Series([arrow_tip_positions[0], arrow_tip_positions[-1]]),
-            y=pd.Series([y_gene, y_gene]),
-            color=arrow_color,
-            linewidth=1.0,
-            zorder=4,
-        )
     for tip_x in arrow_tip_positions:
         if strand == "+":
             base_x = tip_x - tri_width
@@ -406,7 +384,7 @@ def plot_gene_track(
                 gene_name,
                 ha="center",
                 va="bottom",
-                fontsize=7,
+                fontsize=9,
                 color="#000000",
                 fontweight="medium",
                 style="italic",
@@ -553,7 +531,7 @@ def plot_gene_track_generic(
                 label_pos,
                 y_label,
                 gene_name,
-                fontsize=7,
+                fontsize=9,
                 ha="center",
                 va="bottom",
                 color="#000000",

pylocuszoom/labels.py CHANGED Viewed

@@ -11,6 +11,8 @@ import pandas as pd
 from matplotlib.axes import Axes
 from matplotlib.text import Annotation
+from pylocuszoom.logging import logger
 def add_snp_labels(
     ax: Axes,
@@ -111,7 +113,9 @@ def add_snp_labels(
                 expand_points=(1.5, 1.5),
             )
         except ImportError:
-            # adjustText not installed, labels may overlap
-            pass
+            logger.warning(
+                "adjustText not installed - SNP labels may overlap. "
+                "Install with: pip install adjustText"
+            )
     return texts

pylocuszoom/manhattan.py ADDED Viewed

@@ -0,0 +1,246 @@
+"""Manhattan plot data preparation and chromosome ordering."""
+from typing import Literal
+import colorcet as cc
+import numpy as np
+import pandas as pd
+# Alternative species names, keyed alias -> canonical name used in CHROMOSOME_ORDERS
+SPECIES_ALIASES: dict[str, str] = {"dog": "canine", "cat": "feline"}
+# Built-in chromosome display orders, keyed by canonical species name.
+# Each list ends with X, Y and MT.
+CHROMOSOME_ORDERS: dict[str, list[str]] = {
+    "canine": [*map(str, range(1, 39)), "X", "Y", "MT"],
+    "feline": "A1 A2 A3 B1 B2 B3 B4 C1 C2 D1 D2 D3 D4 E1 E2 E3 X Y MT".split(),
+    "human": [*map(str, range(1, 23)), "X", "Y", "MT"],
+}
+def get_chromosome_order(
+    species: Literal["canine", "feline", "human", "dog", "cat"] | None = None,
+    custom_order: list[str] | None = None,
+) -> list[str]:
+    """Get chromosome order for a species.
+    Args:
+        species: Species name for built-in order. Supports aliases:
+            'dog' -> 'canine', 'cat' -> 'feline'.
+        custom_order: Custom chromosome order (overrides species).
+    Returns:
+        A new list of chromosome names in display order. The list is a
+        copy, so mutating it affects neither the built-in orders nor the
+        caller's custom_order.
+    Raises:
+        ValueError: If neither species nor custom_order provided,
+            or if species is unknown.
+    """
+    if custom_order is not None:
+        # Copy so later edits to the caller's list cannot change our result
+        return list(custom_order)
+    if species is None:
+        raise ValueError("Must provide either species or custom_order")
+    # Resolve aliases
+    resolved_species = SPECIES_ALIASES.get(species, species)
+    if resolved_species not in CHROMOSOME_ORDERS:
+        raise ValueError(
+            f"Unknown species '{species}'. "
+            f"Use one of {list(CHROMOSOME_ORDERS.keys())} "
+            f"(or aliases: {list(SPECIES_ALIASES.keys())}) "
+            f"or provide custom_order."
+        )
+    # Copy so callers cannot mutate the shared module-level table
+    return list(CHROMOSOME_ORDERS[resolved_species])
+def get_chromosome_colors(n_chromosomes: int) -> list[str]:
+    """Return one color per chromosome from a categorical palette.
+    Colors come from colorcet's b_glasbey_bw_minc_20_maxl_70 palette.
+    When more colors are requested than the palette holds, the palette
+    is reused from its first entry.
+    Args:
+        n_chromosomes: Number of chromosomes to color.
+    Returns:
+        List of n_chromosomes palette entries
+        (presumably hex color strings - confirm against colorcet).
+    """
+    source = cc.b_glasbey_bw_minc_20_maxl_70
+    palette_size = len(source)
+    picked = []
+    for slot in range(n_chromosomes):
+        # Wrap around once the palette is exhausted
+        picked.append(source[slot % palette_size])
+    return picked
+def prepare_manhattan_data(
+    df: pd.DataFrame,
+    chrom_col: str = "chrom",
+    pos_col: str = "pos",
+    p_col: str = "p",
+    species: Literal["canine", "feline", "human", "dog", "cat"] | None = None,
+    custom_order: list[str] | None = None,
+) -> pd.DataFrame:
+    """Prepare DataFrame for Manhattan plot rendering.
+    Computes cumulative positions for x-axis and assigns chromosome colors.
+    The input DataFrame is left untouched; all work happens on a copy.
+    Args:
+        df: GWAS results DataFrame.
+        chrom_col: Column name for chromosome.
+        pos_col: Column name for position.
+        p_col: Column name for p-value.
+        species: Species for chromosome ordering.
+        custom_order: Custom chromosome order.
+    Returns:
+        Copy of df sorted by chromosome index then position, with
+        additional columns:
+        - _chrom_str: Chromosome name converted to str
+        - _chrom_idx: Integer index of the chromosome in the requested
+          order; chromosomes missing from the order all get len(order)
+        - _cumulative_pos: X-axis position
+        - _neg_log_p: -log10(p-value), with p clipped to at least 1e-300
+        - _color: Hex color for chromosome
+        The result's attrs carry "chrom_centers" (mean x position of each
+        chromosome present in the data) and "chrom_order" (the requested
+        order followed by any unlisted chromosomes, sorted).
+    Raises:
+        ValueError: If a required column is missing, or if no chromosome
+            order can be resolved from species / custom_order.
+    """
+    # Validate required columns
+    for col, name in [(chrom_col, "chrom"), (pos_col, "pos"), (p_col, "p")]:
+        if col not in df.columns:
+            raise ValueError(f"Column '{col}' not found in DataFrame (for {name})")
+    # Get chromosome order
+    chrom_order = get_chromosome_order(species, custom_order)
+    # Create working copy
+    result = df.copy()
+    # Normalize chromosome names (handle int vs str)
+    result["_chrom_str"] = result[chrom_col].astype(str)
+    # Map chromosomes to order index; unlisted chromosomes all share
+    # len(chrom_order) so they sort after every listed chromosome
+    chrom_to_idx = {chrom: i for i, chrom in enumerate(chrom_order)}
+    result["_chrom_idx"] = (
+        result["_chrom_str"].map(chrom_to_idx).fillna(len(chrom_order)).astype("int64")
+    )
+    # Sort by chromosome index then position
+    result = result.sort_values(["_chrom_idx", pos_col])
+    # Chromosomes present in the data but absent from the requested order
+    # are laid out after the listed ones, in sorted order
+    unknown_chroms = set(result["_chrom_str"]) - set(chrom_order)
+    all_chroms = chrom_order + sorted(unknown_chroms)
+    # A single grouped pass yields the largest position per chromosome
+    max_pos = result.groupby("_chrom_str")[pos_col].max().to_dict()
+    chrom_offsets = {}
+    cumulative = 0
+    for chrom in all_chroms:
+        if chrom in max_pos:
+            chrom_offsets[chrom] = cumulative
+            cumulative += max_pos[chrom] + 1_000_000  # 1Mb gap
+    # Offset lookup is vectorized, so it also works when the frame is empty
+    result["_cumulative_pos"] = (
+        result["_chrom_str"].map(chrom_offsets).fillna(0) + result[pos_col]
+    )
+    # Calculate -log10(p); the clip keeps p <= 0 from producing inf
+    result["_neg_log_p"] = -np.log10(result[p_col].clip(lower=1e-300))
+    # Assign colors
+    colors = get_chromosome_colors(len(all_chroms))
+    chrom_to_color = dict(zip(all_chroms, colors))
+    result["_color"] = result["_chrom_str"].map(chrom_to_color)
+    # Chromosome centers for x-axis labels, kept in display order and
+    # limited to chromosomes that actually have rows
+    mean_pos = result.groupby("_chrom_str")["_cumulative_pos"].mean().to_dict()
+    result.attrs["chrom_centers"] = {
+        chrom: mean_pos[chrom] for chrom in all_chroms if chrom in mean_pos
+    }
+    result.attrs["chrom_order"] = all_chroms
+    return result
+def prepare_categorical_data(
+    df: pd.DataFrame,
+    category_col: str,
+    p_col: str = "p",
+    category_order: list[str] | None = None,
+) -> pd.DataFrame:
+    """Prepare DataFrame for categorical Manhattan plot (PheWAS-style).
+    The input DataFrame is left untouched; all work happens on a copy.
+    Args:
+        df: Results DataFrame with categories and p-values.
+        category_col: Column name for category.
+        p_col: Column name for p-value.
+        category_order: Custom category order. Entries are compared by
+            their string form. Defaults to the sorted string forms of the
+            non-null unique values in category_col.
+    Returns:
+        Copy of df with additional columns:
+        - _cat_str: Category converted to str
+        - _cat_idx: Index of the category in the order; rows whose
+          category is not in the order get len(order)
+        - _x_pos: _cat_idx plus a reproducible jitter in [-0.3, 0.3)
+        - _neg_log_p: -log10(p-value), with p clipped to at least 1e-300
+        - _color: Color for the category (missing for rows whose
+          category is not in the order)
+        The result's attrs carry "category_order" and "category_centers"
+        (category -> integer x position).
+    Raises:
+        ValueError: If category_col or p_col is not a column of df.
+    """
+    # Validate required columns
+    if category_col not in df.columns:
+        raise ValueError(f"Column '{category_col}' not found in DataFrame")
+    if p_col not in df.columns:
+        raise ValueError(f"Column '{p_col}' not found in DataFrame")
+    result = df.copy()
+    if category_order is None:
+        # Unique non-null values, stringified so mixed types sort safely
+        unique_vals = result[category_col].dropna().unique()
+        category_order = sorted(str(v) for v in unique_vals)
+    else:
+        # Rows are matched by string form below, so the order must hold
+        # strings too; this also avoids aliasing the caller's list
+        category_order = [str(cat) for cat in category_order]
+    # Convert category column to string for consistent handling
+    result["_cat_str"] = result[category_col].astype(str)
+    # Map categories to index (use string values for lookup)
+    cat_to_idx = {cat: i for i, cat in enumerate(category_order)}
+    result["_cat_idx"] = result["_cat_str"].map(
+        lambda x: cat_to_idx.get(x, len(category_order))
+    )
+    # Jitter separates multiple points within one category. A private
+    # fixed-seed generator keeps it reproducible without reseeding
+    # NumPy's global random state.
+    rng = np.random.RandomState(42)
+    result["_x_pos"] = result["_cat_idx"] + rng.uniform(-0.3, 0.3, size=len(result))
+    # Calculate -log10(p); the clip keeps p <= 0 from producing inf
+    result["_neg_log_p"] = -np.log10(result[p_col].clip(lower=1e-300))
+    # Assign colors (use string values for lookup)
+    colors = get_chromosome_colors(len(category_order))
+    cat_to_color = dict(zip(category_order, colors))
+    result["_color"] = result["_cat_str"].map(cat_to_color)
+    result.attrs["category_order"] = category_order
+    result.attrs["category_centers"] = dict(cat_to_idx)
+    return result

pylocuszoom 1.0.0__py3-none-any.whl → 1.1.0__py3-none-any.whl

pylocuszoom 1.0.0__py3-none-any.whl → 1.1.0__py3-none-any.whl