pylocuszoom 0.6.0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pylocuszoom/__init__.py +34 -7
- pylocuszoom/backends/__init__.py +116 -17
- pylocuszoom/backends/base.py +363 -60
- pylocuszoom/backends/bokeh_backend.py +77 -15
- pylocuszoom/backends/hover.py +198 -0
- pylocuszoom/backends/matplotlib_backend.py +263 -3
- pylocuszoom/backends/plotly_backend.py +73 -16
- pylocuszoom/config.py +365 -0
- pylocuszoom/ensembl.py +476 -0
- pylocuszoom/eqtl.py +17 -25
- pylocuszoom/exceptions.py +33 -0
- pylocuszoom/finemapping.py +18 -32
- pylocuszoom/forest.py +10 -11
- pylocuszoom/gene_track.py +169 -142
- pylocuszoom/loaders.py +3 -1
- pylocuszoom/phewas.py +10 -11
- pylocuszoom/plotter.py +311 -277
- pylocuszoom/recombination.py +19 -3
- pylocuszoom/schemas.py +1 -6
- pylocuszoom/utils.py +54 -4
- pylocuszoom/validation.py +223 -0
- {pylocuszoom-0.6.0.dist-info → pylocuszoom-1.0.0.dist-info}/METADATA +82 -37
- pylocuszoom-1.0.0.dist-info/RECORD +31 -0
- pylocuszoom-0.6.0.dist-info/RECORD +0 -26
- {pylocuszoom-0.6.0.dist-info → pylocuszoom-1.0.0.dist-info}/WHEEL +0 -0
- {pylocuszoom-0.6.0.dist-info → pylocuszoom-1.0.0.dist-info}/licenses/LICENSE.md +0 -0
pylocuszoom/recombination.py
CHANGED

@@ -18,6 +18,7 @@ from matplotlib.axes import Axes
 from tqdm import tqdm

 from .logging import logger
+from .utils import filter_by_region

 # Recombination overlay color
 RECOMB_COLOR = "#7FCDFF"  # Light blue

@@ -252,10 +253,20 @@ def download_canine_recombination_maps(

     logger.debug(f"Downloaded {tar_path.stat().st_size / 1024:.1f} KB")

-    # Extract tar.gz
+    # Extract tar.gz with path traversal protection
     logger.debug("Extracting genetic maps...")
     with tarfile.open(tar_path, "r:gz") as tar:
-        tar.extractall(tmpdir)
+        # Filter to prevent path traversal attacks
+        safe_members = []
+        for member in tar.getmembers():
+            # Resolve the path and ensure it stays within tmpdir
+            member_path = Path(tmpdir) / member.name
+            try:
+                member_path.resolve().relative_to(Path(tmpdir).resolve())
+                safe_members.append(member)
+            except ValueError:
+                logger.warning(f"Skipping unsafe path in archive: {member.name}")
+        tar.extractall(tmpdir, members=safe_members)

     # Find and process the extracted files
     extracted_dir = Path(tmpdir)

@@ -374,7 +385,12 @@ def get_recombination_rate_for_region(
     )

     # Filter to region
-    region_df =
+    region_df = filter_by_region(
+        df,
+        region=(chrom, start, end),
+        chrom_col="",  # Recomb maps don't have chromosome column
+        pos_col="pos",
+    )

     return region_df[["pos", "rate"]]

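The guard added above generalizes beyond this downloader. A minimal standalone sketch of the same pattern (the `extract_safely` wrapper is hypothetical, and `print` stands in for the package's logger):

```python
import tarfile
from pathlib import Path


def extract_safely(tar_path: str, dest_dir: str) -> None:
    """Extract a .tar.gz, skipping members that would escape dest_dir."""
    dest = Path(dest_dir).resolve()
    with tarfile.open(tar_path, "r:gz") as tar:
        safe_members = []
        for member in tar.getmembers():
            # relative_to() raises ValueError if the resolved path would
            # land outside dest (e.g. a "../../etc/passwd" member name)
            try:
                (Path(dest_dir) / member.name).resolve().relative_to(dest)
                safe_members.append(member)
            except ValueError:
                print(f"Skipping unsafe path in archive: {member.name}")
        tar.extractall(dest_dir, members=safe_members)
```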
pylocuszoom/schemas.py
CHANGED

@@ -10,12 +10,7 @@ from typing import Optional, Union
 import pandas as pd
 from pydantic import BaseModel, ConfigDict, field_validator, model_validator

-
-class LoaderValidationError(Exception):
-    """Raised when loaded data fails validation."""
-
-    pass
-
+from .exceptions import LoaderValidationError

 # =============================================================================
 # GWAS Validation
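Both this hunk and the utils.py hunk below move exception classes into the new pylocuszoom/exceptions.py (+33 lines, contents not shown in this diff). Judging only from the relocated definitions, a plausible sketch of the consolidated module (the real file may define more):

```python
"""Sketch of pylocuszoom/exceptions.py inferred from the relocated classes.

Docstrings are copied from the definitions removed in schemas.py and
utils.py; everything else about the module is an assumption.
"""


class ValidationError(ValueError):
    """Raised when input validation fails."""


class LoaderValidationError(Exception):
    """Raised when loaded data fails validation."""
```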
pylocuszoom/utils.py
CHANGED

@@ -8,6 +8,8 @@ from typing import TYPE_CHECKING, Any, List, Optional, Union

 import pandas as pd

+from .exceptions import ValidationError
+
 if TYPE_CHECKING:
     from pyspark.sql import DataFrame as SparkDataFrame


@@ -15,10 +17,6 @@ if TYPE_CHECKING:
 DataFrameLike = Union[pd.DataFrame, "SparkDataFrame", Any]


-class ValidationError(ValueError):
-    """Raised when input validation fails."""
-
-
 def is_spark_dataframe(df: Any) -> bool:
     """Check if object is a PySpark DataFrame.


@@ -106,6 +104,58 @@ def normalize_chrom(chrom: Union[int, str]) -> str:
     return str(chrom).replace("chr", "")


+def filter_by_region(
+    df: pd.DataFrame,
+    region: tuple,
+    chrom_col: str = "chrom",
+    pos_col: str = "pos",
+) -> pd.DataFrame:
+    """Filter DataFrame to genomic region with inclusive bounds.
+
+    Filters rows where position is within [start, end] (inclusive).
+    If chrom_col exists in DataFrame, also filters by chromosome.
+    Chromosome comparison normalizes types (int/str, chr prefix).
+
+    Args:
+        df: DataFrame to filter.
+        region: Tuple of (chrom, start, end) defining the region.
+        chrom_col: Column name for chromosome (default: "chrom").
+            If column doesn't exist, filters by position only.
+        pos_col: Column name for position (default: "pos").
+
+    Returns:
+        Filtered DataFrame (copy, not view).
+
+    Raises:
+        KeyError: If pos_col is not found in DataFrame.
+
+    Example:
+        >>> filtered = filter_by_region(df, region=(1, 1000000, 2000000))
+        >>> filtered = filter_by_region(df, region=("chr1", 1e6, 2e6), pos_col="position")
+    """
+    chrom, start, end = region
+
+    # Validate position column exists
+    if pos_col not in df.columns:
+        raise KeyError(
+            f"Position column '{pos_col}' not found in DataFrame. "
+            f"Available columns: {list(df.columns)}"
+        )
+
+    # Position filtering (inclusive bounds)
+    mask = (df[pos_col] >= start) & (df[pos_col] <= end)
+
+    # Chromosome filtering (if column exists)
+    if chrom_col in df.columns:
+        chrom_normalized = normalize_chrom(chrom)
+        df_chrom_normalized = (
+            df[chrom_col].astype(str).str.replace("chr", "", regex=False)
+        )
+        mask = mask & (df_chrom_normalized == chrom_normalized)
+
+    return df[mask].copy()
+
+
 def validate_dataframe(
     df: pd.DataFrame,
     required_cols: List[str],
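A quick usage sketch of the new helper on toy data (column values here are illustrative only):

```python
import pandas as pd

from pylocuszoom.utils import filter_by_region

df = pd.DataFrame({
    "chrom": ["chr1", "chr1", "chr2"],
    "pos": [1_200_000, 2_500_000, 1_500_000],
    "p": [1e-8, 0.03, 1e-4],
})

# Keeps only the chr1 row at 1,200,000: bounds are inclusive, the "chr"
# prefix is normalized away, and the result is a copy rather than a view.
region_df = filter_by_region(df, region=(1, 1_000_000, 2_000_000))
print(region_df)
```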
pylocuszoom/validation.py
ADDED

@@ -0,0 +1,223 @@
+"""DataFrame validation builder for pyLocusZoom.
+
+Provides a fluent API for validating pandas DataFrames with composable
+validation rules. Accumulates all validation errors before raising.
+"""
+
+from typing import List, Optional
+
+import pandas as pd
+from pandas.api.types import is_numeric_dtype
+
+from .utils import ValidationError
+
+
+class DataFrameValidator:
+    """Builder for composable DataFrame validation.
+
+    Validates DataFrames with method chaining and accumulates all errors
+    before raising. This enables clear, readable validation code with
+    comprehensive error messages.
+
+    Example:
+        >>> validator = DataFrameValidator(df, name="gwas_df")
+        >>> validator.require_columns(["chr", "pos", "p"])
+        ...     .require_numeric(["pos", "p"])
+        ...     .require_range("p", min_val=0, max_val=1)
+        ...     .validate()
+    """
+
+    def __init__(self, df: pd.DataFrame, name: str = "DataFrame"):
+        """Initialize validator.
+
+        Args:
+            df: DataFrame to validate.
+            name: Name for error messages (e.g., "gwas_df", "genes_df").
+        """
+        self._df = df
+        self._name = name
+        self._errors: List[str] = []
+
+    def require_columns(self, columns: List[str]) -> "DataFrameValidator":
+        """Check that required columns exist in DataFrame.
+
+        Args:
+            columns: List of required column names.
+
+        Returns:
+            Self for method chaining.
+        """
+        if not columns:
+            return self
+
+        missing = [col for col in columns if col not in self._df.columns]
+        if missing:
+            available = list(self._df.columns)
+            self._errors.append(f"Missing columns: {missing}. Available: {available}")
+
+        return self
+
+    def require_numeric(self, columns: List[str]) -> "DataFrameValidator":
+        """Check that columns have numeric dtype.
+
+        Skips columns that don't exist (checked separately by require_columns).
+
+        Args:
+            columns: List of column names that should be numeric.
+
+        Returns:
+            Self for method chaining.
+        """
+        for col in columns:
+            # Skip missing columns - let require_columns handle that
+            if col not in self._df.columns:
+                continue
+
+            if not is_numeric_dtype(self._df[col]):
+                actual_dtype = self._df[col].dtype
+                self._errors.append(
+                    f"Column '{col}' must be numeric, got {actual_dtype}"
+                )
+
+        return self
+
+    def require_range(
+        self,
+        column: str,
+        min_val: Optional[float] = None,
+        max_val: Optional[float] = None,
+        exclusive_min: bool = False,
+        exclusive_max: bool = False,
+    ) -> "DataFrameValidator":
+        """Check that column values are within specified range.
+
+        Args:
+            column: Column name to check.
+            min_val: Minimum allowed value (inclusive by default).
+            max_val: Maximum allowed value (inclusive by default).
+            exclusive_min: If True, minimum is exclusive (values must be > min_val).
+            exclusive_max: If True, maximum is exclusive (values must be < max_val).
+
+        Returns:
+            Self for method chaining.
+        """
+        # Skip missing columns
+        if column not in self._df.columns:
+            return self
+
+        col_data = self._df[column]
+
+        # Check minimum bound
+        if min_val is not None:
+            if exclusive_min:
+                invalid_count = (col_data <= min_val).sum()
+                if invalid_count > 0:
+                    self._errors.append(
+                        f"Column '{column}': {invalid_count} values <= {min_val}"
+                    )
+            else:
+                invalid_count = (col_data < min_val).sum()
+                if invalid_count > 0:
+                    self._errors.append(
+                        f"Column '{column}': {invalid_count} values < {min_val}"
+                    )
+
+        # Check maximum bound
+        if max_val is not None:
+            if exclusive_max:
+                invalid_count = (col_data >= max_val).sum()
+                if invalid_count > 0:
+                    self._errors.append(
+                        f"Column '{column}': {invalid_count} values >= {max_val}"
+                    )
+            else:
+                invalid_count = (col_data > max_val).sum()
+                if invalid_count > 0:
+                    self._errors.append(
+                        f"Column '{column}': {invalid_count} values > {max_val}"
+                    )
+
+        return self
+
+    def require_not_null(self, columns: List[str]) -> "DataFrameValidator":
+        """Check that columns have no null (NaN or None) values.
+
+        Args:
+            columns: List of column names to check for nulls.
+
+        Returns:
+            Self for method chaining.
+        """
+        for col in columns:
+            # Skip missing columns
+            if col not in self._df.columns:
+                continue
+
+            null_count = self._df[col].isna().sum()
+            if null_count > 0:
+                self._errors.append(f"Column '{col}' has {null_count} null values")
+
+        return self
+
+    def require_ci_ordering(
+        self,
+        ci_lower_col: str,
+        effect_col: str,
+        ci_upper_col: str,
+    ) -> "DataFrameValidator":
+        """Check that confidence intervals are properly ordered.
+
+        Validates that ci_lower <= effect <= ci_upper for all rows.
+        Invalid ordering would produce negative error bar lengths.
+
+        Args:
+            ci_lower_col: Column name for lower CI bound.
+            effect_col: Column name for effect size (point estimate).
+            ci_upper_col: Column name for upper CI bound.
+
+        Returns:
+            Self for method chaining.
+        """
+        # Skip if any column is missing
+        for col in [ci_lower_col, effect_col, ci_upper_col]:
+            if col not in self._df.columns:
+                return self
+
+        lower = self._df[ci_lower_col]
+        effect = self._df[effect_col]
+        upper = self._df[ci_upper_col]
+
+        # Check ci_lower <= effect
+        lower_gt_effect = (lower > effect).sum()
+        if lower_gt_effect > 0:
+            self._errors.append(
+                f"{lower_gt_effect} rows have {ci_lower_col} > {effect_col}"
+            )
+
+        # Check effect <= ci_upper
+        effect_gt_upper = (effect > upper).sum()
+        if effect_gt_upper > 0:
+            self._errors.append(
+                f"{effect_gt_upper} rows have {effect_col} > {ci_upper_col}"
+            )
+
+        # Check ci_lower <= ci_upper (implicit from above, but explicit is clearer)
+        lower_gt_upper = (lower > upper).sum()
+        if lower_gt_upper > 0:
+            self._errors.append(
+                f"{lower_gt_upper} rows have {ci_lower_col} > {ci_upper_col}"
+            )
+
+        return self
+
+    def validate(self) -> None:
+        """Raise ValidationError if any validation rules failed.
+
+        Raises:
+            ValidationError: If any validation errors were accumulated.
+                Error message includes all accumulated errors.
+        """
+        if self._errors:
+            error_msg = f"{self._name} validation failed:\n"
+            error_msg += "\n".join(f" - {error}" for error in self._errors)
+            raise ValidationError(error_msg)
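A usage sketch of the new builder on toy data (note that chaining across lines needs wrapping parentheses, unlike the doctest formatting in the class docstring):

```python
import pandas as pd

from pylocuszoom.validation import DataFrameValidator

gwas_df = pd.DataFrame({
    "chr": [1, 1, 2],
    "pos": [100, 200, 300],
    "p": [1e-8, 0.5, 1.5],  # 1.5 is outside [0, 1] and will be reported
})

# Rules accumulate errors; validate() raises a single ValidationError
# listing every failure, e.g. "Column 'p': 1 values > 1".
(
    DataFrameValidator(gwas_df, name="gwas_df")
    .require_columns(["chr", "pos", "p"])
    .require_numeric(["pos", "p"])
    .require_range("p", min_val=0, max_val=1)
    .validate()
)
```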
{pylocuszoom-0.6.0.dist-info → pylocuszoom-1.0.0.dist-info}/METADATA
CHANGED

@@ -1,15 +1,15 @@
 Metadata-Version: 2.4
 Name: pylocuszoom
-Version: 0.6.0
+Version: 1.0.0
 Summary: Publication-ready regional association plots with LD coloring, gene tracks, and recombination overlays
 Project-URL: Homepage, https://github.com/michael-denyer/pylocuszoom
 Project-URL: Documentation, https://github.com/michael-denyer/pylocuszoom#readme
 Project-URL: Repository, https://github.com/michael-denyer/pylocuszoom
-Author: Michael Denyer
+Author-email: Michael Denyer <code.denyer@gmail.com>
 License-Expression: GPL-3.0-or-later
 License-File: LICENSE.md
 Keywords: genetics,gwas,locus-zoom,locuszoom,regional-plot,visualization
-Classifier: Development Status ::
+Classifier: Development Status :: 4 - Beta
 Classifier: Intended Audience :: Science/Research
 Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
 Classifier: Programming Language :: Python :: 3

@@ -44,20 +44,18 @@ Requires-Dist: pyspark>=3.0.0; extra == 'spark'
 Description-Content-Type: text/markdown

 [](https://github.com/michael-denyer/pyLocusZoom/actions/workflows/ci.yml)
-[](https://codecov.io/gh/michael-denyer/pyLocusZoom)
 [](https://pypi.org/project/pylocuszoom/)
-[](https://anaconda.org/bioconda/pylocuszoom)
 [](https://www.gnu.org/licenses/gpl-3.0)
 [](https://www.python.org/downloads/)
 [](https://github.com/astral-sh/ruff)
 [](https://matplotlib.org/)
-[](https://plotly.com/python/)
 [](https://bokeh.org/)
 [](https://pandas.pydata.org/)
 <img src="logo.svg" alt="pyLocusZoom logo" width="120" align="right">
 # pyLocusZoom

-
+Designed for publication-ready GWAS visualization with regional association plots, gene tracks, eQTL, PheWAS, fine-mapping, and forest plots.

 Inspired by [LocusZoom](http://locuszoom.org/) and [locuszoomr](https://github.com/myles-lewis/locuszoomr).

@@ -68,20 +66,22 @@ Inspired by [LocusZoom](http://locuszoom.org/) and [locuszoomr](https://github.c
 - **Multi-species support**: Built-in reference data for *Canis lupus familiaris* (CanFam3.1/CanFam4) and *Felis catus* (FelCat9), or optionally provide your own for any species
 - **LD coloring**: SNPs colored by linkage disequilibrium (R²) with lead variant
 - **Gene tracks**: Annotated gene/exon positions below the association plot
-- **Recombination rate**:
-- **SNP labels (matplotlib)**: Automatic labeling of
-- **
+- **Recombination rate**: Optional overlay across region (*Canis lupus familiaris* built-in, not shown in example image)
+- **SNP labels (matplotlib)**: Automatic labeling of top SNPs by p-value (RS IDs)
+- **Hover tooltips (Plotly and Bokeh)**: Detailed SNP data on hover

-
+
+*Regional association plot with LD coloring, gene/exon track, and top SNP labels (recombination overlay disabled in example).*

 2. **Stacked plots**: Compare multiple GWAS/phenotypes vertically
 3. **eQTL plot**: Expression QTL data aligned with association plots and gene tracks
 4. **Fine-mapping plots**: Visualize SuSiE credible sets with posterior inclusion probabilities
 5. **PheWAS plots**: Phenome-wide association study visualization across multiple phenotypes
 6. **Forest plots**: Meta-analysis effect size visualization with confidence intervals
-7. **Multiple
+7. **Multiple backends**: matplotlib (publication-ready), plotly (interactive), bokeh (dashboard integration)
 8. **Pandas and PySpark support**: Works with both Pandas and PySpark DataFrames for large-scale genomics data
 9. **Convenience data file loaders**: Load and validate common GWAS, eQTL and fine-mapping file formats
+10. **Automatic gene annotations**: Fetch gene/exon data from Ensembl REST API with caching (human, mouse, rat, canine, feline, and any Ensembl species)

 ## Installation

@@ -109,15 +109,14 @@ from pylocuszoom import LocusZoomPlotter
 # Initialize plotter (loads reference data for canine)
 plotter = LocusZoomPlotter(species="canine")

-#
+# Plot with parameters passed directly
 fig = plotter.plot(
-    gwas_df,
+    gwas_df,  # DataFrame with ps, p_wald, rs columns
     chrom=1,
     start=1000000,
     end=2000000,
-    lead_pos=1500000,
+    lead_pos=1500000,  # Highlight lead SNP
 )
-
 fig.savefig("regional_plot.png", dpi=150)
 ```

@@ -137,9 +136,7 @@ fig = plotter.plot(
     start=1000000,
     end=2000000,
     lead_pos=1500000,
-    ld_reference_file="genotypes
-    genes_df=genes_df,  # Gene annotations
-    exons_df=exons_df,  # Exon annotations
+    ld_reference_file="genotypes",  # PLINK fileset (without extension)
     show_recombination=True,  # Overlay recombination rate
     snp_labels=True,  # Label top SNPs
     label_top_n=5,  # How many to label

@@ -147,6 +144,8 @@ fig = plotter.plot(
     p_col="p_wald",  # Column name for p-value
     rs_col="rs",  # Column name for SNP ID
     figsize=(12, 8),
+    genes_df=genes_df,  # Gene annotations
+    exons_df=exons_df,  # Exon annotations
 )
 ```

@@ -163,6 +162,8 @@ Recombination maps are automatically lifted over from CanFam3.1 to CanFam4 coordinates
 ## Using with Other Species

 ```python
+from pylocuszoom import LocusZoomPlotter
+
 # Feline (LD and gene tracks, user provides recombination data)
 plotter = LocusZoomPlotter(species="feline")

@@ -172,37 +173,61 @@ plotter = LocusZoomPlotter(
     recomb_data_dir="/path/to/recomb_maps/",
 )

-#
+# Provide data per-plot
 fig = plotter.plot(
     gwas_df,
-    chrom=1,
+    chrom=1,
+    start=1000000,
+    end=2000000,
     recomb_df=my_recomb_dataframe,
     genes_df=my_genes_df,
 )
 ```

+## Automatic Gene Annotations
+
+pyLocusZoom can automatically fetch gene annotations from Ensembl for any species:
+
+```python
+from pylocuszoom import LocusZoomPlotter
+
+# Enable automatic gene fetching
+plotter = LocusZoomPlotter(species="human", auto_genes=True)
+
+# No need to provide genes_df - fetched automatically
+fig = plotter.plot(gwas_df, chrom=13, start=32000000, end=33000000)
+```
+
+Supported species aliases: `human`, `mouse`, `rat`, `canine`/`dog`, `feline`/`cat`, or any Ensembl species name.
+Data is cached locally for fast subsequent plots. Maximum region size is 5Mb (Ensembl API limit).
+
 ## Backends

-pyLocusZoom supports multiple rendering backends:
+pyLocusZoom supports multiple rendering backends (set at initialization):

 ```python
+from pylocuszoom import LocusZoomPlotter
+
 # Static publication-quality plot (default)
-fig = plotter.plot(gwas_df, chrom=1, start=1000000, end=2000000, backend="matplotlib")
+plotter = LocusZoomPlotter(species="canine", backend="matplotlib")
+fig = plotter.plot(gwas_df, chrom=1, start=1000000, end=2000000)
 fig.savefig("plot.png", dpi=150)

 # Interactive Plotly (hover tooltips, pan/zoom)
-fig = plotter.plot(gwas_df, chrom=1, start=1000000, end=2000000, backend="plotly")
+plotter = LocusZoomPlotter(species="canine", backend="plotly")
+fig = plotter.plot(gwas_df, chrom=1, start=1000000, end=2000000)
 fig.write_html("plot.html")

 # Interactive Bokeh (dashboard-ready)
-fig = plotter.plot(gwas_df, chrom=1, start=1000000, end=2000000, backend="bokeh")
+plotter = LocusZoomPlotter(species="canine", backend="bokeh")
+fig = plotter.plot(gwas_df, chrom=1, start=1000000, end=2000000)
 ```

 | Backend | Output | Best For | Features |
 |---------|--------|----------|----------|
-| `matplotlib` | Static PNG/PDF/SVG |
-| `plotly` | Interactive HTML | Web reports,
-| `bokeh` | Interactive HTML |
+| `matplotlib` | Static PNG/PDF/SVG | Publication-ready figures | Full feature set with SNP labels |
+| `plotly` | Interactive HTML | Web reports, exploration | Hover tooltips, pan/zoom |
+| `bokeh` | Interactive HTML | Dashboard integration | Hover tooltips, pan/zoom |

 > **Note:** All backends support scatter plots, gene tracks, recombination overlay, and LD legend. SNP labels (auto-positioned with adjustText) are matplotlib-only; interactive backends use hover tooltips instead.

@@ -211,6 +236,10 @@ fig = plotter.plot(gwas_df, chrom=1, start=1000000, end=2000000, backend="bokeh"
 Compare multiple GWAS results vertically with shared x-axis:

 ```python
+from pylocuszoom import LocusZoomPlotter
+
+plotter = LocusZoomPlotter(species="canine")
+
 fig = plotter.plot_stacked(
     [gwas_height, gwas_bmi, gwas_whr],
     chrom=1,

@@ -221,22 +250,29 @@ fig = plotter.plot_stacked(
 )
 ```

-
+
+*Stacked plot comparing two phenotypes with LD coloring and shared gene track.*

 ## eQTL Overlay

 Add expression QTL data as a separate panel:

 ```python
+from pylocuszoom import LocusZoomPlotter
+
 eqtl_df = pd.DataFrame({
     "pos": [1000500, 1001200, 1002000],
     "p_value": [1e-6, 1e-4, 0.01],
     "gene": ["BRCA1", "BRCA1", "BRCA1"],
 })

+plotter = LocusZoomPlotter(species="canine")
+
 fig = plotter.plot_stacked(
     [gwas_df],
-    chrom=1,
+    chrom=1,
+    start=1000000,
+    end=2000000,
     eqtl_df=eqtl_df,
     eqtl_gene="BRCA1",
     genes_df=genes_df,

@@ -244,21 +280,28 @@ fig = plotter.plot_stacked(
 ```

 
+*eQTL overlay with effect direction (up/down triangles) and magnitude binning.*

 ## Fine-mapping Visualization

 Visualize SuSiE or other fine-mapping results with credible set coloring:

 ```python
+from pylocuszoom import LocusZoomPlotter
+
 finemapping_df = pd.DataFrame({
     "pos": [1000500, 1001200, 1002000, 1003500],
     "pip": [0.85, 0.12, 0.02, 0.45],  # Posterior inclusion probability
     "cs": [1, 1, 0, 2],  # Credible set assignment (0 = not in CS)
 })

+plotter = LocusZoomPlotter(species="canine")
+
 fig = plotter.plot_stacked(
     [gwas_df],
-    chrom=1,
+    chrom=1,
+    start=1000000,
+    end=2000000,
     finemapping_df=finemapping_df,
     finemapping_cs_col="cs",
     genes_df=genes_df,

@@ -266,6 +309,7 @@ fig = plotter.plot_stacked(
 ```

 
+*Fine-mapping visualization with PIP line and credible set coloring (CS1/CS2).*

 ## PheWAS Plots

@@ -286,6 +330,7 @@ fig = plotter.plot_phewas(
 ```

 
+*PheWAS plot showing associations across phenotype categories with significance threshold.*

 ## Forest Plots

@@ -308,19 +353,18 @@ fig = plotter.plot_forest(
 ```

 
+*Forest plot with effect sizes, confidence intervals, and weight-proportional markers.*

 ## PySpark Support

-For large-scale genomics data,
+For large-scale genomics data, convert PySpark DataFrames with `to_pandas()` before plotting:

 ```python
 from pylocuszoom import LocusZoomPlotter, to_pandas

-# PySpark DataFrame (
-fig = plotter.plot(spark_gwas_df, chrom=1, start=1000000, end=2000000)
-
-# Or convert manually with sampling for very large data
+# Convert PySpark DataFrame (optionally sampled for very large data)
 pandas_df = to_pandas(spark_gwas_df, sample_size=100000)
+fig = plotter.plot(pandas_df, chrom=1, start=1000000, end=2000000)
 ```

 Install PySpark support: `uv add pylocuszoom[spark]`

@@ -393,7 +437,7 @@ gwas_df = pd.DataFrame({
 |--------|------|----------|-------------|
 | `chr` | str or int | Yes | Chromosome identifier. Accepts "1", "chr1", or 1. The "chr" prefix is stripped for matching. |
 | `start` | int | Yes | Gene start position (bp, 1-based). Transcript start for strand-aware genes. |
-| `end` | int | Yes | Gene end position (bp, 1-based). Must be
+| `end` | int | Yes | Gene end position (bp, 1-based). Must be >= start. |
 | `gene_name` | str | Yes | Gene symbol displayed in track (e.g., "BRCA1", "TP53"). Keep short for readability. |

 Example:

@@ -495,6 +539,7 @@ Optional:
 ## Documentation

 - [User Guide](docs/USER_GUIDE.md) - Comprehensive documentation with API reference
+- [Code Map](docs/CODEMAP.md) - Architecture diagram with source code links
 - [Architecture](docs/ARCHITECTURE.md) - Design decisions and component overview
 - [Example Notebook](examples/getting_started.ipynb) - Interactive tutorial
 - [CHANGELOG](CHANGELOG.md) - Version history