pylocuszoom 0.2.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff compares publicly available package versions as published to their respective registries. It is provided for informational purposes only and reflects the packages exactly as released.
pylocuszoom/py.typed ADDED
Empty marker file (PEP 561 `py.typed`); no content to display
@@ -42,7 +42,7 @@ def _normalize_build(build: Optional[str]) -> Optional[str]:
      if build is None:
          return None
      build_lower = build.lower().replace(".", "").replace("_", "")
-     if "canfam4" in build_lower or "uucfamgsd" in build_lower:
+     if any(x in build_lower for x in ("canfam4", "uucfamgsd")):
          return "canfam4"
      if "canfam3" in build_lower:
          return "canfam3"
@@ -158,9 +158,9 @@ def get_default_data_dir() -> Path:
      """Get default directory for recombination map data.

      Returns platform-appropriate cache directory:
-     - macOS: ~/Library/Caches/snp-scope-plot
-     - Linux: ~/.cache/snp-scope-plot
+     - macOS/Linux: ~/.cache/snp-scope-plot (or $XDG_CACHE_HOME if set)
      - Windows: %LOCALAPPDATA%/snp-scope-plot
+     - Databricks: /dbfs/FileStore/reference_data/recombination_maps
      """
      if os.name == "nt":  # Windows
          base = Path(os.environ.get("LOCALAPPDATA", Path.home()))
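A rough sketch of the lookup order the updated docstring describes (the Databricks detection and the non-Windows fallback are assumptions; only the Windows branch is visible in this hunk):

```python
import os
from pathlib import Path


def default_data_dir() -> Path:
    """Sketch of the documented resolution order; not the packaged implementation."""
    if Path("/dbfs").exists():  # Databricks (assumed detection)
        return Path("/dbfs/FileStore/reference_data/recombination_maps")
    if os.name == "nt":  # Windows
        base = Path(os.environ.get("LOCALAPPDATA", Path.home()))
    else:  # macOS/Linux
        base = Path(os.environ.get("XDG_CACHE_HOME", Path.home() / ".cache"))
    return base / "snp-scope-plot"
```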
@@ -207,7 +207,7 @@ def download_canine_recombination_maps(
      # Check if already downloaded
      if output_path.exists() and not force:
          existing_files = list(output_path.glob("chr*_recomb.tsv"))
-         if len(existing_files) >= 38:  # 38 autosomes + X
+         if len(existing_files) >= 39:  # 38 autosomes + X
              return output_path

      # Create output directory
pylocuszoom/schemas.py ADDED
@@ -0,0 +1,395 @@
+ """Pydantic validation schemas for loaded data.
+
+ Provides validation models for GWAS, eQTL, fine-mapping, and gene annotation
+ DataFrames to ensure data quality before plotting.
+ """
+
+ from pathlib import Path
+ from typing import Optional, Union
+
+ import pandas as pd
+ from pydantic import BaseModel, ConfigDict, field_validator, model_validator
+
+
+ class LoaderValidationError(Exception):
+     """Raised when loaded data fails validation."""
+
+     pass
+
+
+ # =============================================================================
+ # GWAS Validation
+ # =============================================================================
+
+
+ class GWASRowModel(BaseModel):
+     """Validation model for a single GWAS row."""
+
+     model_config = ConfigDict(extra="allow")
+
+     ps: int
+     p_wald: float
+     rs: Optional[str] = None
+     chr: Optional[Union[str, int]] = None
+
+     @field_validator("ps")
+     @classmethod
+     def position_positive(cls, v: int) -> int:
+         """Position must be positive."""
+         if v <= 0:
+             raise ValueError(f"Position must be positive, got {v}")
+         return v
+
+     @field_validator("p_wald")
+     @classmethod
+     def pvalue_in_range(cls, v: float) -> float:
+         """P-value must be between 0 and 1."""
+         if not (0 < v <= 1):
+             raise ValueError(f"P-value must be in range (0, 1], got {v}")
+         return v
+
+
+ def validate_gwas_dataframe(
+     df: pd.DataFrame,
+     pos_col: str = "ps",
+     p_col: str = "p_wald",
+     rs_col: str = "rs",
+     strict: bool = False,
+ ) -> pd.DataFrame:
+     """Validate a GWAS DataFrame.
+
+     Args:
+         df: DataFrame to validate.
+         pos_col: Column name for position.
+         p_col: Column name for p-value.
+         rs_col: Column name for SNP ID.
+         strict: If True, validate every row. If False (default), validate schema only.
+
+     Returns:
+         Validated DataFrame.
+
+     Raises:
+         LoaderValidationError: If validation fails.
+     """
+     errors = []
+
+     # Check required columns exist
+     if pos_col not in df.columns:
+         errors.append(f"Missing required column: '{pos_col}'")
+     if p_col not in df.columns:
+         errors.append(f"Missing required column: '{p_col}'")
+
+     if errors:
+         raise LoaderValidationError(
+             "GWAS validation failed:\n - " + "\n - ".join(errors)
+         )
+
+     # Check data types
+     if not pd.api.types.is_numeric_dtype(df[pos_col]):
+         errors.append(f"Column '{pos_col}' must be numeric, got {df[pos_col].dtype}")
+
+     if not pd.api.types.is_numeric_dtype(df[p_col]):
+         errors.append(f"Column '{p_col}' must be numeric, got {df[p_col].dtype}")
+
+     # Check value ranges
+     if (df[pos_col] <= 0).any():
+         n_invalid = (df[pos_col] <= 0).sum()
+         errors.append(f"Column '{pos_col}' has {n_invalid} non-positive values")
+
+     if ((df[p_col] <= 0) | (df[p_col] > 1)).any():
+         n_invalid = ((df[p_col] <= 0) | (df[p_col] > 1)).sum()
+         errors.append(f"Column '{p_col}' has {n_invalid} values outside range (0, 1]")
+
+     # Check for NaN in required columns
+     if df[pos_col].isna().any():
+         n_na = df[pos_col].isna().sum()
+         errors.append(f"Column '{pos_col}' has {n_na} missing values")
+
+     if df[p_col].isna().any():
+         n_na = df[p_col].isna().sum()
+         errors.append(f"Column '{p_col}' has {n_na} missing values")
+
+     if errors:
+         raise LoaderValidationError(
+             "GWAS validation failed:\n - " + "\n - ".join(errors)
+         )
+
+     return df
+
+
+ # =============================================================================
+ # eQTL Validation
+ # =============================================================================
+
+
+ class EQTLRowModel(BaseModel):
+     """Validation model for a single eQTL row."""
+
+     model_config = ConfigDict(extra="allow")
+
+     pos: int
+     p_value: float
+     gene: str
+     effect: Optional[float] = None
+
+     @field_validator("pos")
+     @classmethod
+     def position_positive(cls, v: int) -> int:
+         """Position must be positive."""
+         if v <= 0:
+             raise ValueError(f"Position must be positive, got {v}")
+         return v
+
+     @field_validator("p_value")
+     @classmethod
+     def pvalue_in_range(cls, v: float) -> float:
+         """P-value must be between 0 and 1."""
+         if not (0 < v <= 1):
+             raise ValueError(f"P-value must be in range (0, 1], got {v}")
+         return v
+
+
+ def validate_eqtl_dataframe(
+     df: pd.DataFrame,
+     strict: bool = False,
+ ) -> pd.DataFrame:
+     """Validate an eQTL DataFrame.
+
+     Args:
+         df: DataFrame to validate.
+         strict: If True, validate every row.
+
+     Returns:
+         Validated DataFrame.
+
+     Raises:
+         LoaderValidationError: If validation fails.
+     """
+     errors = []
+
+     # Check required columns
+     required = ["pos", "p_value", "gene"]
+     for col in required:
+         if col not in df.columns:
+             errors.append(f"Missing required column: '{col}'")
+
+     if errors:
+         raise LoaderValidationError(
+             "eQTL validation failed:\n - " + "\n - ".join(errors)
+         )
+
+     # Check data types and ranges
+     if not pd.api.types.is_numeric_dtype(df["pos"]):
+         errors.append(f"Column 'pos' must be numeric, got {df['pos'].dtype}")
+     elif (df["pos"] <= 0).any():
+         n_invalid = (df["pos"] <= 0).sum()
+         errors.append(f"Column 'pos' has {n_invalid} non-positive values")
+
+     if not pd.api.types.is_numeric_dtype(df["p_value"]):
+         errors.append(f"Column 'p_value' must be numeric, got {df['p_value'].dtype}")
+     elif ((df["p_value"] <= 0) | (df["p_value"] > 1)).any():
+         n_invalid = ((df["p_value"] <= 0) | (df["p_value"] > 1)).sum()
+         errors.append(f"Column 'p_value' has {n_invalid} values outside range (0, 1]")
+
+     if errors:
+         raise LoaderValidationError(
+             "eQTL validation failed:\n - " + "\n - ".join(errors)
+         )
+
+     return df
+
+
+ # =============================================================================
+ # Fine-mapping Validation
+ # =============================================================================
+
+
+ class FinemappingRowModel(BaseModel):
+     """Validation model for a single fine-mapping row."""
+
+     model_config = ConfigDict(extra="allow")
+
+     pos: int
+     pip: float
+     cs: Optional[int] = None
+
+     @field_validator("pos")
+     @classmethod
+     def position_positive(cls, v: int) -> int:
+         """Position must be positive."""
+         if v <= 0:
+             raise ValueError(f"Position must be positive, got {v}")
+         return v
+
+     @field_validator("pip")
+     @classmethod
+     def pip_in_range(cls, v: float) -> float:
+         """PIP must be between 0 and 1."""
+         if not (0 <= v <= 1):
+             raise ValueError(f"PIP must be in range [0, 1], got {v}")
+         return v
+
+
+ def validate_finemapping_dataframe(
+     df: pd.DataFrame,
+     cs_col: str = "cs",
+     strict: bool = False,
+ ) -> pd.DataFrame:
+     """Validate a fine-mapping DataFrame.
+
+     Args:
+         df: DataFrame to validate.
+         cs_col: Column name for credible set.
+         strict: If True, validate every row.
+
+     Returns:
+         Validated DataFrame.
+
+     Raises:
+         LoaderValidationError: If validation fails.
+     """
+     errors = []
+
+     # Check required columns
+     if "pos" not in df.columns:
+         errors.append("Missing required column: 'pos'")
+     if "pip" not in df.columns:
+         errors.append("Missing required column: 'pip'")
+
+     if errors:
+         raise LoaderValidationError(
+             "Fine-mapping validation failed:\n - " + "\n - ".join(errors)
+         )
+
+     # Check data types and ranges
+     if not pd.api.types.is_numeric_dtype(df["pos"]):
+         errors.append(f"Column 'pos' must be numeric, got {df['pos'].dtype}")
+     elif (df["pos"] <= 0).any():
+         n_invalid = (df["pos"] <= 0).sum()
+         errors.append(f"Column 'pos' has {n_invalid} non-positive values")
+
+     if not pd.api.types.is_numeric_dtype(df["pip"]):
+         errors.append(f"Column 'pip' must be numeric, got {df['pip'].dtype}")
+     elif ((df["pip"] < 0) | (df["pip"] > 1)).any():
+         n_invalid = ((df["pip"] < 0) | (df["pip"] > 1)).sum()
+         errors.append(f"Column 'pip' has {n_invalid} values outside range [0, 1]")
+
+     if errors:
+         raise LoaderValidationError(
+             "Fine-mapping validation failed:\n - " + "\n - ".join(errors)
+         )
+
+     return df
+
+
+ # =============================================================================
+ # Gene Annotation Validation
+ # =============================================================================
+
+
+ class GeneRowModel(BaseModel):
+     """Validation model for a single gene annotation row."""
+
+     model_config = ConfigDict(extra="allow")
+
+     chr: Union[str, int]
+     start: int
+     end: int
+     gene_name: str
+     strand: Optional[str] = None
+
+     @field_validator("start", "end")
+     @classmethod
+     def position_positive(cls, v: int) -> int:
+         """Position must be positive."""
+         if v < 0:
+             raise ValueError(f"Position must be non-negative, got {v}")
+         return v
+
+     @model_validator(mode="after")
+     def start_before_end(self):
+         """Start must be <= end."""
+         if self.start > self.end:
+             raise ValueError(f"Start ({self.start}) must be <= end ({self.end})")
+         return self
+
+
+ def validate_genes_dataframe(
+     df: pd.DataFrame,
+     strict: bool = False,
+ ) -> pd.DataFrame:
+     """Validate a genes DataFrame.
+
+     Args:
+         df: DataFrame to validate.
+         strict: If True, validate every row.
+
+     Returns:
+         Validated DataFrame.
+
+     Raises:
+         LoaderValidationError: If validation fails.
+     """
+     errors = []
+
+     # Check required columns
+     required = ["chr", "start", "end", "gene_name"]
+     for col in required:
+         if col not in df.columns:
+             errors.append(f"Missing required column: '{col}'")
+
+     if errors:
+         raise LoaderValidationError(
+             "Gene annotation validation failed:\n - " + "\n - ".join(errors)
+         )
+
+     # Check data types
+     if not pd.api.types.is_numeric_dtype(df["start"]):
+         errors.append(f"Column 'start' must be numeric, got {df['start'].dtype}")
+
+     if not pd.api.types.is_numeric_dtype(df["end"]):
+         errors.append(f"Column 'end' must be numeric, got {df['end'].dtype}")
+
+     # Check ranges
+     if (df["start"] < 0).any():
+         n_invalid = (df["start"] < 0).sum()
+         errors.append(f"Column 'start' has {n_invalid} negative values")
+
+     if (df["end"] < df["start"]).any():
+         n_invalid = (df["end"] < df["start"]).sum()
+         errors.append(f"Found {n_invalid} genes where end < start")
+
+     if errors:
+         raise LoaderValidationError(
+             "Gene annotation validation failed:\n - " + "\n - ".join(errors)
+         )
+
+     return df
+
+
+ # =============================================================================
+ # File Path Validation
+ # =============================================================================
+
+
+ def validate_file_path(filepath: Union[str, Path]) -> Path:
+     """Validate that a file path exists and is readable.
+
+     Args:
+         filepath: Path to validate.
+
+     Returns:
+         Validated Path object.
+
+     Raises:
+         LoaderValidationError: If file doesn't exist or isn't readable.
+     """
+     path = Path(filepath)
+
+     if not path.exists():
+         raise LoaderValidationError(f"File not found: {path}")
+
+     if not path.is_file():
+         raise LoaderValidationError(f"Not a file: {path}")
+
+     return path
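A hedged usage sketch of the validators this new module adds. The import path `pylocuszoom.schemas` follows the file path shown above; whether these names are also re-exported from the package root is not visible in this diff.

```python
import pandas as pd

from pylocuszoom.schemas import LoaderValidationError, validate_gwas_dataframe

# Minimal GWAS frame using the validator's default column names.
gwas_df = pd.DataFrame({"ps": [1000500, 1001200], "p_wald": [5e-8, 0.03]})

try:
    validated = validate_gwas_dataframe(gwas_df, pos_col="ps", p_col="p_wald")
except LoaderValidationError as exc:
    # Aggregated messages, e.g. missing columns or p-values outside (0, 1].
    print(f"Input rejected:\n{exc}")
```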
@@ -1,7 +1,7 @@
  Metadata-Version: 2.4
  Name: pylocuszoom
- Version: 0.2.0
- Summary: Regional association plots for GWAS results with LD coloring, gene tracks, and recombination rate overlays
+ Version: 0.5.0
+ Summary: Publication-ready regional association plots with LD coloring, gene tracks, and recombination overlays
  Project-URL: Homepage, https://github.com/michael-denyer/pylocuszoom
  Project-URL: Documentation, https://github.com/michael-denyer/pylocuszoom#readme
  Project-URL: Repository, https://github.com/michael-denyer/pylocuszoom
@@ -19,6 +19,7 @@ Classifier: Programming Language :: Python :: 3.12
  Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
  Classifier: Topic :: Scientific/Engineering :: Visualization
  Requires-Python: >=3.10
+ Requires-Dist: adjusttext>=0.8
  Requires-Dist: bokeh>=3.8.2
  Requires-Dist: kaleido>=0.2.0
  Requires-Dist: loguru>=0.7.0
@@ -26,63 +27,72 @@ Requires-Dist: matplotlib>=3.5.0
  Requires-Dist: numpy>=1.21.0
  Requires-Dist: pandas>=1.4.0
  Requires-Dist: plotly>=5.0.0
+ Requires-Dist: pydantic>=2.0.0
  Requires-Dist: pyliftover>=0.4
  Provides-Extra: all
- Requires-Dist: adjusttext>=0.8; extra == 'all'
  Requires-Dist: pyspark>=3.0.0; extra == 'all'
  Provides-Extra: dev
  Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
  Requires-Dist: pytest>=7.0.0; extra == 'dev'
  Requires-Dist: ruff>=0.1.0; extra == 'dev'
- Provides-Extra: labels
- Requires-Dist: adjusttext>=0.8; extra == 'labels'
  Provides-Extra: spark
  Requires-Dist: pyspark>=3.0.0; extra == 'spark'
  Description-Content-Type: text/markdown

- # pyLocusZoom
-
  [![CI](https://github.com/michael-denyer/pyLocusZoom/actions/workflows/ci.yml/badge.svg)](https://github.com/michael-denyer/pyLocusZoom/actions/workflows/ci.yml)
- [![License: GPL v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0)
+ [![codecov](https://codecov.io/gh/michael-denyer/pyLocusZoom/graph/badge.svg)](https://codecov.io/gh/michael-denyer/pyLocusZoom)
+ [![PyPI](https://img.shields.io/pypi/v/pylocuszoom)](https://pypi.org/project/pylocuszoom/)
+ [![Bioconda](https://img.shields.io/conda/vn/bioconda/pylocuszoom)](https://anaconda.org/bioconda/pylocuszoom)
+ [![License: GPL v3](https://img.shields.io/badge/License-GPLv3-red.svg)](https://www.gnu.org/licenses/gpl-3.0)
  [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
  [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
-
  [![Matplotlib](https://img.shields.io/badge/Matplotlib-3.5+-11557c.svg)](https://matplotlib.org/)
  [![Plotly](https://img.shields.io/badge/Plotly-5.0+-3F4F75.svg)](https://plotly.com/python/)
  [![Bokeh](https://img.shields.io/badge/Bokeh-3.8+-E6526F.svg)](https://bokeh.org/)
  [![Pandas](https://img.shields.io/badge/Pandas-1.4+-150458.svg)](https://pandas.pydata.org/)
-
  <img src="logo.svg" alt="pyLocusZoom logo" width="120" align="right">
+ # pyLocusZoom

- Regional association plots for GWAS results with LD coloring, gene tracks, and recombination rate overlays.
+ Publication-ready regional association plots with LD coloring, gene tracks, and recombination overlays.

  Inspired by [LocusZoom](http://locuszoom.org/) and [locuszoomr](https://github.com/myles-lewis/locuszoomr).

  ## Features

- - **LD coloring**: SNPs colored by linkage disequilibrium (R²) with lead variant
- - **Gene track**: Annotated gene/exon positions below the association plot
- - **Recombination rate**: Overlay showing recombination rate across region (*Canis lupus familiaris* only)
- - **SNP labels**: Automatic labeling of top SNPs with RS ID or nearest gene
- - **Species support**: Built-in *Canis lupus familiaris* (CanFam3.1/CanFam4), *Felis catus* (FelCat9), or custom species
- - **CanFam4 support**: Automatic coordinate liftover for recombination maps
- - **Multiple backends**: matplotlib (static), plotly (interactive), bokeh (dashboards)
- - **Stacked plots**: Compare multiple GWAS/phenotypes vertically
- - **eQTL overlay**: Expression QTL data as separate panel
- - **PySpark support**: Handles large-scale genomics DataFrames
+ 1. **Regional association plot**:
+
+    - **Multi-species support**: Built-in reference data for *Canis lupus familiaris* (CanFam3.1/CanFam4) and *Felis catus* (FelCat9), or optionally provide your own for any species
+    - **LD coloring**: SNPs colored by linkage disequilibrium (R²) with lead variant
+    - **Gene tracks**: Annotated gene/exon positions below the association plot
+    - **Recombination rate**: Overlay showing recombination rate across region (*Canis lupus familiaris* only)
+    - **SNP labels (matplotlib)**: Automatic labeling of lead SNPs with RS ID
+    - **Tooltips (Bokeh and Plotly)**: Mouseover for detailed SNP data

  ![Example regional association plot](examples/regional_plot.png)

+ 2. **Stacked plots**: Compare multiple GWAS/phenotypes vertically
+ 3. **eQTL plot**: Expression QTL data aligned with association plots and gene tracks
+ 4. **Fine-mapping plots**: Visualize SuSiE credible sets with posterior inclusion probabilities
+ 5. **Multiple charting libraries**: matplotlib (static), plotly (interactive), bokeh (dashboards)
+ 6. **Pandas and PySpark support**: Works with both Pandas and PySpark DataFrames for large-scale genomics data
+ 7. **Convenience data file loaders**: Load and validate common GWAS, eQTL and fine-mapping file formats
+
  ## Installation

+ ```bash
+ pip install pylocuszoom
+ ```
+
+ Or with uv:
+
  ```bash
  uv add pylocuszoom
  ```

- Or with pip:
+ Or with conda (Bioconda):

  ```bash
- pip install pylocuszoom
+ conda install -c bioconda pylocuszoom
  ```

  ## Quick Start
@@ -165,20 +175,30 @@ fig = plotter.plot(
  )
  ```

- ## Interactive Backends (Coming Soon)
+ ## Backends

- > **Note:** Interactive backends (plotly, bokeh) are planned but not yet fully integrated. Currently all plots use matplotlib.
+ pyLocusZoom supports multiple rendering backends:

  ```python
- # Static publication-quality plot (default, currently only supported backend)
- plotter = LocusZoomPlotter(species="canine", backend="matplotlib")
- fig = plotter.plot(gwas_df, chrom=1, start=1000000, end=2000000)
+ # Static publication-quality plot (default)
+ fig = plotter.plot(gwas_df, chrom=1, start=1000000, end=2000000, backend="matplotlib")
  fig.savefig("plot.png", dpi=150)
+
+ # Interactive Plotly (hover tooltips, pan/zoom)
+ fig = plotter.plot(gwas_df, chrom=1, start=1000000, end=2000000, backend="plotly")
+ fig.write_html("plot.html")
+
+ # Interactive Bokeh (dashboard-ready)
+ fig = plotter.plot(gwas_df, chrom=1, start=1000000, end=2000000, backend="bokeh")
  ```

- Future releases will support:
- - **Plotly**: Interactive plots with hover tooltips, zoom/pan
- - **Bokeh**: Dashboard-friendly interactive plots
+ | Backend | Output | Best For | Features |
+ |---------|--------|----------|----------|
+ | `matplotlib` | Static PNG/PDF/SVG | Publications, presentations | Full feature set with SNP labels |
+ | `plotly` | Interactive HTML | Web reports, data exploration | Hover tooltips, pan/zoom |
+ | `bokeh` | Interactive HTML | Dashboards, web apps | Hover tooltips, pan/zoom |
+
+ > **Note:** All backends support scatter plots, gene tracks, recombination overlay, and LD legend. SNP labels (auto-positioned with adjustText) are matplotlib-only; interactive backends use hover tooltips instead.

  ## Stacked Plots

@@ -195,6 +215,8 @@ fig = plotter.plot_stacked(
  )
  ```

+ ![Example stacked plot](examples/stacked_plot.png)
+
  ## eQTL Overlay

  Add expression QTL data as a separate panel:
@@ -215,6 +237,30 @@ fig = plotter.plot_stacked(
  )
  ```

+ ![Example eQTL overlay plot](examples/eqtl_overlay.png)
+
+ ## Fine-mapping Visualization
+
+ Visualize SuSiE or other fine-mapping results with credible set coloring:
+
+ ```python
+ finemapping_df = pd.DataFrame({
+     "pos": [1000500, 1001200, 1002000, 1003500],
+     "pip": [0.85, 0.12, 0.02, 0.45],  # Posterior inclusion probability
+     "cs": [1, 1, 0, 2],  # Credible set assignment (0 = not in CS)
+ })
+
+ fig = plotter.plot_stacked(
+     [gwas_df],
+     chrom=1, start=1000000, end=2000000,
+     finemapping_df=finemapping_df,
+     finemapping_cs_col="cs",
+     genes_df=genes_df,
+ )
+ ```
+
+ ![Example fine-mapping plot](examples/finemapping_plot.png)
+
  ## PySpark Support
  For large-scale genomics data, pass PySpark DataFrames directly:
@@ -231,6 +277,47 @@ pandas_df = to_pandas(spark_gwas_df, sample_size=100000)

  Install PySpark support: `uv add pylocuszoom[spark]`

+ ## Loading Data from Files
+
+ pyLocusZoom includes loaders for common GWAS, eQTL, and fine-mapping file formats:
+
+ ```python
+ from pylocuszoom import (
+     # GWAS loaders
+     load_gwas,            # Auto-detect format
+     load_plink_assoc,     # PLINK .assoc, .assoc.linear, .qassoc
+     load_regenie,         # REGENIE .regenie
+     load_bolt_lmm,        # BOLT-LMM .stats
+     load_gemma,           # GEMMA .assoc.txt
+     load_saige,           # SAIGE output
+     # eQTL loaders
+     load_gtex_eqtl,       # GTEx significant pairs
+     load_eqtl_catalogue,  # eQTL Catalogue format
+     # Fine-mapping loaders
+     load_susie,           # SuSiE output
+     load_finemap,         # FINEMAP .snp output
+     # Gene annotations
+     load_gtf,             # GTF/GFF3 files
+     load_bed,             # BED files
+ )
+
+ # Auto-detect GWAS format from filename
+ gwas_df = load_gwas("results.assoc.linear")
+
+ # Or use specific loader
+ gwas_df = load_regenie("ukb_results.regenie")
+
+ # Load gene annotations
+ genes_df = load_gtf("genes.gtf", feature_type="gene")
+ exons_df = load_gtf("genes.gtf", feature_type="exon")
+
+ # Load eQTL data
+ eqtl_df = load_gtex_eqtl("GTEx.signif_pairs.txt.gz", gene="BRCA1")
+
+ # Load fine-mapping results
+ fm_df = load_susie("susie_output.tsv")
+ ```
+
  ## Data Formats

  ### GWAS Results DataFrame
@@ -357,6 +444,13 @@ plotter = LocusZoomPlotter(log_level="DEBUG")
  Optional:
  - pyspark >= 3.0.0 (for PySpark DataFrame support) - `uv add pylocuszoom[spark]`

+ ## Documentation
+
+ - [User Guide](docs/USER_GUIDE.md) - Comprehensive documentation with API reference
+ - [Architecture](docs/ARCHITECTURE.md) - Design decisions and component overview
+ - [Example Notebook](examples/getting_started.ipynb) - Interactive tutorial
+ - [CHANGELOG](CHANGELOG.md) - Version history
+
  ## License

  GPL-3.0-or-later