pylocuszoom 1.0.0__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pylocuszoom/qq.py ADDED
@@ -0,0 +1,123 @@
1
+ """QQ plot data preparation and statistics."""
2
+
3
+ import numpy as np
4
+ import pandas as pd
5
+ from scipy import stats
6
+
7
+
8
def calculate_lambda_gc(p_values: np.ndarray) -> float:
    """Calculate genomic inflation factor (lambda GC).

    Lambda is the ratio of the median observed chi-squared statistic
    to the expected median under the null hypothesis.

    Args:
        p_values: Array of p-values.

    Returns:
        Genomic inflation factor (lambda). Returns NaN if no valid p-values.
    """
    # Remove NaN and zero/negative values
    p_clean = p_values[~np.isnan(p_values) & (p_values > 0)]
    if len(p_clean) == 0:
        return np.nan

    # Convert to chi-squared statistics (1 df).
    # isf(p) == ppf(1 - p) mathematically, but isf avoids the precision
    # loss where 1 - p rounds to 1.0 for p < ~1e-16 and ppf returns inf.
    chi2 = stats.chi2.isf(p_clean, df=1)

    # Expected median for chi-squared with 1 df (~0.4549)
    expected_median = stats.chi2.ppf(0.5, df=1)

    # Lambda = observed median / expected median
    return np.median(chi2) / expected_median
33
+
34
+
35
def calculate_confidence_band(
    n_points: int, confidence: float = 0.95
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Calculate confidence band for QQ plot.

    Uses order statistics to compute expected distribution of p-values
    under the null hypothesis.

    Args:
        n_points: Number of p-values.
        confidence: Confidence level (default 0.95 for 95% CI).

    Returns:
        Tuple of (expected, lower_bound, upper_bound) arrays in -log10 scale.
    """
    ranks = np.arange(1, n_points + 1)

    # Expected quantiles under the null, on the -log10 scale
    expected = -np.log10(ranks / (n_points + 1))

    # The k-th order statistic of n uniform(0, 1) draws follows
    # Beta(k, n - k + 1); its quantiles give the pointwise interval.
    tail = (1 - confidence) / 2
    beta_b = n_points - ranks + 1
    lo_quantile = stats.beta.ppf(tail, ranks, beta_b)
    hi_quantile = stats.beta.ppf(1 - tail, ranks, beta_b)

    # -log10 is decreasing, so the band edges swap when transformed
    return expected, -np.log10(hi_quantile), -np.log10(lo_quantile)
66
+
67
+
68
def prepare_qq_data(
    df: pd.DataFrame,
    p_col: str = "p",
) -> pd.DataFrame:
    """Prepare DataFrame for QQ plot rendering.

    Args:
        df: DataFrame with p-values.
        p_col: Column name for p-value.

    Returns:
        DataFrame with columns for QQ plotting:
            - _expected: Expected -log10(p) under null
            - _observed: Observed -log10(p)
            - _ci_lower: Lower confidence bound
            - _ci_upper: Upper confidence bound

        Attributes stored in DataFrame.attrs:
            - lambda_gc: Genomic inflation factor
            - n_variants: Number of valid p-values
    """
    if p_col not in df.columns:
        raise ValueError(f"Column '{p_col}' not found in DataFrame")

    # Keep only p-values in the valid open-closed interval (0, 1]
    raw = df[p_col].values
    keep = ~np.isnan(raw) & (raw > 0) & (raw <= 1)
    p_valid = raw[keep]

    if len(p_valid) == 0:
        raise ValueError("No valid p-values found (must be > 0 and <= 1)")

    # Sort ascending: the smallest p yields the largest -log10(p), so the
    # observed values come out in descending order, matching the descending
    # expected quantiles from calculate_confidence_band (rank 1 first).
    observed = -np.log10(np.sort(p_valid))

    expected, ci_lower, ci_upper = calculate_confidence_band(len(p_valid))

    result = pd.DataFrame(
        {
            "_expected": expected,
            "_observed": observed,
            "_ci_lower": ci_lower,
            "_ci_upper": ci_upper,
        }
    )

    # Summary statistics travel with the frame via .attrs
    result.attrs["lambda_gc"] = calculate_lambda_gc(p_valid)
    result.attrs["n_variants"] = len(p_valid)

    return result
@@ -432,8 +432,8 @@ def add_recombination_overlay(
432
432
  region_recomb["pos"],
433
433
  region_recomb["rate"],
434
434
  color=RECOMB_COLOR,
435
- linewidth=1.5,
436
- alpha=0.7,
435
+ linewidth=2.5,
436
+ alpha=0.8,
437
437
  zorder=0, # Behind scatter points
438
438
  )
439
439
 
@@ -447,14 +447,14 @@ def add_recombination_overlay(
447
447
  zorder=0,
448
448
  )
449
449
 
450
- # Format secondary axis
451
- recomb_ax.set_ylabel("Recombination rate (cM/Mb)", color=RECOMB_COLOR, fontsize=9)
452
- recomb_ax.tick_params(axis="y", labelcolor=RECOMB_COLOR, labelsize=8)
450
+ # Format secondary axis - use black for label text (more readable)
451
+ recomb_ax.set_ylabel("Recombination rate (cM/Mb)", color="black", fontsize=9)
452
+ recomb_ax.tick_params(axis="y", labelcolor="black", labelsize=8)
453
453
  recomb_ax.set_ylim(bottom=0)
454
454
 
455
- # Don't let recomb rate overwhelm the plot
455
+ # Scale to fit data with headroom
456
456
  max_rate = region_recomb["rate"].max()
457
- recomb_ax.set_ylim(0, max(max_rate * 1.2, 20))
457
+ recomb_ax.set_ylim(0, max(max_rate * 1.3, 10))
458
458
 
459
459
  # Remove top spine for cleaner look
460
460
  recomb_ax.spines["top"].set_visible(False)
@@ -0,0 +1,319 @@
1
+ """Statistical visualization plotter for PheWAS and forest plots.
2
+
3
+ Provides variant-centric visualizations:
4
+ - PheWAS plots showing associations across phenotypes
5
+ - Forest plots showing effect sizes with confidence intervals
6
+ """
7
+
8
+ from typing import Any, Optional, Tuple
9
+
10
+ import numpy as np
11
+ import pandas as pd
12
+
13
+ from ._plotter_utils import DEFAULT_GENOMEWIDE_THRESHOLD, transform_pvalues
14
+ from .backends import BackendType, get_backend
15
+ from .colors import get_phewas_category_palette
16
+ from .forest import validate_forest_df
17
+ from .phewas import validate_phewas_df
18
+
19
+
20
class StatsPlotter:
    """Statistical visualization plotter for PheWAS and forest plots.

    Creates variant-centric visualizations for phenome-wide associations
    and meta-analysis forest plots.

    Args:
        backend: Plotting backend ('matplotlib', 'plotly', or 'bokeh').
        genomewide_threshold: P-value threshold for significance line.

    Example:
        >>> plotter = StatsPlotter()
        >>> fig = plotter.plot_phewas(phewas_df, variant_id="rs12345")
        >>> fig.savefig("phewas.png", dpi=150)
    """

    def __init__(
        self,
        backend: BackendType = "matplotlib",
        genomewide_threshold: float = DEFAULT_GENOMEWIDE_THRESHOLD,
    ):
        """Initialize the stats plotter."""
        self._backend = get_backend(backend)
        self.genomewide_threshold = genomewide_threshold

    def plot_phewas(
        self,
        phewas_df: pd.DataFrame,
        variant_id: str,
        phenotype_col: str = "phenotype",
        p_col: str = "p_value",
        category_col: str = "category",
        effect_col: Optional[str] = None,
        significance_threshold: float = DEFAULT_GENOMEWIDE_THRESHOLD,
        figsize: Tuple[float, float] = (10, 8),
    ) -> Any:
        """Create a PheWAS (Phenome-Wide Association Study) plot.

        Shows associations of a single variant across multiple phenotypes,
        with phenotypes grouped by category and colored accordingly.

        Args:
            phewas_df: DataFrame with phenotype associations.
            variant_id: Variant identifier (e.g., "rs12345") for plot title.
            phenotype_col: Column name for phenotype names.
            p_col: Column name for p-values.
            category_col: Column name for phenotype categories.
            effect_col: Optional column name for effect direction (beta/OR).
            significance_threshold: P-value threshold for significance line.
            figsize: Figure size as (width, height).

        Returns:
            Figure object (type depends on backend).

        Example:
            >>> fig = plotter.plot_phewas(
            ...     phewas_df,
            ...     variant_id="rs12345",
            ...     category_col="category",
            ... )
        """
        validate_phewas_df(phewas_df, phenotype_col, p_col, category_col)

        df = phewas_df.copy()
        df = transform_pvalues(df, p_col)

        # Sort by category then by p-value for consistent ordering
        if category_col in df.columns:
            df = df.sort_values([category_col, p_col])
            categories = df[category_col].unique().tolist()
            palette = get_phewas_category_palette(categories)
        else:
            df = df.sort_values(p_col)
            categories = []
            palette = {}

        # Create figure
        fig, axes = self._backend.create_figure(
            n_panels=1,
            height_ratios=[1.0],
            figsize=figsize,
        )
        ax = axes[0]

        # Assign y-positions (one per phenotype)
        df["y_pos"] = range(len(df))

        # Plot points by category
        if categories:
            for cat in categories:
                # Handle NaN category: NaN == NaN is False in pandas
                if pd.isna(cat):
                    cat_data = df[df[category_col].isna()]
                else:
                    cat_data = df[df[category_col] == cat]
                # Use upward triangles for positive effects, circles otherwise
                if effect_col and effect_col in cat_data.columns:
                    # Vectorized: split by effect sign, 2 scatter calls per category
                    pos_data = cat_data[cat_data[effect_col] >= 0]
                    neg_data = cat_data[cat_data[effect_col] < 0]

                    if not pos_data.empty:
                        self._backend.scatter(
                            ax,
                            pos_data["neglog10p"],
                            pos_data["y_pos"],
                            colors=palette[cat],
                            sizes=60,
                            marker="^",
                            edgecolor="black",
                            linewidth=0.5,
                            zorder=2,
                        )
                    if not neg_data.empty:
                        self._backend.scatter(
                            ax,
                            neg_data["neglog10p"],
                            neg_data["y_pos"],
                            colors=palette[cat],
                            sizes=60,
                            marker="v",
                            edgecolor="black",
                            linewidth=0.5,
                            zorder=2,
                        )
                else:
                    self._backend.scatter(
                        ax,
                        cat_data["neglog10p"],
                        cat_data["y_pos"],
                        colors=palette[cat],
                        sizes=60,
                        marker="o",
                        edgecolor="black",
                        linewidth=0.5,
                        zorder=2,
                    )
        else:
            # No category column: single-color scatter
            self._backend.scatter(
                ax,
                df["neglog10p"],
                df["y_pos"],
                colors="#4169E1",
                sizes=60,
                edgecolor="black",
                linewidth=0.5,
                zorder=2,
            )

        # Add significance threshold line
        sig_line = -np.log10(significance_threshold)
        self._backend.axvline(
            ax, x=sig_line, color="red", linestyle="--", linewidth=1, alpha=0.7
        )

        # Set axis labels and limits
        self._backend.set_xlabel(ax, r"$-\log_{10}$ P")
        self._backend.set_ylabel(ax, "Phenotype")
        self._backend.set_ylim(ax, -0.5, len(df) - 0.5)

        # Set y-tick labels to phenotype names
        self._backend.set_yticks(
            ax,
            positions=df["y_pos"].tolist(),
            labels=df[phenotype_col].tolist(),
            fontsize=8,
        )

        self._backend.set_title(ax, f"PheWAS: {variant_id}")
        self._backend.hide_spines(ax, ["top", "right"])
        self._backend.finalize_layout(fig)

        return fig

    def plot_forest(
        self,
        forest_df: pd.DataFrame,
        variant_id: str,
        study_col: str = "study",
        effect_col: str = "effect",
        ci_lower_col: str = "ci_lower",
        ci_upper_col: str = "ci_upper",
        weight_col: Optional[str] = None,
        null_value: float = 0.0,
        effect_label: str = "Effect Size",
        figsize: Tuple[float, float] = (8, 6),
    ) -> Any:
        """Create a forest plot showing effect sizes with confidence intervals.

        Args:
            forest_df: DataFrame with effect sizes and confidence intervals.
            variant_id: Variant identifier for plot title.
            study_col: Column name for study/phenotype names.
            effect_col: Column name for effect sizes.
            ci_lower_col: Column name for lower confidence interval.
            ci_upper_col: Column name for upper confidence interval.
            weight_col: Optional column for study weights (affects marker size).
            null_value: Reference value for null effect (0 for beta, 1 for OR).
            effect_label: X-axis label.
            figsize: Figure size as (width, height).

        Returns:
            Figure object (type depends on backend).

        Example:
            >>> fig = plotter.plot_forest(
            ...     forest_df,
            ...     variant_id="rs12345",
            ...     effect_label="Odds Ratio",
            ...     null_value=1.0,
            ... )
        """
        validate_forest_df(forest_df, study_col, effect_col, ci_lower_col, ci_upper_col)

        df = forest_df.copy()

        # Create figure
        fig, axes = self._backend.create_figure(
            n_panels=1,
            height_ratios=[1.0],
            figsize=figsize,
        )
        ax = axes[0]

        # Assign y-positions (reverse so first study is at top)
        df["y_pos"] = range(len(df) - 1, -1, -1)

        # Calculate marker sizes from weights
        if weight_col and weight_col in df.columns:
            # Scale weights to marker sizes (min 40, max 200)
            weights = df[weight_col]
            min_size, max_size = 40, 200
            weight_range = weights.max() - weights.min()
            if weight_range > 0:
                sizes = min_size + (weights - weights.min()) / weight_range * (
                    max_size - min_size
                )
            else:
                # All weights equal: use the midpoint size for every marker
                sizes = (min_size + max_size) / 2
        else:
            sizes = 80

        # Calculate error bar extents
        xerr_lower = df[effect_col] - df[ci_lower_col]
        xerr_upper = df[ci_upper_col] - df[effect_col]

        # Plot error bars (confidence intervals)
        self._backend.errorbar_h(
            ax,
            x=df[effect_col],
            y=df["y_pos"],
            xerr_lower=xerr_lower,
            xerr_upper=xerr_upper,
            color="black",
            linewidth=1.5,
            capsize=3,
            zorder=2,
        )

        # Plot effect size markers
        self._backend.scatter(
            ax,
            df[effect_col],
            df["y_pos"],
            colors="#4169E1",
            sizes=sizes,
            marker="s",  # square markers typical for forest plots
            edgecolor="black",
            linewidth=0.5,
            zorder=3,
        )

        # Add null effect line
        self._backend.axvline(
            ax, x=null_value, color="grey", linestyle="--", linewidth=1, alpha=0.7
        )

        # Set axis labels and limits
        self._backend.set_xlabel(ax, effect_label)
        self._backend.set_ylim(ax, -0.5, len(df) - 0.5)

        # Ensure x-axis includes the null value with some padding
        x_min = min(df[ci_lower_col].min(), null_value)
        x_max = max(df[ci_upper_col].max(), null_value)
        x_padding = (x_max - x_min) * 0.1
        if x_padding == 0:
            # Degenerate case: every effect and CI bound equals null_value,
            # which would produce a zero-width axis; fall back to a unit pad.
            x_padding = 1.0
        self._backend.set_xlim(ax, x_min - x_padding, x_max + x_padding)

        # Set y-tick labels to study names
        self._backend.set_yticks(
            ax,
            positions=df["y_pos"].tolist(),
            labels=df[study_col].tolist(),
            fontsize=10,
        )

        self._backend.set_title(ax, f"Forest Plot: {variant_id}")
        self._backend.hide_spines(ax, ["top", "right"])
        self._backend.finalize_layout(fig)

        return fig
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pylocuszoom
3
- Version: 1.0.0
3
+ Version: 1.1.0
4
4
  Summary: Publication-ready regional association plots with LD coloring, gene tracks, and recombination overlays
5
5
  Project-URL: Homepage, https://github.com/michael-denyer/pylocuszoom
6
6
  Project-URL: Documentation, https://github.com/michael-denyer/pylocuszoom#readme
@@ -21,6 +21,7 @@ Classifier: Topic :: Scientific/Engineering :: Visualization
21
21
  Requires-Python: >=3.10
22
22
  Requires-Dist: adjusttext>=0.8
23
23
  Requires-Dist: bokeh>=3.8.2
24
+ Requires-Dist: colorcet>=3.0.0
24
25
  Requires-Dist: kaleido>=0.2.0
25
26
  Requires-Dist: loguru>=0.7.0
26
27
  Requires-Dist: matplotlib>=3.5.0
@@ -66,22 +67,24 @@ Inspired by [LocusZoom](http://locuszoom.org/) and [locuszoomr](https://github.c
66
67
  - **Multi-species support**: Built-in reference data for *Canis lupus familiaris* (CanFam3.1/CanFam4) and *Felis catus* (FelCat9), or optionally provide your own for any species
67
68
  - **LD coloring**: SNPs colored by linkage disequilibrium (R²) with lead variant
68
69
  - **Gene tracks**: Annotated gene/exon positions below the association plot
69
- - **Recombination rate**: Optional overlay across region (*Canis lupus familiaris* built-in, not shown in example image)
70
+ - **Recombination rate**: Overlay across region (*Canis lupus familiaris* built-in, or user-provided)
70
71
  - **SNP labels (matplotlib)**: Automatic labeling of top SNPs by p-value (RS IDs)
71
72
  - **Hover tooltips (Plotly and Bokeh)**: Detailed SNP data on hover
72
73
 
73
- ![Example regional association plot with LD coloring and gene track](examples/regional_plot.png)
74
- *Regional association plot with LD coloring, gene/exon track, and top SNP labels (recombination overlay disabled in example).*
74
+ ![Example regional association plot with LD coloring, gene track, and recombination overlay](examples/regional_plot_with_recomb.png)
75
+ *Regional association plot with LD coloring, gene/exon track, recombination rate overlay (blue line), and top SNP labels.*
75
76
 
76
77
  2. **Stacked plots**: Compare multiple GWAS/phenotypes vertically
77
- 3. **eQTL plot**: Expression QTL data aligned with association plots and gene tracks
78
- 4. **Fine-mapping plots**: Visualize SuSiE credible sets with posterior inclusion probabilities
79
- 5. **PheWAS plots**: Phenome-wide association study visualization across multiple phenotypes
80
- 6. **Forest plots**: Meta-analysis effect size visualization with confidence intervals
81
- 7. **Multiple backends**: matplotlib (publication-ready), plotly (interactive), bokeh (dashboard integration)
82
- 8. **Pandas and PySpark support**: Works with both Pandas and PySpark DataFrames for large-scale genomics data
83
- 9. **Convenience data file loaders**: Load and validate common GWAS, eQTL and fine-mapping file formats
84
- 10. **Automatic gene annotations**: Fetch gene/exon data from Ensembl REST API with caching (human, mouse, rat, canine, feline, and any Ensembl species)
78
+ 3. **Manhattan plots**: Genome-wide association visualization with chromosome coloring
79
+ 4. **QQ plots**: Quantile-quantile plots with confidence bands and genomic inflation factor
80
+ 5. **eQTL plot**: Expression QTL data aligned with association plots and gene tracks
81
+ 6. **Fine-mapping plots**: Visualize SuSiE credible sets with posterior inclusion probabilities
82
+ 7. **PheWAS plots**: Phenome-wide association study visualization across multiple phenotypes
83
+ 8. **Forest plots**: Meta-analysis effect size visualization with confidence intervals
84
+ 9. **Multiple backends**: matplotlib (publication-ready), plotly (interactive), bokeh (dashboard integration)
85
+ 10. **Pandas and PySpark support**: Works with both Pandas and PySpark DataFrames for large-scale genomics data
86
+ 11. **Convenience data file loaders**: Load and validate common GWAS, eQTL and fine-mapping file formats
87
+ 12. **Automatic gene annotations**: Fetch gene/exon data from Ensembl REST API with caching (human, mouse, rat, canine, feline, and any Ensembl species)
85
88
 
86
89
  ## Installation
87
90
 
@@ -107,15 +110,16 @@ conda install -c bioconda pylocuszoom
107
110
  from pylocuszoom import LocusZoomPlotter
108
111
 
109
112
  # Initialize plotter (loads reference data for canine)
110
- plotter = LocusZoomPlotter(species="canine")
113
+ plotter = LocusZoomPlotter(species="canine", auto_genes=True)
111
114
 
112
115
  # Plot with parameters passed directly
113
116
  fig = plotter.plot(
114
- gwas_df, # DataFrame with ps, p_wald, rs columns
117
+ gwas_df, # DataFrame with pos, p_value, rs columns
115
118
  chrom=1,
116
119
  start=1000000,
117
120
  end=2000000,
118
121
  lead_pos=1500000, # Highlight lead SNP
122
+ show_recombination=True, # Overlay recombination rate
119
123
  )
120
124
  fig.savefig("regional_plot.png", dpi=150)
121
125
  ```
@@ -355,6 +359,112 @@ fig = plotter.plot_forest(
355
359
  ![Example forest plot](examples/forest_plot.png)
356
360
  *Forest plot with effect sizes, confidence intervals, and weight-proportional markers.*
357
361
 
362
+ ## Manhattan Plots
363
+
364
+ Create genome-wide Manhattan plots showing associations across all chromosomes:
365
+
366
+ ```python
367
+ from pylocuszoom import LocusZoomPlotter
368
+
369
+ plotter = LocusZoomPlotter(species="human")
370
+
371
+ fig = plotter.plot_manhattan(
372
+ gwas_df,
373
+ chrom_col="chrom",
374
+ pos_col="pos",
375
+ p_col="p",
376
+ significance_threshold=5e-8, # Genome-wide significance line
377
+ figsize=(12, 5),
378
+ )
379
+ fig.savefig("manhattan.png", dpi=150)
380
+ ```
381
+
382
+ ![Example Manhattan plot](examples/manhattan_plot.png)
383
+ *Manhattan plot showing genome-wide associations with chromosome coloring and significance threshold.*
384
+
385
+ Categorical Manhattan plots (PheWAS-style) are also supported:
386
+
387
+ ```python
388
+ fig = plotter.plot_manhattan(
389
+ phewas_df,
390
+ category_col="phenotype_category",
391
+ p_col="pvalue",
392
+ )
393
+ ```
394
+
395
+ ## QQ Plots
396
+
397
+ Create quantile-quantile plots to assess p-value distribution:
398
+
399
+ ```python
400
+ from pylocuszoom import LocusZoomPlotter
401
+
402
+ plotter = LocusZoomPlotter()
403
+
404
+ fig = plotter.plot_qq(
405
+ gwas_df,
406
+ p_col="p",
407
+ show_confidence_band=True, # 95% confidence band
408
+ show_lambda=True, # Genomic inflation factor in title
409
+ figsize=(6, 6),
410
+ )
411
+ fig.savefig("qq_plot.png", dpi=150)
412
+ ```
413
+
414
+ ![Example QQ plot](examples/qq_plot.png)
415
+ *QQ plot with 95% confidence band and genomic inflation factor (λ).*
416
+
417
+ ## Stacked Manhattan Plots
418
+
419
+ Compare multiple GWAS results in vertically stacked Manhattan plots:
420
+
421
+ ```python
422
+ from pylocuszoom import LocusZoomPlotter
423
+
424
+ plotter = LocusZoomPlotter()
425
+
426
+ fig = plotter.plot_manhattan_stacked(
427
+ [gwas_study1, gwas_study2, gwas_study3],
428
+ chrom_col="chrom",
429
+ pos_col="pos",
430
+ p_col="p",
431
+ panel_labels=["Study 1", "Study 2", "Study 3"],
432
+ significance_threshold=5e-8,
433
+ figsize=(12, 8),
434
+ title="Multi-study GWAS Comparison",
435
+ )
436
+ fig.savefig("manhattan_stacked.png", dpi=150)
437
+ ```
438
+
439
+ ![Example stacked Manhattan plot](examples/manhattan_stacked.png)
440
+ *Stacked Manhattan plots comparing three GWAS studies with shared chromosome axis.*
441
+
442
+ ## Manhattan and QQ Side-by-Side
443
+
444
+ Create combined Manhattan and QQ plots in a single figure:
445
+
446
+ ```python
447
+ from pylocuszoom import LocusZoomPlotter
448
+
449
+ plotter = LocusZoomPlotter()
450
+
451
+ fig = plotter.plot_manhattan_qq(
452
+ gwas_df,
453
+ chrom_col="chrom",
454
+ pos_col="pos",
455
+ p_col="p",
456
+ significance_threshold=5e-8,
457
+ show_confidence_band=True,
458
+ show_lambda=True,
459
+ figsize=(14, 5),
460
+ title="GWAS Results",
461
+ )
462
+ fig.savefig("manhattan_qq.png", dpi=150)
463
+ ```
464
+
465
+ ![Example Manhattan and QQ side-by-side](examples/manhattan_qq_sidebyside.png)
466
+ *Combined Manhattan and QQ plot showing genome-wide associations and p-value distribution.*
467
+
358
468
  ## PySpark Support
359
469
 
360
470
  For large-scale genomics data, convert PySpark DataFrames with `to_pandas()` before plotting: