pylocuszoom 0.8.0__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pylocuszoom/__init__.py +27 -7
- pylocuszoom/_plotter_utils.py +66 -0
- pylocuszoom/backends/base.py +56 -0
- pylocuszoom/backends/bokeh_backend.py +141 -29
- pylocuszoom/backends/matplotlib_backend.py +60 -0
- pylocuszoom/backends/plotly_backend.py +297 -88
- pylocuszoom/config.py +365 -0
- pylocuszoom/ensembl.py +6 -11
- pylocuszoom/eqtl.py +3 -7
- pylocuszoom/exceptions.py +33 -0
- pylocuszoom/finemapping.py +2 -7
- pylocuszoom/forest.py +1 -0
- pylocuszoom/gene_track.py +10 -31
- pylocuszoom/labels.py +6 -2
- pylocuszoom/manhattan.py +246 -0
- pylocuszoom/manhattan_plotter.py +760 -0
- pylocuszoom/plotter.py +401 -327
- pylocuszoom/qq.py +123 -0
- pylocuszoom/recombination.py +7 -7
- pylocuszoom/schemas.py +1 -6
- pylocuszoom/stats_plotter.py +319 -0
- pylocuszoom/utils.py +2 -4
- pylocuszoom/validation.py +51 -0
- {pylocuszoom-0.8.0.dist-info → pylocuszoom-1.1.0.dist-info}/METADATA +159 -25
- pylocuszoom-1.1.0.dist-info/RECORD +36 -0
- pylocuszoom-0.8.0.dist-info/RECORD +0 -29
- {pylocuszoom-0.8.0.dist-info → pylocuszoom-1.1.0.dist-info}/WHEEL +0 -0
- {pylocuszoom-0.8.0.dist-info → pylocuszoom-1.1.0.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -0,0 +1,760 @@
|
|
|
1
|
+
"""Manhattan and QQ plot generator.
|
|
2
|
+
|
|
3
|
+
Provides genome-wide visualization of GWAS results including:
|
|
4
|
+
- Manhattan plots (standard and categorical)
|
|
5
|
+
- QQ plots with confidence bands
|
|
6
|
+
- Combined Manhattan+QQ layouts
|
|
7
|
+
- Stacked multi-GWAS comparisons
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from typing import Any, List, Optional, Tuple
|
|
11
|
+
|
|
12
|
+
import pandas as pd
|
|
13
|
+
|
|
14
|
+
from ._plotter_utils import (
|
|
15
|
+
DEFAULT_GENOMEWIDE_THRESHOLD,
|
|
16
|
+
MANHATTAN_CATEGORICAL_POINT_SIZE,
|
|
17
|
+
MANHATTAN_EDGE_WIDTH,
|
|
18
|
+
MANHATTAN_POINT_SIZE,
|
|
19
|
+
POINT_EDGE_COLOR,
|
|
20
|
+
QQ_CI_ALPHA,
|
|
21
|
+
QQ_CI_COLOR,
|
|
22
|
+
QQ_EDGE_WIDTH,
|
|
23
|
+
QQ_POINT_COLOR,
|
|
24
|
+
QQ_POINT_SIZE,
|
|
25
|
+
SIGNIFICANCE_LINE_COLOR,
|
|
26
|
+
add_significance_line,
|
|
27
|
+
)
|
|
28
|
+
from .backends import BackendType, get_backend
|
|
29
|
+
from .manhattan import prepare_categorical_data, prepare_manhattan_data
|
|
30
|
+
from .qq import prepare_qq_data
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class ManhattanPlotter:
    """Manhattan and QQ plot generator for genome-wide visualizations.

    Creates publication-quality Manhattan plots, QQ plots, and combined
    layouts for GWAS summary statistics.

    Supports multiple rendering backends:
    - matplotlib (default): Static publication-quality plots
    - plotly: Interactive HTML with hover tooltips
    - bokeh: Interactive HTML for dashboards

    Args:
        species: Species name ('canine', 'feline', 'human', or None).
            Used to determine chromosome order.
        backend: Plotting backend ('matplotlib', 'plotly', or 'bokeh').
        genomewide_threshold: P-value threshold for significance line.
            Stored as ``self.genomewide_threshold``.
            NOTE(review): none of the plotting methods in this class read
            that attribute; each takes its own ``significance_threshold``
            argument that defaults to ``DEFAULT_GENOMEWIDE_THRESHOLD``.

    Example:
        >>> plotter = ManhattanPlotter(species="human")
        >>> fig = plotter.plot_manhattan(gwas_df)
        >>> fig.savefig("manhattan.png", dpi=150)
    """

    def __init__(
        self,
        species: Optional[str] = "canine",
        backend: BackendType = "matplotlib",
        genomewide_threshold: float = DEFAULT_GENOMEWIDE_THRESHOLD,
    ):
        """Initialize the Manhattan plotter.

        Args:
            species: Species name forwarded to ``prepare_manhattan_data``.
            backend: Backend identifier resolved through ``get_backend``.
            genomewide_threshold: P-value threshold kept on the instance.
        """
        self.species = species
        self._backend = get_backend(backend)
        self.genomewide_threshold = genomewide_threshold

    # ------------------------------------------------------------------
    # Shared private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _chromosome_ticks(
        chrom_order: List[str],
        chrom_centers: Any,
    ) -> Tuple[List[Any], List[str]]:
        """Return (positions, labels) for chromosomes that have a center.

        Chromosomes listed in ``chrom_order`` but absent from
        ``chrom_centers`` are skipped; display order is preserved.
        """
        labels = [chrom for chrom in chrom_order if chrom in chrom_centers]
        positions = [chrom_centers[chrom] for chrom in labels]
        return positions, labels

    @staticmethod
    def _padded_x_limits(frames: List[pd.DataFrame]) -> Tuple[Any, Any]:
        """Return x-limits spanning ``_cumulative_pos`` of all frames.

        The span is widened by 1% of its width on each side.
        """
        x_min = min(frame["_cumulative_pos"].min() for frame in frames)
        x_max = max(frame["_cumulative_pos"].max() for frame in frames)
        x_padding = (x_max - x_min) * 0.01
        return x_min - x_padding, x_max + x_padding

    def _set_y_limits(self, ax: Any, prepared_df: pd.DataFrame) -> None:
        """Set the y-axis to run from 0 to 110% of the largest ``_neg_log_p``."""
        y_max = prepared_df["_neg_log_p"].max()
        self._backend.set_ylim(ax, 0, y_max * 1.1)

    @staticmethod
    def _qq_title(
        qq_df: pd.DataFrame,
        show_lambda: bool,
        compact: bool = False,
    ) -> str:
        """Build the QQ panel title.

        ``qq_df.attrs["lambda_gc"]`` is only read when ``show_lambda`` is
        true. ``compact`` selects the short form used in stacked layouts.
        """
        if not show_lambda:
            return "QQ" if compact else "QQ Plot"
        lambda_gc = qq_df.attrs["lambda_gc"]
        if compact:
            return f"λ = {lambda_gc:.3f}"
        return f"QQ Plot (λ = {lambda_gc:.3f})"

    @staticmethod
    def _validate_gwas_inputs(
        gwas_dfs: List[pd.DataFrame],
        panel_labels: Optional[List[str]],
    ) -> int:
        """Validate stacked-plot inputs and return the number of datasets.

        Raises:
            ValueError: If ``gwas_dfs`` is empty, or if ``panel_labels`` is
                given and its length differs from ``len(gwas_dfs)``.
        """
        n_gwas = len(gwas_dfs)
        if n_gwas == 0:
            raise ValueError("At least one GWAS DataFrame required")

        if panel_labels is not None and len(panel_labels) != n_gwas:
            raise ValueError(
                f"panel_labels length ({len(panel_labels)}) must match "
                f"number of GWAS DataFrames ({n_gwas})"
            )
        return n_gwas

    # ------------------------------------------------------------------
    # Single-panel plots
    # ------------------------------------------------------------------

    def plot_manhattan(
        self,
        df: pd.DataFrame,
        chrom_col: str = "chrom",
        pos_col: str = "pos",
        p_col: str = "p",
        custom_chrom_order: Optional[List[str]] = None,
        category_col: Optional[str] = None,
        category_order: Optional[List[str]] = None,
        significance_threshold: Optional[float] = DEFAULT_GENOMEWIDE_THRESHOLD,
        figsize: Tuple[float, float] = (12, 5),
        title: Optional[str] = None,
    ) -> Any:
        """Create a Manhattan plot.

        Shows associations across the genome with points colored by chromosome.
        Supports both standard Manhattan plots (genomic positions) and
        categorical Manhattan plots (PheWAS-style).

        Args:
            df: DataFrame with GWAS results.
            chrom_col: Column name for chromosome.
            pos_col: Column name for position.
            p_col: Column name for p-value.
            custom_chrom_order: Custom chromosome order (overrides species).
            category_col: If provided, creates a categorical Manhattan plot
                (like PheWAS) using this column instead of genomic positions.
            category_order: Custom category order for categorical plots.
            significance_threshold: P-value threshold for genome-wide significance
                line. Set to None to disable.
            figsize: Figure size as (width, height).
            title: Plot title. Defaults to "Manhattan Plot".

        Returns:
            Figure object (type depends on backend).

        Example:
            >>> # Standard Manhattan plot (species comes from the plotter)
            >>> plotter = ManhattanPlotter(species="human")
            >>> fig = plotter.plot_manhattan(gwas_df)
            >>>
            >>> # Categorical Manhattan (PheWAS-style)
            >>> fig = plotter.plot_manhattan(
            ...     phewas_df,
            ...     category_col="phenotype_category",
            ...     p_col="pvalue",
            ... )
        """
        # Categorical Manhattan plot
        if category_col is not None:
            return self._plot_manhattan_categorical(
                df=df,
                category_col=category_col,
                p_col=p_col,
                category_order=category_order,
                significance_threshold=significance_threshold,
                figsize=figsize,
                title=title,
            )

        # Standard Manhattan plot
        prepared_df = prepare_manhattan_data(
            df=df,
            chrom_col=chrom_col,
            pos_col=pos_col,
            p_col=p_col,
            species=self.species,
            custom_order=custom_chrom_order,
        )

        # Create figure
        fig, axes = self._backend.create_figure(
            n_panels=1,
            height_ratios=[1.0],
            figsize=figsize,
        )
        ax = axes[0]

        # Plot points and significance line
        chrom_order = prepared_df.attrs["chrom_order"]
        self._render_manhattan_points(ax, prepared_df, chrom_order)
        add_significance_line(self._backend, ax, significance_threshold)

        # Set x-axis ticks to chromosome centers
        positions, labels = self._chromosome_ticks(
            chrom_order, prepared_df.attrs["chrom_centers"]
        )
        self._backend.set_xticks(ax, positions, labels, fontsize=8)

        # Set limits
        x_low, x_high = self._padded_x_limits([prepared_df])
        self._backend.set_xlim(ax, x_low, x_high)
        self._set_y_limits(ax, prepared_df)

        # Labels and title
        self._backend.set_xlabel(ax, "Chromosome", fontsize=12)
        self._backend.set_ylabel(ax, r"$-\log_{10}(p)$", fontsize=12)
        self._backend.set_title(ax, title or "Manhattan Plot", fontsize=14)
        self._backend.hide_spines(ax, ["top", "right"])
        self._backend.finalize_layout(fig)

        return fig

    def _render_manhattan_points(
        self,
        ax: Any,
        prepared_df: pd.DataFrame,
        chrom_order: List[str],
        point_size: int = MANHATTAN_POINT_SIZE,
    ) -> None:
        """Render Manhattan plot scatter points grouped by chromosome.

        One scatter call is issued per chromosome that has rows, in the
        order given by ``chrom_order``; the color is taken from the first
        row of each chromosome's ``_color`` column.

        Args:
            ax: Axes object from backend.
            prepared_df: DataFrame with _chrom_str, _cumulative_pos, _neg_log_p, _color.
            chrom_order: List of chromosome names in display order.
            point_size: Size of scatter points.
        """
        for chrom in chrom_order:
            chrom_data = prepared_df[prepared_df["_chrom_str"] == chrom]
            if len(chrom_data) > 0:
                self._backend.scatter(
                    ax,
                    chrom_data["_cumulative_pos"],
                    chrom_data["_neg_log_p"],
                    colors=chrom_data["_color"].iloc[0],
                    sizes=point_size,
                    marker="o",
                    edgecolor=POINT_EDGE_COLOR,
                    linewidth=MANHATTAN_EDGE_WIDTH,
                    zorder=2,
                )

    def _render_qq_plot(
        self,
        ax: Any,
        qq_df: pd.DataFrame,
        show_confidence_band: bool = True,
    ) -> None:
        """Render QQ plot elements on axes.

        Draws, back to front: the optional confidence band, the dashed
        diagonal reference line, and the observed-vs-expected points. Both
        axes are then limited to 0 .. 105% of the largest plotted value.

        Args:
            ax: Axes object from backend.
            qq_df: Prepared QQ DataFrame with _expected, _observed, _ci_lower, _ci_upper.
            show_confidence_band: Whether to show 95% confidence band.
        """
        if show_confidence_band:
            self._backend.fill_between(
                ax,
                x=qq_df["_expected"],
                y1=qq_df["_ci_lower"],
                y2=qq_df["_ci_upper"],
                color=QQ_CI_COLOR,
                alpha=QQ_CI_ALPHA,
                zorder=1,
            )

        # A shared maximum keeps the plot square and the diagonal corner-to-corner.
        max_val = max(qq_df["_expected"].max(), qq_df["_observed"].max())

        # Diagonal reference line
        self._backend.line(
            ax,
            x=pd.Series([0, max_val]),
            y=pd.Series([0, max_val]),
            color=SIGNIFICANCE_LINE_COLOR,
            linestyle="--",
            linewidth=1,
            zorder=2,
        )

        # QQ points
        self._backend.scatter(
            ax,
            qq_df["_expected"],
            qq_df["_observed"],
            colors=QQ_POINT_COLOR,
            sizes=QQ_POINT_SIZE,
            marker="o",
            edgecolor=POINT_EDGE_COLOR,
            linewidth=QQ_EDGE_WIDTH,
            zorder=3,
        )

        # Set limits
        self._backend.set_xlim(ax, 0, max_val * 1.05)
        self._backend.set_ylim(ax, 0, max_val * 1.05)

    def _plot_manhattan_categorical(
        self,
        df: pd.DataFrame,
        category_col: str,
        p_col: str = "p",
        category_order: Optional[List[str]] = None,
        significance_threshold: Optional[float] = DEFAULT_GENOMEWIDE_THRESHOLD,
        figsize: Tuple[float, float] = (12, 5),
        title: Optional[str] = None,
    ) -> Any:
        """Create a categorical Manhattan plot (PheWAS-style).

        Internal method called by plot_manhattan when category_col is provided.

        Args:
            df: DataFrame with association results.
            category_col: Column whose values define the x-axis categories.
            p_col: Column name for p-value.
            category_order: Custom category order.
            significance_threshold: P-value threshold for the significance
                line, passed to ``add_significance_line``.
            figsize: Figure size as (width, height).
            title: Plot title. Defaults to "Categorical Manhattan Plot".

        Returns:
            Figure object (type depends on backend).
        """
        # Prepare data
        prepared_df = prepare_categorical_data(
            df=df,
            category_col=category_col,
            p_col=p_col,
            category_order=category_order,
        )

        # Create figure
        fig, axes = self._backend.create_figure(
            n_panels=1,
            height_ratios=[1.0],
            figsize=figsize,
        )
        ax = axes[0]

        # Plot points by category
        cat_order = prepared_df.attrs["category_order"]
        for cat in cat_order:
            cat_data = prepared_df[prepared_df[category_col] == cat]
            if len(cat_data) > 0:
                self._backend.scatter(
                    ax,
                    cat_data["_x_pos"],
                    cat_data["_neg_log_p"],
                    colors=cat_data["_color"].iloc[0],
                    sizes=MANHATTAN_CATEGORICAL_POINT_SIZE,
                    marker="o",
                    edgecolor=POINT_EDGE_COLOR,
                    linewidth=MANHATTAN_EDGE_WIDTH,
                    zorder=2,
                )

        add_significance_line(self._backend, ax, significance_threshold)

        # Set x-axis ticks
        cat_centers = prepared_df.attrs["category_centers"]
        positions = [cat_centers[cat] for cat in cat_order]
        self._backend.set_xticks(
            ax, positions, cat_order, fontsize=10, rotation=45, ha="right"
        )

        # Set limits
        # NOTE(review): presumably categories sit at 0..n-1 on the x-axis;
        # confirm against prepare_categorical_data.
        self._backend.set_xlim(ax, -0.5, len(cat_order) - 0.5)
        self._set_y_limits(ax, prepared_df)

        # Labels and title
        self._backend.set_xlabel(ax, "Category", fontsize=12)
        self._backend.set_ylabel(ax, r"$-\log_{10}(p)$", fontsize=12)
        self._backend.set_title(ax, title or "Categorical Manhattan Plot", fontsize=14)
        self._backend.hide_spines(ax, ["top", "right"])
        self._backend.finalize_layout(fig)

        return fig

    def plot_qq(
        self,
        df: pd.DataFrame,
        p_col: str = "p",
        show_confidence_band: bool = True,
        show_lambda: bool = True,
        figsize: Tuple[float, float] = (6, 6),
        title: Optional[str] = None,
    ) -> Any:
        """Create a QQ (quantile-quantile) plot.

        Shows observed vs expected -log10(p) distribution with optional
        95% confidence band and genomic inflation factor (lambda).

        Args:
            df: DataFrame with p-values.
            p_col: Column name for p-value.
            show_confidence_band: If True, show 95% confidence band.
            show_lambda: If True, show genomic inflation factor in title.
            figsize: Figure size as (width, height).
            title: Plot title. If None and show_lambda is True, shows lambda.

        Returns:
            Figure object (type depends on backend).

        Example:
            >>> fig = plotter.plot_qq(gwas_df, p_col="pvalue")
        """
        # Prepare data
        prepared_df = prepare_qq_data(df, p_col=p_col)

        # Create figure
        fig, axes = self._backend.create_figure(
            n_panels=1,
            height_ratios=[1.0],
            figsize=figsize,
        )
        ax = axes[0]

        # Render QQ plot elements
        self._render_qq_plot(ax, prepared_df, show_confidence_band)

        # Labels
        self._backend.set_xlabel(ax, r"Expected $-\log_{10}(p)$", fontsize=12)
        self._backend.set_ylabel(ax, r"Observed $-\log_{10}(p)$", fontsize=12)

        # An explicit title wins; otherwise fall back to the lambda/plain title.
        plot_title = title or self._qq_title(prepared_df, show_lambda)
        self._backend.set_title(ax, plot_title, fontsize=14)

        self._backend.hide_spines(ax, ["top", "right"])
        self._backend.finalize_layout(fig)

        return fig

    # ------------------------------------------------------------------
    # Multi-panel plots
    # ------------------------------------------------------------------

    def plot_manhattan_stacked(
        self,
        gwas_dfs: List[pd.DataFrame],
        chrom_col: str = "chrom",
        pos_col: str = "pos",
        p_col: str = "p",
        custom_chrom_order: Optional[List[str]] = None,
        significance_threshold: Optional[float] = DEFAULT_GENOMEWIDE_THRESHOLD,
        panel_labels: Optional[List[str]] = None,
        figsize: Tuple[float, float] = (12, 8),
        title: Optional[str] = None,
    ) -> Any:
        """Create stacked Manhattan plots for multiple GWAS datasets.

        Vertically stacks multiple Manhattan plots for easy comparison across
        studies or phenotypes. Chromosome order and tick positions are taken
        from the first dataset; x-limits span all datasets.

        Args:
            gwas_dfs: List of GWAS results DataFrames.
            chrom_col: Column name for chromosome.
            pos_col: Column name for position.
            p_col: Column name for p-value.
            custom_chrom_order: Custom chromosome order (overrides species).
            significance_threshold: P-value threshold for genome-wide significance
                line. Set to None to disable.
            panel_labels: Labels for each panel (one per DataFrame).
            figsize: Figure size as (width, height).
            title: Overall plot title (set on the top panel).

        Returns:
            Figure object (type depends on backend).

        Raises:
            ValueError: If ``gwas_dfs`` is empty or ``panel_labels`` has a
                different length than ``gwas_dfs``.

        Example:
            >>> fig = plotter.plot_manhattan_stacked(
            ...     [gwas1, gwas2, gwas3],
            ...     panel_labels=["Discovery", "Replication", "Meta-analysis"],
            ... )
        """
        n_gwas = self._validate_gwas_inputs(gwas_dfs, panel_labels)

        # Prepare all data first to get consistent x-axis
        prepared_dfs = [
            prepare_manhattan_data(
                df=df,
                chrom_col=chrom_col,
                pos_col=pos_col,
                p_col=p_col,
                species=self.species,
                custom_order=custom_chrom_order,
            )
            for df in gwas_dfs
        ]

        # Use first df for chromosome order and centers
        chrom_order = prepared_dfs[0].attrs["chrom_order"]
        chrom_centers = prepared_dfs[0].attrs["chrom_centers"]

        # Calculate figure layout: equal height for every panel
        panel_height = figsize[1] / n_gwas
        height_ratios = [panel_height] * n_gwas

        # Create figure
        fig, axes = self._backend.create_figure(
            n_panels=n_gwas,
            height_ratios=height_ratios,
            figsize=figsize,
            sharex=True,
        )

        # Loop-invariant values: shared x limits and chromosome ticks
        x_low, x_high = self._padded_x_limits(prepared_dfs)
        positions, labels = self._chromosome_ticks(chrom_order, chrom_centers)

        # Plot each panel
        for i, prepared_df in enumerate(prepared_dfs):
            ax = axes[i]

            # Plot points and significance line
            self._render_manhattan_points(ax, prepared_df, chrom_order)
            add_significance_line(self._backend, ax, significance_threshold)

            # Set limits
            self._backend.set_xlim(ax, x_low, x_high)
            self._set_y_limits(ax, prepared_df)

            # Labels
            self._backend.set_ylabel(ax, r"$-\log_{10}(p)$", fontsize=10)
            self._backend.hide_spines(ax, ["top", "right"])

            # Panel label (length already validated against n_gwas)
            if panel_labels:
                self._backend.add_panel_label(ax, panel_labels[i])

            # Set x-axis ticks for all panels (needed for interactive backends)
            self._backend.set_xticks(ax, positions, labels, fontsize=8)

            # Only show x-axis label on bottom panel
            if i == n_gwas - 1:
                self._backend.set_xlabel(ax, "Chromosome", fontsize=12)

        # Overall title
        if title:
            self._backend.set_title(axes[0], title, fontsize=14)

        self._backend.finalize_layout(fig, hspace=0.1)

        return fig

    def plot_manhattan_qq(
        self,
        df: pd.DataFrame,
        chrom_col: str = "chrom",
        pos_col: str = "pos",
        p_col: str = "p",
        custom_chrom_order: Optional[List[str]] = None,
        significance_threshold: Optional[float] = DEFAULT_GENOMEWIDE_THRESHOLD,
        show_confidence_band: bool = True,
        show_lambda: bool = True,
        figsize: Tuple[float, float] = (14, 5),
        title: Optional[str] = None,
    ) -> Any:
        """Create side-by-side Manhattan and QQ plots.

        Displays a Manhattan plot on the left and a QQ plot on the right,
        commonly used for GWAS publication figures.

        Args:
            df: GWAS results DataFrame.
            chrom_col: Column name for chromosome.
            pos_col: Column name for position.
            p_col: Column name for p-value.
            custom_chrom_order: Custom chromosome order (overrides species).
            significance_threshold: P-value threshold for genome-wide significance.
            show_confidence_band: If True, show 95% confidence band on QQ plot.
            show_lambda: If True, show genomic inflation factor on QQ plot.
            figsize: Figure size as (width, height).
            title: Overall plot title.

        Returns:
            Figure object (type depends on backend).

        Example:
            >>> fig = plotter.plot_manhattan_qq(gwas_df)
            >>> fig.savefig("gwas_summary.png", dpi=150)
        """
        # Prepare Manhattan data
        manhattan_df = prepare_manhattan_data(
            df=df,
            chrom_col=chrom_col,
            pos_col=pos_col,
            p_col=p_col,
            species=self.species,
            custom_order=custom_chrom_order,
        )

        # Prepare QQ data
        qq_df = prepare_qq_data(df, p_col=p_col)

        # Create figure with side-by-side layout (Manhattan wider than QQ)
        fig, axes = self._backend.create_figure_grid(
            n_rows=1,
            n_cols=2,
            width_ratios=[2.5, 1],
            figsize=figsize,
        )
        manhattan_ax = axes[0]
        qq_ax = axes[1]

        # --- Manhattan plot ---
        chrom_order = manhattan_df.attrs["chrom_order"]
        chrom_centers = manhattan_df.attrs["chrom_centers"]

        self._render_manhattan_points(manhattan_ax, manhattan_df, chrom_order)
        add_significance_line(self._backend, manhattan_ax, significance_threshold)

        x_low, x_high = self._padded_x_limits([manhattan_df])
        self._backend.set_xlim(manhattan_ax, x_low, x_high)
        self._set_y_limits(manhattan_ax, manhattan_df)

        positions, labels = self._chromosome_ticks(chrom_order, chrom_centers)
        self._backend.set_xticks(manhattan_ax, positions, labels, fontsize=8)

        self._backend.set_xlabel(manhattan_ax, "Chromosome", fontsize=12)
        self._backend.set_ylabel(manhattan_ax, r"$-\log_{10}(p)$", fontsize=12)
        self._backend.set_title(manhattan_ax, "Manhattan Plot", fontsize=12)
        self._backend.hide_spines(manhattan_ax, ["top", "right"])

        # --- QQ plot ---
        self._render_qq_plot(qq_ax, qq_df, show_confidence_band)

        self._backend.set_xlabel(qq_ax, r"Expected $-\log_{10}(p)$", fontsize=12)
        self._backend.set_ylabel(qq_ax, r"Observed $-\log_{10}(p)$", fontsize=12)

        self._backend.set_title(
            qq_ax, self._qq_title(qq_df, show_lambda), fontsize=12
        )
        self._backend.hide_spines(qq_ax, ["top", "right"])

        # Overall title: leave headroom at the top when a suptitle is drawn
        if title:
            self._backend.set_suptitle(fig, title, fontsize=14)
            self._backend.finalize_layout(fig, top=0.90)
        else:
            self._backend.finalize_layout(fig)

        return fig

    def plot_manhattan_qq_stacked(
        self,
        gwas_dfs: List[pd.DataFrame],
        chrom_col: str = "chrom",
        pos_col: str = "pos",
        p_col: str = "p",
        custom_chrom_order: Optional[List[str]] = None,
        significance_threshold: Optional[float] = DEFAULT_GENOMEWIDE_THRESHOLD,
        show_confidence_band: bool = True,
        show_lambda: bool = True,
        panel_labels: Optional[List[str]] = None,
        figsize: Tuple[float, float] = (14, 8),
        title: Optional[str] = None,
    ) -> Any:
        """Create stacked side-by-side Manhattan and QQ plots for multiple GWAS.

        Displays Manhattan+QQ pairs for each GWAS dataset, stacked vertically
        for easy comparison across studies. Chromosome order and tick
        positions are taken from the first dataset; Manhattan x-limits span
        all datasets.

        Args:
            gwas_dfs: List of GWAS results DataFrames.
            chrom_col: Column name for chromosome.
            pos_col: Column name for position.
            p_col: Column name for p-value.
            custom_chrom_order: Custom chromosome order (overrides species).
            significance_threshold: P-value threshold for genome-wide significance.
            show_confidence_band: If True, show 95% confidence band on QQ plots.
            show_lambda: If True, show genomic inflation factor on QQ plots.
            panel_labels: List of labels for each GWAS (one per dataset).
            figsize: Figure size as (width, height).
            title: Overall plot title.

        Returns:
            Figure object (type depends on backend).

        Raises:
            ValueError: If ``gwas_dfs`` is empty or ``panel_labels`` has a
                different length than ``gwas_dfs`` (same rule as
                ``plot_manhattan_stacked``).

        Example:
            >>> fig = plotter.plot_manhattan_qq_stacked(
            ...     [discovery_df, replication_df],
            ...     panel_labels=["Discovery", "Replication"],
            ... )
        """
        n_gwas = self._validate_gwas_inputs(gwas_dfs, panel_labels)

        # Prepare all data
        manhattan_dfs = []
        qq_dfs = []
        for df in gwas_dfs:
            manhattan_dfs.append(
                prepare_manhattan_data(
                    df=df,
                    chrom_col=chrom_col,
                    pos_col=pos_col,
                    p_col=p_col,
                    species=self.species,
                    custom_order=custom_chrom_order,
                )
            )
            qq_dfs.append(prepare_qq_data(df, p_col=p_col))

        # Use chromosome order from first dataset
        chrom_order = manhattan_dfs[0].attrs["chrom_order"]
        chrom_centers = manhattan_dfs[0].attrs["chrom_centers"]

        # Create grid: n_gwas rows, 2 columns (Manhattan | QQ)
        fig, axes = self._backend.create_figure_grid(
            n_rows=n_gwas,
            n_cols=2,
            width_ratios=[2.5, 1],
            figsize=figsize,
        )

        # Loop-invariant values: shared x limits and chromosome ticks
        x_low, x_high = self._padded_x_limits(manhattan_dfs)
        positions, chrom_labels = self._chromosome_ticks(chrom_order, chrom_centers)

        # Plot each row; axes are indexed flat, row-major
        for i, (manhattan_df, qq_df) in enumerate(zip(manhattan_dfs, qq_dfs)):
            manhattan_ax = axes[i * 2]  # Even indices: Manhattan
            qq_ax = axes[i * 2 + 1]  # Odd indices: QQ
            is_bottom_row = i == n_gwas - 1

            # --- Manhattan plot ---
            self._render_manhattan_points(manhattan_ax, manhattan_df, chrom_order)
            add_significance_line(self._backend, manhattan_ax, significance_threshold)

            self._backend.set_xlim(manhattan_ax, x_low, x_high)
            self._set_y_limits(manhattan_ax, manhattan_df)

            # Panel label (length already validated against n_gwas)
            if panel_labels:
                self._backend.add_panel_label(manhattan_ax, panel_labels[i])

            # Y-axis label
            self._backend.set_ylabel(manhattan_ax, r"$-\log_{10}(p)$", fontsize=10)
            self._backend.hide_spines(manhattan_ax, ["top", "right"])

            # X-axis: set chromosome ticks for all panels
            self._backend.set_xticks(manhattan_ax, positions, chrom_labels, fontsize=8)

            # Only show "Chromosome" label on bottom row
            if is_bottom_row:
                self._backend.set_xlabel(manhattan_ax, "Chromosome", fontsize=10)

            # --- QQ plot ---
            self._render_qq_plot(qq_ax, qq_df, show_confidence_band)

            # Labels for QQ
            if is_bottom_row:
                self._backend.set_xlabel(
                    qq_ax, r"Expected $-\log_{10}(p)$", fontsize=10
                )
            self._backend.set_ylabel(qq_ax, r"Observed $-\log_{10}(p)$", fontsize=10)

            # QQ title with lambda (compact form to save vertical space)
            qq_title = self._qq_title(qq_df, show_lambda, compact=True)
            self._backend.set_title(qq_ax, qq_title, fontsize=10)
            self._backend.hide_spines(qq_ax, ["top", "right"])

        # Overall title: leave headroom at the top when a suptitle is drawn
        if title:
            self._backend.set_suptitle(fig, title, fontsize=14)
            self._backend.finalize_layout(fig, top=0.90, hspace=0.15)
        else:
            self._backend.finalize_layout(fig, hspace=0.15)

        return fig
|