pylocuszoom 1.2.0__py3-none-any.whl → 1.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pylocuszoom/__init__.py +16 -2
- pylocuszoom/backends/base.py +94 -2
- pylocuszoom/backends/bokeh_backend.py +160 -6
- pylocuszoom/backends/matplotlib_backend.py +142 -2
- pylocuszoom/backends/plotly_backend.py +101 -1
- pylocuszoom/coloc.py +82 -0
- pylocuszoom/coloc_plotter.py +390 -0
- pylocuszoom/colors.py +26 -0
- pylocuszoom/config.py +61 -0
- pylocuszoom/labels.py +41 -16
- pylocuszoom/ld.py +239 -0
- pylocuszoom/ld_heatmap_plotter.py +252 -0
- pylocuszoom/miami_plotter.py +490 -0
- pylocuszoom/plotter.py +472 -6
- {pylocuszoom-1.2.0.dist-info → pylocuszoom-1.3.1.dist-info}/METADATA +166 -21
- {pylocuszoom-1.2.0.dist-info → pylocuszoom-1.3.1.dist-info}/RECORD +18 -14
- pylocuszoom-1.3.1.dist-info/licenses/LICENSE.md +595 -0
- pylocuszoom-1.2.0.dist-info/licenses/LICENSE.md +0 -17
- {pylocuszoom-1.2.0.dist-info → pylocuszoom-1.3.1.dist-info}/WHEEL +0 -0
pylocuszoom/plotter.py
CHANGED
|
@@ -23,7 +23,9 @@ from .colors import (
|
|
|
23
23
|
EQTL_NEGATIVE_BINS,
|
|
24
24
|
EQTL_POSITIVE_BINS,
|
|
25
25
|
LD_BINS,
|
|
26
|
+
LD_HEATMAP_COLORS,
|
|
26
27
|
LEAD_SNP_COLOR,
|
|
28
|
+
LEAD_SNP_HIGHLIGHT_COLOR,
|
|
27
29
|
get_credible_set_color,
|
|
28
30
|
get_eqtl_color,
|
|
29
31
|
get_ld_bin,
|
|
@@ -223,6 +225,10 @@ class LocusZoomPlotter:
|
|
|
223
225
|
genes_df: Optional[pd.DataFrame] = None,
|
|
224
226
|
exons_df: Optional[pd.DataFrame] = None,
|
|
225
227
|
recomb_df: Optional[pd.DataFrame] = None,
|
|
228
|
+
ld_heatmap_df: Optional[pd.DataFrame] = None,
|
|
229
|
+
ld_heatmap_snp_ids: Optional[List[str]] = None,
|
|
230
|
+
ld_heatmap_height: float = 0.25,
|
|
231
|
+
ld_heatmap_metric: str = "r2",
|
|
226
232
|
) -> Any:
|
|
227
233
|
"""Create a regional association plot.
|
|
228
234
|
|
|
@@ -246,12 +252,21 @@ class LocusZoomPlotter:
|
|
|
246
252
|
exons_df: Exon annotations with chr, start, end, gene_name.
|
|
247
253
|
recomb_df: Pre-loaded recombination rate data.
|
|
248
254
|
If None and show_recombination=True, loads from species default.
|
|
255
|
+
ld_heatmap_df: Pairwise LD matrix (square DataFrame) from
|
|
256
|
+
calculate_pairwise_ld. If provided with ld_heatmap_snp_ids,
|
|
257
|
+
renders heatmap panel below association plot.
|
|
258
|
+
ld_heatmap_snp_ids: List of SNP IDs in matrix order. Required if
|
|
259
|
+
ld_heatmap_df is provided.
|
|
260
|
+
ld_heatmap_height: Height ratio of heatmap panel relative to
|
|
261
|
+
association panel. Default 0.25.
|
|
262
|
+
ld_heatmap_metric: LD metric label for colorbar ("r2" or "dprime").
|
|
249
263
|
|
|
250
264
|
Returns:
|
|
251
265
|
Figure object (type depends on backend).
|
|
252
266
|
|
|
253
267
|
Raises:
|
|
254
268
|
ValidationError: If parameters or DataFrame columns are invalid.
|
|
269
|
+
ValueError: If ld_heatmap_df provided without ld_heatmap_snp_ids.
|
|
255
270
|
|
|
256
271
|
Example:
|
|
257
272
|
>>> fig = plotter.plot(
|
|
@@ -280,6 +295,12 @@ class LocusZoomPlotter:
|
|
|
280
295
|
# Validate inputs
|
|
281
296
|
validate_gwas_df(gwas_df, pos_col=pos_col, p_col=p_col)
|
|
282
297
|
|
|
298
|
+
# Validate LD heatmap parameters
|
|
299
|
+
if ld_heatmap_df is not None and ld_heatmap_snp_ids is None:
|
|
300
|
+
raise ValueError(
|
|
301
|
+
"ld_heatmap_snp_ids is required when ld_heatmap_df is provided"
|
|
302
|
+
)
|
|
303
|
+
|
|
283
304
|
# Auto-fetch genes if enabled and not provided
|
|
284
305
|
if genes_df is None and self._auto_genes:
|
|
285
306
|
logger.debug(
|
|
@@ -352,8 +373,33 @@ class LocusZoomPlotter:
|
|
|
352
373
|
if show_recombination and recomb_df is None:
|
|
353
374
|
recomb_df = self._get_recomb_for_region(chrom, start, end)
|
|
354
375
|
|
|
376
|
+
# Transform heatmap to genomic coordinates if provided
|
|
377
|
+
heatmap_data = None
|
|
378
|
+
if ld_heatmap_df is not None and ld_heatmap_snp_ids is not None:
|
|
379
|
+
heatmap_data = self._transform_heatmap_to_genomic_coords(
|
|
380
|
+
ld_matrix=ld_heatmap_df,
|
|
381
|
+
snp_ids=ld_heatmap_snp_ids,
|
|
382
|
+
gwas_df=df,
|
|
383
|
+
start=start,
|
|
384
|
+
end=end,
|
|
385
|
+
rs_col=rs_col,
|
|
386
|
+
pos_col=pos_col,
|
|
387
|
+
)
|
|
388
|
+
if heatmap_data is None:
|
|
389
|
+
logger.warning(
|
|
390
|
+
"No SNPs from LD heatmap overlap with region - heatmap not rendered"
|
|
391
|
+
)
|
|
392
|
+
|
|
355
393
|
# Create figure layout
|
|
356
|
-
fig, ax, gene_ax = self.
|
|
394
|
+
fig, ax, gene_ax, heatmap_ax = self._create_figure_with_heatmap(
|
|
395
|
+
genes_df=genes_df,
|
|
396
|
+
chrom=chrom,
|
|
397
|
+
start=start,
|
|
398
|
+
end=end,
|
|
399
|
+
figsize=figsize,
|
|
400
|
+
heatmap_data=heatmap_data,
|
|
401
|
+
heatmap_height=ld_heatmap_height,
|
|
402
|
+
)
|
|
357
403
|
|
|
358
404
|
# Plot association data
|
|
359
405
|
self._plot_association(ax, df, pos_col, ld_col, lead_pos, rs_col, p_col)
|
|
@@ -370,9 +416,11 @@ class LocusZoomPlotter:
|
|
|
370
416
|
)
|
|
371
417
|
|
|
372
418
|
# Add SNP labels (capability check - interactive backends use hover tooltips)
|
|
419
|
+
# Create labels without adjusting - we'll adjust after axis limits are set
|
|
420
|
+
snp_label_texts: list = []
|
|
373
421
|
if snp_labels and rs_col in df.columns and label_top_n > 0 and not df.empty:
|
|
374
422
|
if self._backend.supports_snp_labels:
|
|
375
|
-
self._backend.add_snp_labels(
|
|
423
|
+
snp_label_texts = self._backend.add_snp_labels(
|
|
376
424
|
ax,
|
|
377
425
|
df,
|
|
378
426
|
pos_col=pos_col,
|
|
@@ -381,6 +429,7 @@ class LocusZoomPlotter:
|
|
|
381
429
|
label_top_n=label_top_n,
|
|
382
430
|
genes_df=genes_df,
|
|
383
431
|
chrom=chrom,
|
|
432
|
+
adjust=False, # Defer adjustment until after axis limits set
|
|
384
433
|
)
|
|
385
434
|
|
|
386
435
|
# Add recombination overlay (all backends with secondary axis support)
|
|
@@ -407,11 +456,38 @@ class LocusZoomPlotter:
|
|
|
407
456
|
plot_gene_track_generic(
|
|
408
457
|
gene_ax, self._backend, genes_df, chrom, start, end, exons_df
|
|
409
458
|
)
|
|
410
|
-
self._backend.set_xlabel(gene_ax, f"Chromosome {chrom} (Mb)")
|
|
411
459
|
self._backend.hide_spines(gene_ax, ["top", "right", "left"])
|
|
412
460
|
# Format both axes for interactive backends (they don't share x-axis)
|
|
413
461
|
self._backend.format_xaxis_mb(gene_ax)
|
|
414
|
-
|
|
462
|
+
# Only set x-label on gene track if no heatmap below
|
|
463
|
+
if heatmap_ax is None:
|
|
464
|
+
self._backend.set_xlabel(gene_ax, f"Chromosome {chrom} (Mb)")
|
|
465
|
+
|
|
466
|
+
# Render LD heatmap panel if data available
|
|
467
|
+
if heatmap_ax is not None and heatmap_data is not None:
|
|
468
|
+
filtered_matrix, x_positions, filtered_snp_ids = heatmap_data
|
|
469
|
+
# Find lead SNP ID if lead_pos is set
|
|
470
|
+
lead_snp_id = None
|
|
471
|
+
if lead_pos is not None and rs_col in df.columns:
|
|
472
|
+
lead_row = df[df[pos_col] == lead_pos]
|
|
473
|
+
if not lead_row.empty:
|
|
474
|
+
lead_snp_id = lead_row[rs_col].iloc[0]
|
|
475
|
+
self._render_heatmap_panel(
|
|
476
|
+
ax=heatmap_ax,
|
|
477
|
+
fig=fig,
|
|
478
|
+
ld_matrix=filtered_matrix,
|
|
479
|
+
x_positions=x_positions,
|
|
480
|
+
snp_ids=filtered_snp_ids,
|
|
481
|
+
metric=ld_heatmap_metric,
|
|
482
|
+
lead_snp_id=lead_snp_id,
|
|
483
|
+
start=start,
|
|
484
|
+
end=end,
|
|
485
|
+
)
|
|
486
|
+
# Heatmap is at bottom - set x-label on it
|
|
487
|
+
self._backend.set_xlabel(heatmap_ax, f"Chromosome {chrom} (Mb)")
|
|
488
|
+
self._backend.format_xaxis_mb(heatmap_ax)
|
|
489
|
+
elif gene_ax is None and heatmap_ax is None:
|
|
490
|
+
# No gene track and no heatmap - set x-label on association plot
|
|
415
491
|
self._backend.set_xlabel(ax, f"Chromosome {chrom} (Mb)")
|
|
416
492
|
|
|
417
493
|
# Format x-axis with Mb labels (association axis always needs formatting)
|
|
@@ -420,6 +496,11 @@ class LocusZoomPlotter:
|
|
|
420
496
|
# Adjust layout
|
|
421
497
|
self._backend.finalize_layout(fig, hspace=0.1)
|
|
422
498
|
|
|
499
|
+
# Adjust SNP labels AFTER all axis limits and layout are finalized
|
|
500
|
+
# adjustText needs final plot bounds to position labels correctly
|
|
501
|
+
if snp_label_texts:
|
|
502
|
+
self._backend.adjust_snp_labels(ax, snp_label_texts)
|
|
503
|
+
|
|
423
504
|
return fig
|
|
424
505
|
|
|
425
506
|
def _create_figure(
|
|
@@ -471,6 +552,304 @@ class LocusZoomPlotter:
|
|
|
471
552
|
)
|
|
472
553
|
return fig, axes[0], None
|
|
473
554
|
|
|
555
|
+
def _create_figure_with_heatmap(
    self,
    genes_df: Optional[pd.DataFrame],
    chrom: int,
    start: int,
    end: int,
    figsize: Tuple[float, float],
    heatmap_data: Optional[Tuple[pd.DataFrame, List[int], List[str]]],
    heatmap_height: float = 0.25,
) -> Tuple[Any, Any, Optional[Any], Optional[Any]]:
    """Build the figure: association panel plus optional gene and heatmap panels.

    Panels are stacked top to bottom as association, gene track, heatmap.
    The figure height is the sum of the individual panel heights, so only
    the width component of ``figsize`` is passed through unchanged.

    Args:
        genes_df: Gene annotations DataFrame, or None for no gene track.
        chrom: Chromosome number.
        start: Region start position.
        end: Region end position.
        figsize: Base figure size as (width, height). The association panel
            takes 60% of the requested height.
        heatmap_data: Tuple of (filtered_matrix, x_positions, snp_ids) from
            _transform_heatmap_to_genomic_coords, or None for no heatmap.
        heatmap_height: Height of the heatmap panel as a fraction of the
            association panel height.

    Returns:
        Tuple of (fig, assoc_ax, gene_ax, heatmap_ax). gene_ax and heatmap_ax
        are None when the corresponding panel is not included.
    """
    with_genes = genes_df is not None
    with_heatmap = heatmap_data is not None

    # The association panel is always first; its height anchors the others.
    assoc_height = figsize[1] * 0.6
    height_ratios = [assoc_height]

    if with_genes:
        # Count stacked gene rows in the region so the track grows with them.
        chrom_str = normalize_chrom(chrom)
        gene_chroms = genes_df["chr"].astype(str).str.replace("chr", "", regex=False)
        in_region = (
            (gene_chroms == chrom_str)
            & (genes_df["end"] >= start)
            & (genes_df["start"] <= end)
        )
        region_genes = genes_df[in_region]

        n_gene_rows = 1
        if not region_genes.empty:
            row_assignments = assign_gene_positions(
                region_genes.sort_values("start"), start, end
            )
            if row_assignments:
                n_gene_rows = max(row_assignments) + 1

        # 1.0 for the first row, 0.5 for every additional row.
        height_ratios.append(1.0 + (n_gene_rows - 1) * 0.5)

    if with_heatmap:
        height_ratios.append(assoc_height * heatmap_height)

    fig, axes = self._backend.create_figure(
        n_panels=len(height_ratios),
        height_ratios=height_ratios,
        figsize=(figsize[0], sum(height_ratios)),
        sharex=True,
    )

    # The gene track (if any) is always panel 1; the heatmap is always last.
    gene_ax = axes[1] if with_genes else None
    heatmap_ax = axes[len(height_ratios) - 1] if with_heatmap else None

    return fig, axes[0], gene_ax, heatmap_ax
|
|
648
|
+
|
|
649
|
+
def _transform_heatmap_to_genomic_coords(
|
|
650
|
+
self,
|
|
651
|
+
ld_matrix: pd.DataFrame,
|
|
652
|
+
snp_ids: List[str],
|
|
653
|
+
gwas_df: pd.DataFrame,
|
|
654
|
+
start: int,
|
|
655
|
+
end: int,
|
|
656
|
+
rs_col: str,
|
|
657
|
+
pos_col: str,
|
|
658
|
+
) -> Optional[Tuple[pd.DataFrame, List[int], List[str]]]:
|
|
659
|
+
"""Transform heatmap matrix to genomic coordinates.
|
|
660
|
+
|
|
661
|
+
Args:
|
|
662
|
+
ld_matrix: Square LD matrix from calculate_pairwise_ld.
|
|
663
|
+
snp_ids: SNP IDs in matrix order.
|
|
664
|
+
gwas_df: GWAS DataFrame with position column.
|
|
665
|
+
start: Region start position.
|
|
666
|
+
end: Region end position.
|
|
667
|
+
rs_col: SNP ID column name.
|
|
668
|
+
pos_col: Position column name.
|
|
669
|
+
|
|
670
|
+
Returns:
|
|
671
|
+
Tuple of (filtered_matrix, x_positions, filtered_snp_ids), or None
|
|
672
|
+
if no SNPs overlap with the region.
|
|
673
|
+
"""
|
|
674
|
+
# Build SNP-to-position mapping from GWAS data
|
|
675
|
+
if rs_col not in gwas_df.columns:
|
|
676
|
+
logger.warning(
|
|
677
|
+
f"Cannot map heatmap to genomic coords: column '{rs_col}' not in GWAS data"
|
|
678
|
+
)
|
|
679
|
+
return None
|
|
680
|
+
|
|
681
|
+
snp_to_pos = dict(zip(gwas_df[rs_col], gwas_df[pos_col]))
|
|
682
|
+
|
|
683
|
+
# Filter to SNPs present in GWAS and within region
|
|
684
|
+
filtered_indices = []
|
|
685
|
+
filtered_snp_ids = []
|
|
686
|
+
x_positions = []
|
|
687
|
+
|
|
688
|
+
for i, snp_id in enumerate(snp_ids):
|
|
689
|
+
if snp_id in snp_to_pos:
|
|
690
|
+
pos = snp_to_pos[snp_id]
|
|
691
|
+
if start <= pos <= end:
|
|
692
|
+
filtered_indices.append(i)
|
|
693
|
+
filtered_snp_ids.append(snp_id)
|
|
694
|
+
x_positions.append(int(pos))
|
|
695
|
+
|
|
696
|
+
if not filtered_indices:
|
|
697
|
+
return None
|
|
698
|
+
|
|
699
|
+
# Filter matrix to matching rows/columns
|
|
700
|
+
filtered_matrix = ld_matrix.iloc[filtered_indices, filtered_indices].copy()
|
|
701
|
+
|
|
702
|
+
return filtered_matrix, x_positions, filtered_snp_ids
|
|
703
|
+
|
|
704
|
+
def _render_heatmap_panel(
    self,
    ax: Any,
    fig: Any,
    ld_matrix: pd.DataFrame,
    x_positions: List[int],
    snp_ids: List[str],
    metric: str,
    lead_snp_id: Optional[str],
    start: int,
    end: int,
) -> None:
    """Render the LD heatmap panel with genomic x-coordinates.

    The panel has already been allocated in the figure by the time this is
    called, so its axis is always styled (x-limits matched to the region,
    y ticks removed, spines hidden) even when there are too few SNPs to
    draw a heatmap. Otherwise a skipped heatmap would leave a panel with
    default limits and ticks underneath the other tracks.

    Args:
        ax: Axes object for heatmap panel.
        fig: Figure object.
        ld_matrix: Filtered LD matrix.
        x_positions: Genomic positions for each SNP (x-axis).
        snp_ids: SNP IDs in filtered order.
        metric: LD metric label ("r2" or "dprime"). Any value other than
            "r2" is labelled as D'.
        lead_snp_id: Lead SNP ID to highlight (if present in snp_ids).
        start: Region start for x-axis limits.
        end: Region end for x-axis limits.
    """
    n_snps = len(snp_ids)

    if n_snps < 2:
        # A single SNP has no pairwise LD to show; leave the panel empty.
        logger.debug("Skipping heatmap: fewer than 2 SNPs after filtering")
    else:
        # Render triangular heatmap at genomic positions
        mappable = self._backend.add_heatmap(
            ax,
            data=ld_matrix.values,
            x_coords=x_positions,
            y_coords=list(range(n_snps)),  # Keep y as indices (0, 1, 2, ...)
            cmap_colors=LD_HEATMAP_COLORS,
            vmin=0.0,
            vmax=1.0,
            mask_upper=True,
        )

        # Add colorbar
        label = "R²" if metric == "r2" else "D'"
        self._backend.add_colorbar(ax, mappable, label=label)

        # Highlight lead SNP if present
        if lead_snp_id is not None and lead_snp_id in snp_ids:
            lead_idx = snp_ids.index(lead_snp_id)
            self._highlight_heatmap_snp(ax, fig, lead_idx, n_snps)

    # Axis styling runs after any drawing so the limits set here are final,
    # and on both paths so an empty panel still lines up with the region.
    self._backend.set_xlim(ax, start, end)

    # Hide y-axis (SNP indices are not meaningful for viewer)
    self._backend.set_yticks(ax, [], [])
    self._backend.hide_spines(ax, ["top", "right", "left"])
|
|
764
|
+
|
|
765
|
+
def _highlight_heatmap_snp(
    self, ax: Any, fig: Any, snp_idx: int, n_snps: int
) -> None:
    """Outline the cells of one SNP's row and column in the heatmap.

    The outlined cells form an "L": row ``snp_idx`` from column 0 up to the
    diagonal, then column ``snp_idx`` from the row below the diagonal down
    to the last row. Each cell is a 1x1 square centred on its
    (column, row) index. Backends other than matplotlib, plotly and bokeh
    are silently ignored.

    Args:
        ax: Axes object (used by the matplotlib and bokeh backends).
        fig: Figure object (used by the plotly backend).
        snp_idx: Index of SNP to highlight.
        n_snps: Total number of SNPs in matrix.
    """
    # NOTE(review): cells are addressed by matrix index on BOTH axes, while
    # the caller passes genomic x_coords to add_heatmap and sets genomic
    # x-limits on this panel - confirm the outlines land on the heatmap.
    row_cells = [(col, snp_idx) for col in range(snp_idx + 1)]
    column_cells = [(snp_idx, row) for row in range(snp_idx + 1, n_snps)]
    cells = row_cells + column_cells  # (x centre, y centre), row part first

    backend_name = self._backend_name

    if backend_name == "matplotlib":
        from matplotlib.patches import Rectangle

        for cx, cy in cells:
            outline = Rectangle(
                (cx - 0.5, cy - 0.5),
                1.0,
                1.0,
                fill=False,
                edgecolor=LEAD_SNP_HIGHLIGHT_COLOR,
                linewidth=2,
                zorder=10,
            )
            ax.add_patch(outline)

    elif backend_name == "plotly":
        # Plotly draws the outlines as transparent-filled layout shapes.
        for cx, cy in cells:
            fig.add_shape(
                type="rect",
                x0=cx - 0.5,
                x1=cx + 0.5,
                y0=cy - 0.5,
                y1=cy + 0.5,
                line=dict(color=LEAD_SNP_HIGHLIGHT_COLOR, width=2),
                fillcolor="rgba(0,0,0,0)",
            )

    elif backend_name == "bokeh":
        # Bokeh rect glyphs are specified by centre, width and height.
        for cx, cy in cells:
            ax.rect(
                x=cx,
                y=cy,
                width=1,
                height=1,
                fill_alpha=0,
                line_color=LEAD_SNP_HIGHLIGHT_COLOR,
                line_width=2,
            )
|
|
852
|
+
|
|
474
853
|
def _plot_association(
|
|
475
854
|
self,
|
|
476
855
|
ax: Any,
|
|
@@ -648,6 +1027,10 @@ class LocusZoomPlotter:
|
|
|
648
1027
|
finemapping_df: Optional[pd.DataFrame] = None,
|
|
649
1028
|
finemapping_cs_col: Optional[str] = "cs",
|
|
650
1029
|
recomb_df: Optional[pd.DataFrame] = None,
|
|
1030
|
+
ld_heatmap_df: Optional[pd.DataFrame] = None,
|
|
1031
|
+
ld_heatmap_snp_ids: Optional[List[str]] = None,
|
|
1032
|
+
ld_heatmap_height: float = 0.25,
|
|
1033
|
+
ld_heatmap_metric: str = "r2",
|
|
651
1034
|
) -> Any:
|
|
652
1035
|
"""Create stacked regional association plots for multiple GWAS.
|
|
653
1036
|
|
|
@@ -680,10 +1063,21 @@ class LocusZoomPlotter:
|
|
|
680
1063
|
Displayed as PIP line with optional credible set coloring.
|
|
681
1064
|
finemapping_cs_col: Column name for credible set assignment.
|
|
682
1065
|
recomb_df: Pre-loaded recombination rate data.
|
|
1066
|
+
ld_heatmap_df: Pairwise LD matrix (square DataFrame) from
|
|
1067
|
+
calculate_pairwise_ld. If provided with ld_heatmap_snp_ids,
|
|
1068
|
+
renders heatmap panel at the very bottom of the stack.
|
|
1069
|
+
ld_heatmap_snp_ids: List of SNP IDs in matrix order. Required if
|
|
1070
|
+
ld_heatmap_df is provided.
|
|
1071
|
+
ld_heatmap_height: Height ratio of heatmap panel relative to
|
|
1072
|
+
association panel. Default 0.25.
|
|
1073
|
+
ld_heatmap_metric: LD metric label for colorbar ("r2" or "dprime").
|
|
683
1074
|
|
|
684
1075
|
Returns:
|
|
685
1076
|
Figure object (type depends on backend).
|
|
686
1077
|
|
|
1078
|
+
Raises:
|
|
1079
|
+
ValueError: If ld_heatmap_df provided without ld_heatmap_snp_ids.
|
|
1080
|
+
|
|
687
1081
|
Example:
|
|
688
1082
|
>>> fig = plotter.plot_stacked(
|
|
689
1083
|
... [gwas_height, gwas_bmi, gwas_whr],
|
|
@@ -739,6 +1133,12 @@ class LocusZoomPlotter:
|
|
|
739
1133
|
if eqtl_df is not None:
|
|
740
1134
|
validate_eqtl_df(eqtl_df)
|
|
741
1135
|
|
|
1136
|
+
# Validate LD heatmap parameters
|
|
1137
|
+
if ld_heatmap_df is not None and ld_heatmap_snp_ids is None:
|
|
1138
|
+
raise ValueError(
|
|
1139
|
+
"ld_heatmap_snp_ids is required when ld_heatmap_df is provided"
|
|
1140
|
+
)
|
|
1141
|
+
|
|
742
1142
|
# Handle lead positions
|
|
743
1143
|
if lead_positions is None:
|
|
744
1144
|
lead_positions = []
|
|
@@ -762,10 +1162,31 @@ class LocusZoomPlotter:
|
|
|
762
1162
|
if ld_reference_files is None and ld_reference_file is not None:
|
|
763
1163
|
ld_reference_files = [ld_reference_file] * n_gwas
|
|
764
1164
|
|
|
1165
|
+
# Transform heatmap to genomic coordinates if provided (use first GWAS for mapping)
|
|
1166
|
+
heatmap_data = None
|
|
1167
|
+
if ld_heatmap_df is not None and ld_heatmap_snp_ids is not None:
|
|
1168
|
+
# Use first GWAS DataFrame for SNP-to-position mapping
|
|
1169
|
+
first_gwas = gwas_dfs[0].copy()
|
|
1170
|
+
first_gwas = self._transform_pvalues(first_gwas, p_col)
|
|
1171
|
+
heatmap_data = self._transform_heatmap_to_genomic_coords(
|
|
1172
|
+
ld_matrix=ld_heatmap_df,
|
|
1173
|
+
snp_ids=ld_heatmap_snp_ids,
|
|
1174
|
+
gwas_df=first_gwas,
|
|
1175
|
+
start=start,
|
|
1176
|
+
end=end,
|
|
1177
|
+
rs_col=rs_col,
|
|
1178
|
+
pos_col=pos_col,
|
|
1179
|
+
)
|
|
1180
|
+
if heatmap_data is None:
|
|
1181
|
+
logger.warning(
|
|
1182
|
+
"No SNPs from LD heatmap overlap with region - heatmap not rendered"
|
|
1183
|
+
)
|
|
1184
|
+
|
|
765
1185
|
# Calculate panel layout
|
|
766
1186
|
panel_height = 2.5 # inches per GWAS panel
|
|
767
1187
|
eqtl_height = 2.0 if eqtl_df is not None else 0
|
|
768
1188
|
finemapping_height = 1.5 if finemapping_df is not None else 0
|
|
1189
|
+
heatmap_height_inches = panel_height * ld_heatmap_height if heatmap_data else 0
|
|
769
1190
|
|
|
770
1191
|
# Gene track height
|
|
771
1192
|
if genes_df is not None:
|
|
@@ -790,11 +1211,13 @@ class LocusZoomPlotter:
|
|
|
790
1211
|
gene_track_height = 0
|
|
791
1212
|
|
|
792
1213
|
# Calculate total panels and heights
|
|
1214
|
+
# Order from top to bottom: GWAS, finemapping, eQTL, gene track, heatmap
|
|
793
1215
|
n_panels = (
|
|
794
1216
|
n_gwas
|
|
795
1217
|
+ (1 if finemapping_df is not None else 0)
|
|
796
1218
|
+ (1 if eqtl_df is not None else 0)
|
|
797
1219
|
+ (1 if genes_df is not None else 0)
|
|
1220
|
+
+ (1 if heatmap_data is not None else 0)
|
|
798
1221
|
)
|
|
799
1222
|
height_ratios = [panel_height] * n_gwas
|
|
800
1223
|
if finemapping_df is not None:
|
|
@@ -803,6 +1226,8 @@ class LocusZoomPlotter:
|
|
|
803
1226
|
height_ratios.append(eqtl_height)
|
|
804
1227
|
if genes_df is not None:
|
|
805
1228
|
height_ratios.append(gene_track_height)
|
|
1229
|
+
if heatmap_data is not None:
|
|
1230
|
+
height_ratios.append(heatmap_height_inches)
|
|
806
1231
|
|
|
807
1232
|
# Calculate figure height
|
|
808
1233
|
total_height = figsize[1] if figsize[1] else sum(height_ratios)
|
|
@@ -824,6 +1249,9 @@ class LocusZoomPlotter:
|
|
|
824
1249
|
sharex=True,
|
|
825
1250
|
)
|
|
826
1251
|
|
|
1252
|
+
# Collect label texts for deferred adjustment
|
|
1253
|
+
all_snp_label_texts: list[tuple] = []
|
|
1254
|
+
|
|
827
1255
|
# Plot each GWAS panel
|
|
828
1256
|
for i, (gwas_df, lead_pos) in enumerate(zip(gwas_dfs, lead_positions)):
|
|
829
1257
|
ax = axes[i]
|
|
@@ -874,9 +1302,10 @@ class LocusZoomPlotter:
|
|
|
874
1302
|
)
|
|
875
1303
|
|
|
876
1304
|
# Add SNP labels (capability check - interactive backends use hover tooltips)
|
|
1305
|
+
# Create labels without adjusting - we'll adjust after axis limits are set
|
|
877
1306
|
if snp_labels and rs_col in df.columns and label_top_n > 0 and not df.empty:
|
|
878
1307
|
if self._backend.supports_snp_labels:
|
|
879
|
-
self._backend.add_snp_labels(
|
|
1308
|
+
texts = self._backend.add_snp_labels(
|
|
880
1309
|
ax,
|
|
881
1310
|
df,
|
|
882
1311
|
pos_col=pos_col,
|
|
@@ -885,7 +1314,10 @@ class LocusZoomPlotter:
|
|
|
885
1314
|
label_top_n=label_top_n,
|
|
886
1315
|
genes_df=genes_df,
|
|
887
1316
|
chrom=chrom,
|
|
1317
|
+
adjust=False, # Defer adjustment until after axis limits set
|
|
888
1318
|
)
|
|
1319
|
+
if texts:
|
|
1320
|
+
all_snp_label_texts.append((ax, texts))
|
|
889
1321
|
|
|
890
1322
|
# Add recombination overlay (only on first panel, all backends)
|
|
891
1323
|
if i == 0 and recomb_df is not None and not recomb_df.empty:
|
|
@@ -1070,8 +1502,37 @@ class LocusZoomPlotter:
|
|
|
1070
1502
|
plot_gene_track_generic(
|
|
1071
1503
|
gene_ax, self._backend, genes_df, chrom, start, end, exons_df
|
|
1072
1504
|
)
|
|
1073
|
-
self._backend.set_xlabel(gene_ax, f"Chromosome {chrom} (Mb)")
|
|
1074
1505
|
self._backend.hide_spines(gene_ax, ["top", "right", "left"])
|
|
1506
|
+
panel_idx += 1
|
|
1507
|
+
|
|
1508
|
+
# Plot LD heatmap panel if provided (at very bottom)
|
|
1509
|
+
if heatmap_data is not None:
|
|
1510
|
+
heatmap_ax = axes[panel_idx]
|
|
1511
|
+
filtered_matrix, x_positions, filtered_snp_ids = heatmap_data
|
|
1512
|
+
# Find lead SNP ID from first GWAS panel if lead_positions set
|
|
1513
|
+
lead_snp_id = None
|
|
1514
|
+
if lead_positions and lead_positions[0] is not None:
|
|
1515
|
+
first_gwas = gwas_dfs[0]
|
|
1516
|
+
if rs_col in first_gwas.columns:
|
|
1517
|
+
lead_row = first_gwas[first_gwas[pos_col] == lead_positions[0]]
|
|
1518
|
+
if not lead_row.empty:
|
|
1519
|
+
lead_snp_id = lead_row[rs_col].iloc[0]
|
|
1520
|
+
self._render_heatmap_panel(
|
|
1521
|
+
ax=heatmap_ax,
|
|
1522
|
+
fig=fig,
|
|
1523
|
+
ld_matrix=filtered_matrix,
|
|
1524
|
+
x_positions=x_positions,
|
|
1525
|
+
snp_ids=filtered_snp_ids,
|
|
1526
|
+
metric=ld_heatmap_metric,
|
|
1527
|
+
lead_snp_id=lead_snp_id,
|
|
1528
|
+
start=start,
|
|
1529
|
+
end=end,
|
|
1530
|
+
)
|
|
1531
|
+
# Heatmap is at very bottom - set x-label here
|
|
1532
|
+
self._backend.set_xlabel(heatmap_ax, f"Chromosome {chrom} (Mb)")
|
|
1533
|
+
elif genes_df is not None:
|
|
1534
|
+
# Gene track is at bottom (no heatmap) - set x-label on gene track
|
|
1535
|
+
self._backend.set_xlabel(gene_ax, f"Chromosome {chrom} (Mb)")
|
|
1075
1536
|
else:
|
|
1076
1537
|
# Set x-label on bottom panel
|
|
1077
1538
|
self._backend.set_xlabel(axes[-1], f"Chromosome {chrom} (Mb)")
|
|
@@ -1083,4 +1544,9 @@ class LocusZoomPlotter:
|
|
|
1083
1544
|
# Adjust layout
|
|
1084
1545
|
self._backend.finalize_layout(fig, hspace=0.1)
|
|
1085
1546
|
|
|
1547
|
+
# Adjust SNP labels AFTER all axis limits and layout are finalized
|
|
1548
|
+
# adjustText needs final plot bounds to position labels correctly
|
|
1549
|
+
for ax, texts in all_snp_label_texts:
|
|
1550
|
+
self._backend.adjust_snp_labels(ax, texts)
|
|
1551
|
+
|
|
1086
1552
|
return fig
|