pylocuszoom 0.6.0__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pylocuszoom/__init__.py +15 -0
- pylocuszoom/backends/__init__.py +116 -17
- pylocuszoom/backends/base.py +363 -60
- pylocuszoom/backends/bokeh_backend.py +77 -15
- pylocuszoom/backends/hover.py +198 -0
- pylocuszoom/backends/matplotlib_backend.py +263 -3
- pylocuszoom/backends/plotly_backend.py +73 -16
- pylocuszoom/ensembl.py +476 -0
- pylocuszoom/eqtl.py +15 -19
- pylocuszoom/finemapping.py +17 -26
- pylocuszoom/forest.py +9 -11
- pylocuszoom/gene_track.py +161 -135
- pylocuszoom/loaders.py +3 -1
- pylocuszoom/phewas.py +10 -11
- pylocuszoom/plotter.py +120 -194
- pylocuszoom/recombination.py +19 -3
- pylocuszoom/utils.py +52 -0
- pylocuszoom/validation.py +172 -0
- {pylocuszoom-0.6.0.dist-info → pylocuszoom-0.8.0.dist-info}/METADATA +46 -25
- pylocuszoom-0.8.0.dist-info/RECORD +29 -0
- pylocuszoom-0.6.0.dist-info/RECORD +0 -26
- {pylocuszoom-0.6.0.dist-info → pylocuszoom-0.8.0.dist-info}/WHEEL +0 -0
- {pylocuszoom-0.6.0.dist-info → pylocuszoom-0.8.0.dist-info}/licenses/LICENSE.md +0 -0
pylocuszoom/plotter.py
CHANGED
|
@@ -15,12 +15,9 @@ from typing import Any, List, Optional, Tuple
|
|
|
15
15
|
import matplotlib.pyplot as plt
|
|
16
16
|
import numpy as np
|
|
17
17
|
import pandas as pd
|
|
18
|
-
from matplotlib.axes import Axes
|
|
19
|
-
from matplotlib.figure import Figure
|
|
20
|
-
from matplotlib.lines import Line2D
|
|
21
|
-
from matplotlib.patches import Patch
|
|
22
18
|
|
|
23
19
|
from .backends import BackendType, get_backend
|
|
20
|
+
from .backends.hover import HoverConfig, HoverDataBuilder
|
|
24
21
|
from .colors import (
|
|
25
22
|
EQTL_NEGATIVE_BINS,
|
|
26
23
|
EQTL_POSITIVE_BINS,
|
|
@@ -33,6 +30,7 @@ from .colors import (
|
|
|
33
30
|
get_ld_color_palette,
|
|
34
31
|
get_phewas_category_palette,
|
|
35
32
|
)
|
|
33
|
+
from .ensembl import get_genes_for_region
|
|
36
34
|
from .eqtl import validate_eqtl_df
|
|
37
35
|
from .finemapping import (
|
|
38
36
|
get_credible_sets,
|
|
@@ -41,16 +39,13 @@ from .finemapping import (
|
|
|
41
39
|
from .forest import validate_forest_df
|
|
42
40
|
from .gene_track import (
|
|
43
41
|
assign_gene_positions,
|
|
44
|
-
plot_gene_track,
|
|
45
42
|
plot_gene_track_generic,
|
|
46
43
|
)
|
|
47
|
-
from .labels import add_snp_labels
|
|
48
44
|
from .ld import calculate_ld, find_plink
|
|
49
45
|
from .logging import enable_logging, logger
|
|
50
46
|
from .phewas import validate_phewas_df
|
|
51
47
|
from .recombination import (
|
|
52
48
|
RECOMB_COLOR,
|
|
53
|
-
add_recombination_overlay,
|
|
54
49
|
download_canine_recombination_maps,
|
|
55
50
|
get_default_data_dir,
|
|
56
51
|
get_recombination_rate_for_region,
|
|
@@ -119,8 +114,21 @@ class LocusZoomPlotter:
|
|
|
119
114
|
recomb_data_dir: Optional[str] = None,
|
|
120
115
|
genomewide_threshold: float = DEFAULT_GENOMEWIDE_THRESHOLD,
|
|
121
116
|
log_level: Optional[str] = "INFO",
|
|
117
|
+
auto_genes: bool = False,
|
|
122
118
|
):
|
|
123
|
-
"""Initialize the plotter.
|
|
119
|
+
"""Initialize the plotter.
|
|
120
|
+
|
|
121
|
+
Args:
|
|
122
|
+
species: Species name ('canine', 'feline', or None for custom).
|
|
123
|
+
genome_build: Genome build for coordinate system.
|
|
124
|
+
backend: Plotting backend ('matplotlib', 'plotly', or 'bokeh').
|
|
125
|
+
plink_path: Path to PLINK executable for LD calculation.
|
|
126
|
+
recomb_data_dir: Directory containing recombination maps.
|
|
127
|
+
genomewide_threshold: P-value threshold for significance line.
|
|
128
|
+
log_level: Logging level.
|
|
129
|
+
auto_genes: If True, automatically fetch genes from Ensembl when
|
|
130
|
+
genes_df is not provided. Default False for backward compatibility.
|
|
131
|
+
"""
|
|
124
132
|
# Configure logging
|
|
125
133
|
if log_level is not None:
|
|
126
134
|
enable_logging(log_level)
|
|
@@ -129,12 +137,12 @@ class LocusZoomPlotter:
|
|
|
129
137
|
self.genome_build = (
|
|
130
138
|
genome_build if genome_build else self._default_build(species)
|
|
131
139
|
)
|
|
132
|
-
self.backend_name = backend
|
|
133
140
|
self._backend = get_backend(backend)
|
|
134
141
|
self.plink_path = plink_path or find_plink()
|
|
135
142
|
self.recomb_data_dir = recomb_data_dir
|
|
136
143
|
self.genomewide_threshold = genomewide_threshold
|
|
137
144
|
self._genomewide_line = -np.log10(genomewide_threshold)
|
|
145
|
+
self._auto_genes = auto_genes
|
|
138
146
|
|
|
139
147
|
# Cache for loaded data
|
|
140
148
|
self._recomb_cache = {}
|
|
@@ -248,6 +256,22 @@ class LocusZoomPlotter:
|
|
|
248
256
|
"""
|
|
249
257
|
# Validate inputs
|
|
250
258
|
validate_gwas_df(gwas_df, pos_col=pos_col, p_col=p_col)
|
|
259
|
+
|
|
260
|
+
# Auto-fetch genes if enabled and not provided
|
|
261
|
+
if genes_df is None and self._auto_genes:
|
|
262
|
+
logger.debug(
|
|
263
|
+
f"auto_genes enabled, fetching genes for chr{chrom}:{start}-{end}"
|
|
264
|
+
)
|
|
265
|
+
genes_df = get_genes_for_region(
|
|
266
|
+
species=self.species,
|
|
267
|
+
chrom=chrom,
|
|
268
|
+
start=start,
|
|
269
|
+
end=end,
|
|
270
|
+
)
|
|
271
|
+
if genes_df.empty:
|
|
272
|
+
logger.debug("No genes found in region from Ensembl")
|
|
273
|
+
genes_df = None
|
|
274
|
+
|
|
251
275
|
if genes_df is not None:
|
|
252
276
|
validate_genes_df(genes_df)
|
|
253
277
|
|
|
@@ -305,10 +329,10 @@ class LocusZoomPlotter:
|
|
|
305
329
|
zorder=1,
|
|
306
330
|
)
|
|
307
331
|
|
|
308
|
-
# Add SNP labels (
|
|
332
|
+
# Add SNP labels (capability check - interactive backends use hover tooltips)
|
|
309
333
|
if snp_labels and rs_col in df.columns and label_top_n > 0 and not df.empty:
|
|
310
|
-
if self.
|
|
311
|
-
add_snp_labels(
|
|
334
|
+
if self._backend.supports_snp_labels:
|
|
335
|
+
self._backend.add_snp_labels(
|
|
312
336
|
ax,
|
|
313
337
|
df,
|
|
314
338
|
pos_col=pos_col,
|
|
@@ -319,12 +343,10 @@ class LocusZoomPlotter:
|
|
|
319
343
|
chrom=chrom,
|
|
320
344
|
)
|
|
321
345
|
|
|
322
|
-
# Add recombination overlay (all backends)
|
|
346
|
+
# Add recombination overlay (all backends with secondary axis support)
|
|
323
347
|
if recomb_df is not None and not recomb_df.empty:
|
|
324
|
-
if self.
|
|
325
|
-
|
|
326
|
-
else:
|
|
327
|
-
self._add_recombination_overlay_generic(ax, recomb_df, start, end)
|
|
348
|
+
if self._backend.supports_secondary_axis:
|
|
349
|
+
self._add_recombination_overlay(ax, recomb_df, start, end)
|
|
328
350
|
|
|
329
351
|
# Format axes
|
|
330
352
|
self._backend.set_ylabel(ax, r"$-\log_{10}$ P")
|
|
@@ -333,19 +355,13 @@ class LocusZoomPlotter:
|
|
|
333
355
|
|
|
334
356
|
# Add LD legend (all backends)
|
|
335
357
|
if ld_col is not None and ld_col in df.columns:
|
|
336
|
-
|
|
337
|
-
self._add_ld_legend(ax)
|
|
338
|
-
else:
|
|
339
|
-
self._backend.add_ld_legend(ax, LD_BINS, LEAD_SNP_COLOR)
|
|
358
|
+
self._backend.add_ld_legend(ax, LD_BINS, LEAD_SNP_COLOR)
|
|
340
359
|
|
|
341
|
-
# Plot gene track (all backends)
|
|
360
|
+
# Plot gene track (all backends use generic function)
|
|
342
361
|
if genes_df is not None and gene_ax is not None:
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
plot_gene_track_generic(
|
|
347
|
-
gene_ax, self._backend, genes_df, chrom, start, end, exons_df
|
|
348
|
-
)
|
|
362
|
+
plot_gene_track_generic(
|
|
363
|
+
gene_ax, self._backend, genes_df, chrom, start, end, exons_df
|
|
364
|
+
)
|
|
349
365
|
self._backend.set_xlabel(gene_ax, f"Chromosome {chrom} (Mb)")
|
|
350
366
|
self._backend.hide_spines(gene_ax, ["top", "right", "left"])
|
|
351
367
|
else:
|
|
@@ -366,7 +382,7 @@ class LocusZoomPlotter:
|
|
|
366
382
|
start: int,
|
|
367
383
|
end: int,
|
|
368
384
|
figsize: Tuple[int, int],
|
|
369
|
-
) -> Tuple[
|
|
385
|
+
) -> Tuple[Any, Any, Optional[Any]]:
|
|
370
386
|
"""Create figure with optional gene track."""
|
|
371
387
|
if genes_df is not None:
|
|
372
388
|
# Calculate dynamic height based on gene rows
|
|
@@ -410,7 +426,7 @@ class LocusZoomPlotter:
|
|
|
410
426
|
|
|
411
427
|
def _plot_association(
|
|
412
428
|
self,
|
|
413
|
-
ax:
|
|
429
|
+
ax: Any,
|
|
414
430
|
df: pd.DataFrame,
|
|
415
431
|
pos_col: str,
|
|
416
432
|
ld_col: Optional[str],
|
|
@@ -419,23 +435,14 @@ class LocusZoomPlotter:
|
|
|
419
435
|
p_col: Optional[str] = None,
|
|
420
436
|
) -> None:
|
|
421
437
|
"""Plot association scatter with LD coloring."""
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
if
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
if pos_col in subset_df.columns:
|
|
431
|
-
hover_cols["Position"] = subset_df[pos_col].values
|
|
432
|
-
# P-value
|
|
433
|
-
if p_col and p_col in subset_df.columns:
|
|
434
|
-
hover_cols["P-value"] = subset_df[p_col].values
|
|
435
|
-
# LD
|
|
436
|
-
if ld_col and ld_col in subset_df.columns:
|
|
437
|
-
hover_cols["R²"] = subset_df[ld_col].values
|
|
438
|
-
return pd.DataFrame(hover_cols) if hover_cols else None
|
|
438
|
+
# Build hover data using HoverDataBuilder
|
|
439
|
+
hover_config = HoverConfig(
|
|
440
|
+
snp_col=rs_col if rs_col and rs_col in df.columns else None,
|
|
441
|
+
pos_col=pos_col if pos_col in df.columns else None,
|
|
442
|
+
p_col=p_col if p_col and p_col in df.columns else None,
|
|
443
|
+
ld_col=ld_col if ld_col and ld_col in df.columns else None,
|
|
444
|
+
)
|
|
445
|
+
hover_builder = HoverDataBuilder(hover_config)
|
|
439
446
|
|
|
440
447
|
# LD-based coloring
|
|
441
448
|
if ld_col is not None and ld_col in df.columns:
|
|
@@ -454,7 +461,7 @@ class LocusZoomPlotter:
|
|
|
454
461
|
edgecolor="black",
|
|
455
462
|
linewidth=0.5,
|
|
456
463
|
zorder=2,
|
|
457
|
-
hover_data=
|
|
464
|
+
hover_data=hover_builder.build_dataframe(bin_data),
|
|
458
465
|
)
|
|
459
466
|
else:
|
|
460
467
|
# Default: grey points
|
|
@@ -467,7 +474,7 @@ class LocusZoomPlotter:
|
|
|
467
474
|
edgecolor="black",
|
|
468
475
|
linewidth=0.5,
|
|
469
476
|
zorder=2,
|
|
470
|
-
hover_data=
|
|
477
|
+
hover_data=hover_builder.build_dataframe(df),
|
|
471
478
|
)
|
|
472
479
|
|
|
473
480
|
# Highlight lead SNP with larger, more prominent marker
|
|
@@ -484,57 +491,21 @@ class LocusZoomPlotter:
|
|
|
484
491
|
edgecolor="black",
|
|
485
492
|
linewidth=1.5,
|
|
486
493
|
zorder=10,
|
|
487
|
-
hover_data=
|
|
494
|
+
hover_data=hover_builder.build_dataframe(lead_snp),
|
|
488
495
|
)
|
|
489
496
|
|
|
490
|
-
def
|
|
491
|
-
"""Add LD color legend to plot."""
|
|
492
|
-
palette = get_ld_color_palette()
|
|
493
|
-
legend_elements = [
|
|
494
|
-
Line2D(
|
|
495
|
-
[0],
|
|
496
|
-
[0],
|
|
497
|
-
marker="D",
|
|
498
|
-
color="w",
|
|
499
|
-
markerfacecolor=LEAD_SNP_COLOR,
|
|
500
|
-
markeredgecolor="black",
|
|
501
|
-
markersize=6,
|
|
502
|
-
label="Lead SNP",
|
|
503
|
-
),
|
|
504
|
-
]
|
|
505
|
-
|
|
506
|
-
for threshold, label, _ in LD_BINS:
|
|
507
|
-
legend_elements.append(
|
|
508
|
-
Patch(
|
|
509
|
-
facecolor=palette[label],
|
|
510
|
-
edgecolor="black",
|
|
511
|
-
label=label,
|
|
512
|
-
)
|
|
513
|
-
)
|
|
514
|
-
|
|
515
|
-
ax.legend(
|
|
516
|
-
handles=legend_elements,
|
|
517
|
-
loc="upper right",
|
|
518
|
-
fontsize=9,
|
|
519
|
-
frameon=True,
|
|
520
|
-
framealpha=0.9,
|
|
521
|
-
title=r"$r^2$",
|
|
522
|
-
title_fontsize=10,
|
|
523
|
-
handlelength=1.5,
|
|
524
|
-
handleheight=1.0,
|
|
525
|
-
labelspacing=0.4,
|
|
526
|
-
)
|
|
527
|
-
|
|
528
|
-
def _add_recombination_overlay_generic(
|
|
497
|
+
def _add_recombination_overlay(
|
|
529
498
|
self,
|
|
530
499
|
ax: Any,
|
|
531
500
|
recomb_df: pd.DataFrame,
|
|
532
501
|
start: int,
|
|
533
502
|
end: int,
|
|
534
503
|
) -> None:
|
|
535
|
-
"""Add recombination overlay for
|
|
504
|
+
"""Add recombination overlay for all backends.
|
|
536
505
|
|
|
537
506
|
Creates a secondary y-axis with recombination rate line and fill.
|
|
507
|
+
Uses backend-agnostic secondary axis methods that work across
|
|
508
|
+
matplotlib, plotly, and bokeh.
|
|
538
509
|
"""
|
|
539
510
|
# Filter to region
|
|
540
511
|
region_recomb = recomb_df[
|
|
@@ -591,7 +562,7 @@ class LocusZoomPlotter:
|
|
|
591
562
|
|
|
592
563
|
def _plot_finemapping(
|
|
593
564
|
self,
|
|
594
|
-
ax:
|
|
565
|
+
ax: Any,
|
|
595
566
|
df: pd.DataFrame,
|
|
596
567
|
pos_col: str = "pos",
|
|
597
568
|
pip_col: str = "pip",
|
|
@@ -610,22 +581,15 @@ class LocusZoomPlotter:
|
|
|
610
581
|
show_credible_sets: Whether to color points by credible set.
|
|
611
582
|
pip_threshold: Minimum PIP to display as scatter point.
|
|
612
583
|
"""
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
# PIP
|
|
623
|
-
if pip_col in subset_df.columns:
|
|
624
|
-
hover_cols["PIP"] = subset_df[pip_col].values
|
|
625
|
-
# Credible set
|
|
626
|
-
if cs_col and cs_col in subset_df.columns:
|
|
627
|
-
hover_cols["Credible Set"] = subset_df[cs_col].values
|
|
628
|
-
return pd.DataFrame(hover_cols) if hover_cols else None
|
|
584
|
+
# Build hover data using HoverDataBuilder
|
|
585
|
+
extra_cols = {pip_col: "PIP"}
|
|
586
|
+
if cs_col and cs_col in df.columns:
|
|
587
|
+
extra_cols[cs_col] = "Credible Set"
|
|
588
|
+
hover_config = HoverConfig(
|
|
589
|
+
pos_col=pos_col if pos_col in df.columns else None,
|
|
590
|
+
extra_cols=extra_cols,
|
|
591
|
+
)
|
|
592
|
+
hover_builder = HoverDataBuilder(hover_config)
|
|
629
593
|
|
|
630
594
|
# Sort by position for line plotting
|
|
631
595
|
df = df.sort_values(pos_col)
|
|
@@ -660,7 +624,7 @@ class LocusZoomPlotter:
|
|
|
660
624
|
edgecolor="black",
|
|
661
625
|
linewidth=0.5,
|
|
662
626
|
zorder=3,
|
|
663
|
-
hover_data=
|
|
627
|
+
hover_data=hover_builder.build_dataframe(cs_data),
|
|
664
628
|
)
|
|
665
629
|
# Plot variants not in any credible set
|
|
666
630
|
non_cs_data = df[(df[cs_col].isna()) | (df[cs_col] == 0)]
|
|
@@ -677,7 +641,7 @@ class LocusZoomPlotter:
|
|
|
677
641
|
edgecolor="black",
|
|
678
642
|
linewidth=0.3,
|
|
679
643
|
zorder=2,
|
|
680
|
-
hover_data=
|
|
644
|
+
hover_data=hover_builder.build_dataframe(non_cs_data),
|
|
681
645
|
)
|
|
682
646
|
else:
|
|
683
647
|
# No credible sets - show all points above threshold
|
|
@@ -694,7 +658,7 @@ class LocusZoomPlotter:
|
|
|
694
658
|
edgecolor="black",
|
|
695
659
|
linewidth=0.5,
|
|
696
660
|
zorder=3,
|
|
697
|
-
hover_data=
|
|
661
|
+
hover_data=hover_builder.build_dataframe(high_pip),
|
|
698
662
|
)
|
|
699
663
|
|
|
700
664
|
def plot_stacked(
|
|
@@ -912,10 +876,10 @@ class LocusZoomPlotter:
|
|
|
912
876
|
zorder=1,
|
|
913
877
|
)
|
|
914
878
|
|
|
915
|
-
# Add SNP labels (
|
|
879
|
+
# Add SNP labels (capability check - interactive backends use hover tooltips)
|
|
916
880
|
if snp_labels and rs_col in df.columns and label_top_n > 0 and not df.empty:
|
|
917
|
-
if self.
|
|
918
|
-
add_snp_labels(
|
|
881
|
+
if self._backend.supports_snp_labels:
|
|
882
|
+
self._backend.add_snp_labels(
|
|
919
883
|
ax,
|
|
920
884
|
df,
|
|
921
885
|
pos_col=pos_col,
|
|
@@ -928,10 +892,8 @@ class LocusZoomPlotter:
|
|
|
928
892
|
|
|
929
893
|
# Add recombination overlay (only on first panel, all backends)
|
|
930
894
|
if i == 0 and recomb_df is not None and not recomb_df.empty:
|
|
931
|
-
if self.
|
|
932
|
-
|
|
933
|
-
else:
|
|
934
|
-
self._add_recombination_overlay_generic(ax, recomb_df, start, end)
|
|
895
|
+
if self._backend.supports_secondary_axis:
|
|
896
|
+
self._add_recombination_overlay(ax, recomb_df, start, end)
|
|
935
897
|
|
|
936
898
|
# Format axes
|
|
937
899
|
self._backend.set_ylabel(ax, r"$-\log_{10}$ P")
|
|
@@ -940,50 +902,11 @@ class LocusZoomPlotter:
|
|
|
940
902
|
|
|
941
903
|
# Add panel label
|
|
942
904
|
if panel_labels and i < len(panel_labels):
|
|
943
|
-
|
|
944
|
-
ax.annotate(
|
|
945
|
-
panel_labels[i],
|
|
946
|
-
xy=(0.02, 0.95),
|
|
947
|
-
xycoords="axes fraction",
|
|
948
|
-
fontsize=11,
|
|
949
|
-
fontweight="bold",
|
|
950
|
-
va="top",
|
|
951
|
-
ha="left",
|
|
952
|
-
)
|
|
953
|
-
elif self.backend_name == "plotly":
|
|
954
|
-
fig, row = ax
|
|
955
|
-
fig.add_annotation(
|
|
956
|
-
text=f"<b>{panel_labels[i]}</b>",
|
|
957
|
-
xref=f"x{row} domain" if row > 1 else "x domain",
|
|
958
|
-
yref=f"y{row} domain" if row > 1 else "y domain",
|
|
959
|
-
x=0.02,
|
|
960
|
-
y=0.95,
|
|
961
|
-
showarrow=False,
|
|
962
|
-
font=dict(size=11),
|
|
963
|
-
xanchor="left",
|
|
964
|
-
yanchor="top",
|
|
965
|
-
)
|
|
966
|
-
elif self.backend_name == "bokeh":
|
|
967
|
-
from bokeh.models import Label
|
|
968
|
-
|
|
969
|
-
# Get y-axis range for positioning
|
|
970
|
-
y_max = ax.y_range.end if ax.y_range.end else 10
|
|
971
|
-
x_min = ax.x_range.start if ax.x_range.start else start
|
|
972
|
-
label = Label(
|
|
973
|
-
x=x_min + (end - start) * 0.02,
|
|
974
|
-
y=y_max * 0.95,
|
|
975
|
-
text=panel_labels[i],
|
|
976
|
-
text_font_size="11pt",
|
|
977
|
-
text_font_style="bold",
|
|
978
|
-
)
|
|
979
|
-
ax.add_layout(label)
|
|
905
|
+
self._backend.add_panel_label(ax, panel_labels[i])
|
|
980
906
|
|
|
981
907
|
# Add LD legend (only on first panel, all backends)
|
|
982
908
|
if i == 0 and panel_ld_col is not None and panel_ld_col in df.columns:
|
|
983
|
-
|
|
984
|
-
self._add_ld_legend(ax)
|
|
985
|
-
else:
|
|
986
|
-
self._backend.add_ld_legend(ax, LD_BINS, LEAD_SNP_COLOR)
|
|
909
|
+
self._backend.add_ld_legend(ax, LD_BINS, LEAD_SNP_COLOR)
|
|
987
910
|
|
|
988
911
|
# Track current panel index
|
|
989
912
|
panel_idx = n_gwas
|
|
@@ -1050,24 +973,18 @@ class LocusZoomPlotter:
|
|
|
1050
973
|
eqtl_data["p_value"].clip(lower=1e-300)
|
|
1051
974
|
)
|
|
1052
975
|
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
""
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
if "effect_size" in subset_df.columns:
|
|
1066
|
-
hover_cols["Effect"] = subset_df["effect_size"].values
|
|
1067
|
-
# Gene
|
|
1068
|
-
if "gene" in subset_df.columns:
|
|
1069
|
-
hover_cols["Gene"] = subset_df["gene"].values
|
|
1070
|
-
return pd.DataFrame(hover_cols) if hover_cols else None
|
|
976
|
+
# Build hover data using HoverDataBuilder
|
|
977
|
+
eqtl_extra_cols = {}
|
|
978
|
+
if "effect_size" in eqtl_data.columns:
|
|
979
|
+
eqtl_extra_cols["effect_size"] = "Effect"
|
|
980
|
+
if "gene" in eqtl_data.columns:
|
|
981
|
+
eqtl_extra_cols["gene"] = "Gene"
|
|
982
|
+
eqtl_hover_config = HoverConfig(
|
|
983
|
+
pos_col="pos" if "pos" in eqtl_data.columns else None,
|
|
984
|
+
p_col="p_value" if "p_value" in eqtl_data.columns else None,
|
|
985
|
+
extra_cols=eqtl_extra_cols,
|
|
986
|
+
)
|
|
987
|
+
eqtl_hover_builder = HoverDataBuilder(eqtl_hover_config)
|
|
1071
988
|
|
|
1072
989
|
# Check if effect_size column exists for directional coloring
|
|
1073
990
|
has_effect = "effect_size" in eqtl_data.columns
|
|
@@ -1090,7 +1007,7 @@ class LocusZoomPlotter:
|
|
|
1090
1007
|
edgecolor="black",
|
|
1091
1008
|
linewidth=0.5,
|
|
1092
1009
|
zorder=2,
|
|
1093
|
-
hover_data=
|
|
1010
|
+
hover_data=eqtl_hover_builder.build_dataframe(row_df),
|
|
1094
1011
|
)
|
|
1095
1012
|
# Plot negative effects (down triangles)
|
|
1096
1013
|
for _, row in neg_effects.iterrows():
|
|
@@ -1105,7 +1022,7 @@ class LocusZoomPlotter:
|
|
|
1105
1022
|
edgecolor="black",
|
|
1106
1023
|
linewidth=0.5,
|
|
1107
1024
|
zorder=2,
|
|
1108
|
-
hover_data=
|
|
1025
|
+
hover_data=eqtl_hover_builder.build_dataframe(row_df),
|
|
1109
1026
|
)
|
|
1110
1027
|
# Add eQTL effect legend (all backends)
|
|
1111
1028
|
self._backend.add_eqtl_legend(
|
|
@@ -1125,7 +1042,7 @@ class LocusZoomPlotter:
|
|
|
1125
1042
|
linewidth=0.5,
|
|
1126
1043
|
zorder=2,
|
|
1127
1044
|
label=label,
|
|
1128
|
-
hover_data=
|
|
1045
|
+
hover_data=eqtl_hover_builder.build_dataframe(eqtl_data),
|
|
1129
1046
|
)
|
|
1130
1047
|
self._backend.add_simple_legend(ax, label, loc="upper right")
|
|
1131
1048
|
|
|
@@ -1141,15 +1058,12 @@ class LocusZoomPlotter:
|
|
|
1141
1058
|
self._backend.hide_spines(ax, ["top", "right"])
|
|
1142
1059
|
panel_idx += 1
|
|
1143
1060
|
|
|
1144
|
-
# Plot gene track (all backends)
|
|
1061
|
+
# Plot gene track (all backends use generic function)
|
|
1145
1062
|
if genes_df is not None:
|
|
1146
1063
|
gene_ax = axes[panel_idx]
|
|
1147
|
-
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
plot_gene_track_generic(
|
|
1151
|
-
gene_ax, self._backend, genes_df, chrom, start, end, exons_df
|
|
1152
|
-
)
|
|
1064
|
+
plot_gene_track_generic(
|
|
1065
|
+
gene_ax, self._backend, genes_df, chrom, start, end, exons_df
|
|
1066
|
+
)
|
|
1153
1067
|
self._backend.set_xlabel(gene_ax, f"Chromosome {chrom} (Mb)")
|
|
1154
1068
|
self._backend.hide_spines(gene_ax, ["top", "right", "left"])
|
|
1155
1069
|
else:
|
|
@@ -1281,10 +1195,13 @@ class LocusZoomPlotter:
|
|
|
1281
1195
|
self._backend.set_ylabel(ax, "Phenotype")
|
|
1282
1196
|
self._backend.set_ylim(ax, -0.5, len(df) - 0.5)
|
|
1283
1197
|
|
|
1284
|
-
# Set y-tick labels to phenotype names
|
|
1285
|
-
|
|
1286
|
-
ax
|
|
1287
|
-
|
|
1198
|
+
# Set y-tick labels to phenotype names
|
|
1199
|
+
self._backend.set_yticks(
|
|
1200
|
+
ax,
|
|
1201
|
+
positions=df["y_pos"].tolist(),
|
|
1202
|
+
labels=df[phenotype_col].tolist(),
|
|
1203
|
+
fontsize=8,
|
|
1204
|
+
)
|
|
1288
1205
|
|
|
1289
1206
|
self._backend.set_title(ax, f"PheWAS: {variant_id}")
|
|
1290
1207
|
self._backend.hide_spines(ax, ["top", "right"])
|
|
@@ -1399,10 +1316,19 @@ class LocusZoomPlotter:
|
|
|
1399
1316
|
self._backend.set_xlabel(ax, effect_label)
|
|
1400
1317
|
self._backend.set_ylim(ax, -0.5, len(df) - 0.5)
|
|
1401
1318
|
|
|
1402
|
-
#
|
|
1403
|
-
|
|
1404
|
-
|
|
1405
|
-
|
|
1319
|
+
# Ensure x-axis includes the null value with some padding
|
|
1320
|
+
x_min = min(df[ci_lower_col].min(), null_value)
|
|
1321
|
+
x_max = max(df[ci_upper_col].max(), null_value)
|
|
1322
|
+
x_padding = (x_max - x_min) * 0.1
|
|
1323
|
+
self._backend.set_xlim(ax, x_min - x_padding, x_max + x_padding)
|
|
1324
|
+
|
|
1325
|
+
# Set y-tick labels to study names
|
|
1326
|
+
self._backend.set_yticks(
|
|
1327
|
+
ax,
|
|
1328
|
+
positions=df["y_pos"].tolist(),
|
|
1329
|
+
labels=df[study_col].tolist(),
|
|
1330
|
+
fontsize=10,
|
|
1331
|
+
)
|
|
1406
1332
|
|
|
1407
1333
|
self._backend.set_title(ax, f"Forest Plot: {variant_id}")
|
|
1408
1334
|
self._backend.hide_spines(ax, ["top", "right"])
|
pylocuszoom/recombination.py
CHANGED
|
@@ -18,6 +18,7 @@ from matplotlib.axes import Axes
|
|
|
18
18
|
from tqdm import tqdm
|
|
19
19
|
|
|
20
20
|
from .logging import logger
|
|
21
|
+
from .utils import filter_by_region
|
|
21
22
|
|
|
22
23
|
# Recombination overlay color
|
|
23
24
|
RECOMB_COLOR = "#7FCDFF" # Light blue
|
|
@@ -252,10 +253,20 @@ def download_canine_recombination_maps(
|
|
|
252
253
|
|
|
253
254
|
logger.debug(f"Downloaded {tar_path.stat().st_size / 1024:.1f} KB")
|
|
254
255
|
|
|
255
|
-
# Extract tar.gz
|
|
256
|
+
# Extract tar.gz with path traversal protection
|
|
256
257
|
logger.debug("Extracting genetic maps...")
|
|
257
258
|
with tarfile.open(tar_path, "r:gz") as tar:
|
|
258
|
-
|
|
259
|
+
# Filter to prevent path traversal attacks
|
|
260
|
+
safe_members = []
|
|
261
|
+
for member in tar.getmembers():
|
|
262
|
+
# Resolve the path and ensure it stays within tmpdir
|
|
263
|
+
member_path = Path(tmpdir) / member.name
|
|
264
|
+
try:
|
|
265
|
+
member_path.resolve().relative_to(Path(tmpdir).resolve())
|
|
266
|
+
safe_members.append(member)
|
|
267
|
+
except ValueError:
|
|
268
|
+
logger.warning(f"Skipping unsafe path in archive: {member.name}")
|
|
269
|
+
tar.extractall(tmpdir, members=safe_members)
|
|
259
270
|
|
|
260
271
|
# Find and process the extracted files
|
|
261
272
|
extracted_dir = Path(tmpdir)
|
|
@@ -374,7 +385,12 @@ def get_recombination_rate_for_region(
|
|
|
374
385
|
)
|
|
375
386
|
|
|
376
387
|
# Filter to region
|
|
377
|
-
region_df =
|
|
388
|
+
region_df = filter_by_region(
|
|
389
|
+
df,
|
|
390
|
+
region=(chrom, start, end),
|
|
391
|
+
chrom_col="", # Recomb maps don't have chromosome column
|
|
392
|
+
pos_col="pos",
|
|
393
|
+
)
|
|
378
394
|
|
|
379
395
|
return region_df[["pos", "rate"]]
|
|
380
396
|
|
pylocuszoom/utils.py
CHANGED
|
@@ -106,6 +106,58 @@ def normalize_chrom(chrom: Union[int, str]) -> str:
|
|
|
106
106
|
return str(chrom).replace("chr", "")
|
|
107
107
|
|
|
108
108
|
|
|
109
|
+
def filter_by_region(
|
|
110
|
+
df: pd.DataFrame,
|
|
111
|
+
region: tuple,
|
|
112
|
+
chrom_col: str = "chrom",
|
|
113
|
+
pos_col: str = "pos",
|
|
114
|
+
) -> pd.DataFrame:
|
|
115
|
+
"""Filter DataFrame to genomic region with inclusive bounds.
|
|
116
|
+
|
|
117
|
+
Filters rows where position is within [start, end] (inclusive).
|
|
118
|
+
If chrom_col exists in DataFrame, also filters by chromosome.
|
|
119
|
+
Chromosome comparison normalizes types (int/str, chr prefix).
|
|
120
|
+
|
|
121
|
+
Args:
|
|
122
|
+
df: DataFrame to filter.
|
|
123
|
+
region: Tuple of (chrom, start, end) defining the region.
|
|
124
|
+
chrom_col: Column name for chromosome (default: "chrom").
|
|
125
|
+
If column doesn't exist, filters by position only.
|
|
126
|
+
pos_col: Column name for position (default: "pos").
|
|
127
|
+
|
|
128
|
+
Returns:
|
|
129
|
+
Filtered DataFrame (copy, not view).
|
|
130
|
+
|
|
131
|
+
Raises:
|
|
132
|
+
KeyError: If pos_col is not found in DataFrame.
|
|
133
|
+
|
|
134
|
+
Example:
|
|
135
|
+
>>> filtered = filter_by_region(df, region=(1, 1000000, 2000000))
|
|
136
|
+
>>> filtered = filter_by_region(df, region=("chr1", 1e6, 2e6), pos_col="position")
|
|
137
|
+
"""
|
|
138
|
+
chrom, start, end = region
|
|
139
|
+
|
|
140
|
+
# Validate position column exists
|
|
141
|
+
if pos_col not in df.columns:
|
|
142
|
+
raise KeyError(
|
|
143
|
+
f"Position column '{pos_col}' not found in DataFrame. "
|
|
144
|
+
f"Available columns: {list(df.columns)}"
|
|
145
|
+
)
|
|
146
|
+
|
|
147
|
+
# Position filtering (inclusive bounds)
|
|
148
|
+
mask = (df[pos_col] >= start) & (df[pos_col] <= end)
|
|
149
|
+
|
|
150
|
+
# Chromosome filtering (if column exists)
|
|
151
|
+
if chrom_col in df.columns:
|
|
152
|
+
chrom_normalized = normalize_chrom(chrom)
|
|
153
|
+
df_chrom_normalized = (
|
|
154
|
+
df[chrom_col].astype(str).str.replace("chr", "", regex=False)
|
|
155
|
+
)
|
|
156
|
+
mask = mask & (df_chrom_normalized == chrom_normalized)
|
|
157
|
+
|
|
158
|
+
return df[mask].copy()
|
|
159
|
+
|
|
160
|
+
|
|
109
161
|
def validate_dataframe(
|
|
110
162
|
df: pd.DataFrame,
|
|
111
163
|
required_cols: List[str],
|