pylocuszoom 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pylocuszoom/plotter.py ADDED
@@ -0,0 +1,733 @@
1
+ """Main LocusZoomPlotter class for regional association plots.
2
+
3
+ Orchestrates all components (LD coloring, gene track, recombination overlay,
4
+ SNP labels) into a unified plotting interface.
5
+
6
+ Supports multiple backends:
7
+ - matplotlib (default): Static publication-quality plots
8
+ - plotly: Interactive HTML with hover tooltips
9
+ - bokeh: Interactive HTML for dashboards
10
+ """
11
+
12
+ from pathlib import Path
13
+ from typing import Any, List, Optional, Tuple, Union
14
+
15
+ import matplotlib.pyplot as plt
16
+ import numpy as np
17
+ import pandas as pd
18
+ from matplotlib.axes import Axes
19
+ from matplotlib.figure import Figure
20
+ from matplotlib.lines import Line2D
21
+ from matplotlib.patches import Patch
22
+ from matplotlib.ticker import FuncFormatter, MaxNLocator
23
+
24
+ from .backends import BackendType, PlotBackend, get_backend
25
+
26
+ from .colors import (
27
+ LD_BINS,
28
+ LEAD_SNP_COLOR,
29
+ get_ld_bin,
30
+ get_ld_color_palette,
31
+ )
32
+ from .gene_track import assign_gene_positions, plot_gene_track
33
+ from .labels import add_snp_labels
34
+ from .ld import calculate_ld, find_plink
35
+ from .logging import enable_logging, logger
36
+ from .recombination import (
37
+ add_recombination_overlay,
38
+ download_dog_recombination_maps,
39
+ get_default_data_dir,
40
+ get_recombination_rate_for_region,
41
+ )
42
+ from .utils import normalize_chrom, validate_genes_df, validate_gwas_df
43
+
44
+ # Default significance threshold. Conventional values are 5e-8 for human and 5e-7 for dog; the dog value is used as the default here.
45
+ DEFAULT_GENOMEWIDE_THRESHOLD = 5e-7
46
+ DEFAULT_GENOMEWIDE_LINE = -np.log10(DEFAULT_GENOMEWIDE_THRESHOLD)
47
+
48
+
49
+ class LocusZoomPlotter:
50
+ """Regional association plot generator with LD coloring and annotations.
51
+
52
+ Creates LocusZoom-style regional plots with:
53
+ - LD coloring based on R² with lead variant
54
+ - Gene and exon tracks
55
+ - Recombination rate overlays (dog built-in, or user-provided)
56
+ - Automatic SNP labeling
57
+
58
+ Supports multiple rendering backends:
59
+ - matplotlib (default): Static publication-quality plots
60
+ - plotly: Interactive HTML with hover tooltips
61
+ - bokeh: Interactive HTML for dashboards
62
+
63
+ Args:
64
+ species: Species name ('dog', 'cat', or None for custom).
65
+ Dog has built-in recombination maps.
66
+ genome_build: Genome build for coordinate system. For dog:
67
+ "canfam3.1" (default) or "canfam4". If "canfam4", recombination
68
+ maps are automatically lifted over from CanFam3.1.
69
+ backend: Plotting backend ('matplotlib', 'plotly', or 'bokeh').
70
+ Defaults to 'matplotlib' for static plots.
71
+ plink_path: Path to PLINK executable for LD calculation.
72
+ Auto-detects if None.
73
+ recomb_data_dir: Directory containing recombination maps.
74
+ Uses platform cache if None.
75
+ genomewide_threshold: P-value threshold for significance line.
76
+ log_level: Logging level ("DEBUG", "INFO", "WARNING", "ERROR", or None
77
+ to disable). Defaults to "INFO".
78
+
79
+ Example:
80
+ >>> # Static plot (default)
81
+ >>> plotter = LocusZoomPlotter(species="dog")
82
+ >>>
83
+ >>> # Interactive plot with plotly
84
+ >>> plotter = LocusZoomPlotter(species="dog", backend="plotly")
85
+ >>>
86
+ >>> fig = plotter.plot(
87
+ ... gwas_df,
88
+ ... chrom=1,
89
+ ... start=1000000,
90
+ ... end=2000000,
91
+ ... lead_pos=1500000,
92
+ ... )
93
+ >>> fig.savefig("regional_plot.png", dpi=150) # matplotlib
94
+ >>> # or fig.save("plot.html") # plotly/bokeh
95
+ """
96
+
97
+ def __init__(
98
+ self,
99
+ species: str = "dog",
100
+ genome_build: Optional[str] = None,
101
+ backend: BackendType = "matplotlib",
102
+ plink_path: Optional[str] = None,
103
+ recomb_data_dir: Optional[str] = None,
104
+ genomewide_threshold: float = DEFAULT_GENOMEWIDE_THRESHOLD,
105
+ log_level: Optional[str] = "INFO",
106
+ ):
107
+ """Initialize the plotter."""
108
+ # Configure logging
109
+ if log_level is not None:
110
+ enable_logging(log_level)
111
+
112
+ self.species = species
113
+ self.genome_build = (
114
+ genome_build if genome_build else self._default_build(species)
115
+ )
116
+ self.backend_name = backend
117
+ self._backend = get_backend(backend)
118
+ self.plink_path = plink_path or find_plink()
119
+ self.recomb_data_dir = recomb_data_dir
120
+ self.genomewide_threshold = genomewide_threshold
121
+ self._genomewide_line = -np.log10(genomewide_threshold)
122
+
123
+ # Cache for loaded data
124
+ self._recomb_cache = {}
125
+
126
+ @staticmethod
127
+ def _default_build(species: str) -> Optional[str]:
128
+ """Get default genome build for species."""
129
+ if species == "dog":
130
+ return "canfam3.1"
131
+ if species == "cat":
132
+ return "felCat9"
133
+ return None
134
+
135
+ def _ensure_recomb_maps(self) -> Optional[Path]:
136
+ """Ensure recombination maps are downloaded.
137
+
138
+ Returns path to recombination map directory, or None if not available.
139
+ """
140
+ if self.species == "dog":
141
+ if self.recomb_data_dir:
142
+ return Path(self.recomb_data_dir)
143
+ # Check if already downloaded
144
+ default_dir = get_default_data_dir()
145
+ if (
146
+ default_dir.exists()
147
+ and len(list(default_dir.glob("chr*_recomb.tsv"))) >= 38
148
+ ):
149
+ return default_dir
150
+ # Download
151
+ try:
152
+ return download_dog_recombination_maps()
153
+ except Exception as e:
154
+ logger.warning(f"Could not download recombination maps: {e}")
155
+ return None
156
+ elif self.recomb_data_dir:
157
+ return Path(self.recomb_data_dir)
158
+ return None
159
+
160
+ def _get_recomb_for_region(
161
+ self, chrom: int, start: int, end: int
162
+ ) -> Optional[pd.DataFrame]:
163
+ """Get recombination rate data for a region, with caching."""
164
+ cache_key = (chrom, start, end, self.genome_build)
165
+ if cache_key in self._recomb_cache:
166
+ return self._recomb_cache[cache_key]
167
+
168
+ recomb_dir = self._ensure_recomb_maps()
169
+ if recomb_dir is None:
170
+ return None
171
+
172
+ try:
173
+ recomb_df = get_recombination_rate_for_region(
174
+ chrom=chrom,
175
+ start=start,
176
+ end=end,
177
+ species=self.species,
178
+ data_dir=str(recomb_dir),
179
+ genome_build=self.genome_build,
180
+ )
181
+ self._recomb_cache[cache_key] = recomb_df
182
+ return recomb_df
183
+ except FileNotFoundError:
184
+ return None
185
+
186
+ def plot(
187
+ self,
188
+ gwas_df: pd.DataFrame,
189
+ chrom: int,
190
+ start: int,
191
+ end: int,
192
+ lead_pos: Optional[int] = None,
193
+ ld_reference_file: Optional[str] = None,
194
+ ld_col: Optional[str] = None,
195
+ genes_df: Optional[pd.DataFrame] = None,
196
+ exons_df: Optional[pd.DataFrame] = None,
197
+ recomb_df: Optional[pd.DataFrame] = None,
198
+ show_recombination: bool = True,
199
+ snp_labels: bool = True,
200
+ label_top_n: int = 5,
201
+ pos_col: str = "ps",
202
+ p_col: str = "p_wald",
203
+ rs_col: str = "rs",
204
+ figsize: Tuple[int, int] = (12, 8),
205
+ ) -> Figure:
206
+ """Create a regional association plot.
207
+
208
+ Args:
209
+ gwas_df: GWAS results DataFrame.
210
+ chrom: Chromosome number.
211
+ start: Start position of the region.
212
+ end: End position of the region.
213
+ lead_pos: Position of the lead/index SNP to highlight.
214
+ ld_reference_file: PLINK binary fileset for LD calculation.
215
+ If provided with lead_pos, calculates LD on the fly.
216
+ ld_col: Column name for pre-computed LD (R²) values.
217
+ Use this if LD was calculated externally.
218
+ genes_df: Gene annotations with chr, start, end, gene_name.
219
+ exons_df: Exon annotations with chr, start, end, gene_name.
220
+ recomb_df: Pre-loaded recombination rate data.
221
+ If None and show_recombination=True, loads from species default.
222
+ show_recombination: Whether to show recombination rate overlay.
223
+ snp_labels: Whether to label top SNPs.
224
+ label_top_n: Number of top SNPs to label.
225
+ pos_col: Column name for position.
226
+ p_col: Column name for p-value.
227
+ rs_col: Column name for SNP ID.
228
+ figsize: Figure size.
229
+
230
+ Returns:
231
+ Matplotlib Figure object.
232
+
233
+ Raises:
234
+ ValidationError: If required DataFrame columns are missing.
235
+ """
236
+ # Validate inputs
237
+ validate_gwas_df(gwas_df, pos_col=pos_col, p_col=p_col)
238
+ if genes_df is not None:
239
+ validate_genes_df(genes_df)
240
+
241
+ logger.debug(f"Creating plot for chr{chrom}:{start}-{end}")
242
+
243
+ # Prevent auto-display in interactive environments
244
+ plt.ioff()
245
+
246
+ # Prepare data
247
+ df = gwas_df.copy()
248
+ df["neglog10p"] = -np.log10(df[p_col].clip(lower=1e-300))
249
+
250
+ # Calculate LD if reference file provided
251
+ if ld_reference_file and lead_pos and ld_col is None:
252
+ lead_snp_row = df[df[pos_col] == lead_pos]
253
+ if not lead_snp_row.empty:
254
+ lead_snp_id = lead_snp_row[rs_col].iloc[0]
255
+ logger.debug(f"Calculating LD for lead SNP {lead_snp_id}")
256
+ ld_df = calculate_ld(
257
+ bfile_path=ld_reference_file,
258
+ lead_snp=lead_snp_id,
259
+ window_kb=max((end - start) // 1000, 500),
260
+ plink_path=self.plink_path,
261
+ species=self.species,
262
+ )
263
+ if not ld_df.empty:
264
+ df = df.merge(ld_df, left_on=rs_col, right_on="SNP", how="left")
265
+ ld_col = "R2"
266
+
267
+ # Load recombination data if needed
268
+ if show_recombination and recomb_df is None:
269
+ recomb_df = self._get_recomb_for_region(chrom, start, end)
270
+
271
+ # Create figure layout
272
+ fig, ax, gene_ax = self._create_figure(genes_df, chrom, start, end, figsize)
273
+
274
+ # Plot association data
275
+ self._plot_association(ax, df, pos_col, ld_col, lead_pos)
276
+
277
+ # Add significance line
278
+ ax.axhline(
279
+ y=self._genomewide_line,
280
+ color="grey",
281
+ linestyle="--",
282
+ linewidth=1,
283
+ zorder=1,
284
+ )
285
+
286
+ # Add SNP labels
287
+ if snp_labels and rs_col in df.columns and label_top_n > 0 and not df.empty:
288
+ add_snp_labels(
289
+ ax,
290
+ df,
291
+ pos_col=pos_col,
292
+ neglog10p_col="neglog10p",
293
+ rs_col=rs_col,
294
+ label_top_n=label_top_n,
295
+ genes_df=genes_df,
296
+ chrom=chrom,
297
+ )
298
+
299
+ # Add recombination overlay
300
+ if recomb_df is not None and not recomb_df.empty:
301
+ add_recombination_overlay(ax, recomb_df, start, end)
302
+
303
+ # Format axes
304
+ ax.set_ylabel(r"$-\log_{10}$ P")
305
+ ax.set_xlim(start, end)
306
+ ax.spines["top"].set_visible(False)
307
+ ax.spines["right"].set_visible(False)
308
+
309
+ # Add LD legend
310
+ if ld_col is not None and ld_col in df.columns:
311
+ self._add_ld_legend(ax)
312
+
313
+ # Plot gene track
314
+ if genes_df is not None and gene_ax is not None:
315
+ plot_gene_track(gene_ax, genes_df, chrom, start, end, exons_df)
316
+ gene_ax.set_xlabel(f"Chromosome {chrom} (Mb)")
317
+ gene_ax.spines["top"].set_visible(False)
318
+ gene_ax.spines["right"].set_visible(False)
319
+ gene_ax.spines["left"].set_visible(False)
320
+ else:
321
+ ax.set_xlabel(f"Chromosome {chrom} (Mb)")
322
+
323
+ # Format x-axis with Mb labels
324
+ ax.xaxis.set_major_formatter(FuncFormatter(lambda x, _: f"{x / 1e6:.2f}"))
325
+ ax.xaxis.set_major_locator(MaxNLocator(nbins=6))
326
+
327
+ # Adjust layout
328
+ fig.subplots_adjust(left=0.08, right=0.95, top=0.95, bottom=0.1, hspace=0.08)
329
+ plt.ion()
330
+
331
+ return fig
332
+
333
+ def _create_figure(
334
+ self,
335
+ genes_df: Optional[pd.DataFrame],
336
+ chrom: int,
337
+ start: int,
338
+ end: int,
339
+ figsize: Tuple[int, int],
340
+ ) -> Tuple[Figure, Axes, Optional[Axes]]:
341
+ """Create figure with optional gene track."""
342
+ if genes_df is not None:
343
+ # Calculate dynamic height based on gene rows
344
+ chrom_str = normalize_chrom(chrom)
345
+ region_genes = genes_df[
346
+ (
347
+ genes_df["chr"].astype(str).str.replace("chr", "", regex=False)
348
+ == chrom_str
349
+ )
350
+ & (genes_df["end"] >= start)
351
+ & (genes_df["start"] <= end)
352
+ ]
353
+ if not region_genes.empty:
354
+ temp_positions = assign_gene_positions(
355
+ region_genes.sort_values("start"), start, end
356
+ )
357
+ n_gene_rows = max(temp_positions) + 1 if temp_positions else 1
358
+ else:
359
+ n_gene_rows = 1
360
+
361
+ base_gene_height = 1.0
362
+ per_row_height = 0.5
363
+ gene_track_height = base_gene_height + (n_gene_rows - 1) * per_row_height
364
+ assoc_height = figsize[1] * 0.6
365
+ total_height = assoc_height + gene_track_height
366
+
367
+ fig, axes = plt.subplots(
368
+ 2,
369
+ 1,
370
+ figsize=(figsize[0], total_height),
371
+ height_ratios=[assoc_height, gene_track_height],
372
+ sharex=True,
373
+ gridspec_kw={"hspace": 0},
374
+ )
375
+ return fig, axes[0], axes[1]
376
+ else:
377
+ fig, ax = plt.subplots(figsize=(figsize[0], figsize[1] * 0.75))
378
+ return fig, ax, None
379
+
380
+ def _plot_association(
381
+ self,
382
+ ax: Axes,
383
+ df: pd.DataFrame,
384
+ pos_col: str,
385
+ ld_col: Optional[str],
386
+ lead_pos: Optional[int],
387
+ ) -> None:
388
+ """Plot association scatter with LD coloring."""
389
+ # LD-based coloring
390
+ if ld_col is not None and ld_col in df.columns:
391
+ df["ld_bin"] = df[ld_col].apply(get_ld_bin)
392
+ df = df.sort_values(ld_col, ascending=True, na_position="first")
393
+
394
+ palette = get_ld_color_palette()
395
+ for bin_label in df["ld_bin"].unique():
396
+ bin_data = df[df["ld_bin"] == bin_label]
397
+ ax.scatter(
398
+ bin_data[pos_col],
399
+ bin_data["neglog10p"],
400
+ c=palette.get(bin_label, "#BEBEBE"),
401
+ s=60,
402
+ edgecolor="black",
403
+ linewidth=0.5,
404
+ zorder=2,
405
+ )
406
+ else:
407
+ # Default: grey points
408
+ ax.scatter(
409
+ df[pos_col],
410
+ df["neglog10p"],
411
+ c="#BEBEBE",
412
+ s=60,
413
+ edgecolor="black",
414
+ linewidth=0.5,
415
+ zorder=2,
416
+ )
417
+
418
+ # Highlight lead SNP
419
+ if lead_pos is not None:
420
+ lead_snp = df[df[pos_col] == lead_pos]
421
+ if not lead_snp.empty:
422
+ ax.scatter(
423
+ lead_snp[pos_col],
424
+ lead_snp["neglog10p"],
425
+ c=LEAD_SNP_COLOR,
426
+ s=120,
427
+ marker="D",
428
+ edgecolors="black",
429
+ linewidths=1,
430
+ zorder=10,
431
+ )
432
+
433
+ def _add_ld_legend(self, ax: Axes) -> None:
434
+ """Add LD color legend to plot."""
435
+ palette = get_ld_color_palette()
436
+ legend_elements = [
437
+ Line2D(
438
+ [0],
439
+ [0],
440
+ marker="D",
441
+ color="w",
442
+ markerfacecolor=LEAD_SNP_COLOR,
443
+ markeredgecolor="black",
444
+ markersize=8,
445
+ label="Index SNP",
446
+ ),
447
+ ]
448
+
449
+ for threshold, label, _ in LD_BINS:
450
+ legend_elements.append(
451
+ Patch(
452
+ facecolor=palette[label],
453
+ edgecolor="black",
454
+ label=label,
455
+ )
456
+ )
457
+
458
+ ax.legend(
459
+ handles=legend_elements,
460
+ loc="upper left",
461
+ fontsize=9,
462
+ frameon=True,
463
+ framealpha=0.9,
464
+ title=r"$r^2$",
465
+ title_fontsize=10,
466
+ handlelength=1.5,
467
+ handleheight=1.0,
468
+ labelspacing=0.4,
469
+ )
470
+
471
+ def plot_stacked(
472
+ self,
473
+ gwas_dfs: List[pd.DataFrame],
474
+ chrom: int,
475
+ start: int,
476
+ end: int,
477
+ lead_positions: Optional[List[int]] = None,
478
+ panel_labels: Optional[List[str]] = None,
479
+ ld_reference_file: Optional[str] = None,
480
+ ld_reference_files: Optional[List[str]] = None,
481
+ genes_df: Optional[pd.DataFrame] = None,
482
+ exons_df: Optional[pd.DataFrame] = None,
483
+ eqtl_df: Optional[pd.DataFrame] = None,
484
+ eqtl_gene: Optional[str] = None,
485
+ recomb_df: Optional[pd.DataFrame] = None,
486
+ show_recombination: bool = True,
487
+ snp_labels: bool = True,
488
+ label_top_n: int = 3,
489
+ pos_col: str = "ps",
490
+ p_col: str = "p_wald",
491
+ rs_col: str = "rs",
492
+ figsize: Tuple[float, Optional[float]] = (12, None),
493
+ ) -> Any:
494
+ """Create stacked regional association plots for multiple GWAS.
495
+
496
+ Vertically stacks multiple GWAS results for comparison, with shared
497
+ x-axis and optional gene track at the bottom.
498
+
499
+ Args:
500
+ gwas_dfs: List of GWAS results DataFrames to stack.
501
+ chrom: Chromosome number.
502
+ start: Start position of the region.
503
+ end: End position of the region.
504
+ lead_positions: List of lead SNP positions (one per GWAS).
505
+ If None, auto-detects from lowest p-value.
506
+ panel_labels: Labels for each panel (e.g., phenotype names).
507
+ ld_reference_file: Single PLINK fileset for all panels.
508
+ ld_reference_files: List of PLINK filesets (one per panel).
509
+ genes_df: Gene annotations for bottom track.
510
+ exons_df: Exon annotations for gene track.
511
+ eqtl_df: eQTL data to display as additional panel.
512
+ eqtl_gene: Filter eQTL data to this target gene.
513
+ recomb_df: Pre-loaded recombination rate data.
514
+ show_recombination: Whether to show recombination overlay.
515
+ snp_labels: Whether to label top SNPs.
516
+ label_top_n: Number of top SNPs to label per panel.
517
+ pos_col: Column name for position.
518
+ p_col: Column name for p-value.
519
+ rs_col: Column name for SNP ID.
520
+ figsize: Figure size (width, height). If height is None, auto-calculates.
521
+
522
+ Returns:
523
+ Figure object (type depends on backend).
524
+
525
+ Example:
526
+ >>> fig = plotter.plot_stacked(
527
+ ... [gwas_height, gwas_bmi, gwas_whr],
528
+ ... chrom=1, start=1000000, end=2000000,
529
+ ... panel_labels=["Height", "BMI", "WHR"],
530
+ ... genes_df=genes_df,
531
+ ... )
532
+ """
533
+ n_gwas = len(gwas_dfs)
534
+ if n_gwas == 0:
535
+ raise ValueError("At least one GWAS DataFrame required")
536
+
537
+ # Validate inputs
538
+ for i, df in enumerate(gwas_dfs):
539
+ validate_gwas_df(df, pos_col=pos_col, p_col=p_col)
540
+ if genes_df is not None:
541
+ validate_genes_df(genes_df)
542
+
543
+ # Handle lead positions
544
+ if lead_positions is None:
545
+ lead_positions = []
546
+ for df in gwas_dfs:
547
+ region_df = df[(df[pos_col] >= start) & (df[pos_col] <= end)]
548
+ if not region_df.empty:
549
+ lead_idx = region_df[p_col].idxmin()
550
+ lead_positions.append(int(region_df.loc[lead_idx, pos_col]))
551
+ else:
552
+ lead_positions.append(None)
553
+
554
+ # Handle LD reference files
555
+ if ld_reference_files is None and ld_reference_file is not None:
556
+ ld_reference_files = [ld_reference_file] * n_gwas
557
+
558
+ # Calculate panel layout
559
+ panel_height = 2.5 # inches per GWAS panel
560
+ eqtl_height = 2.0 if eqtl_df is not None else 0
561
+
562
+ # Gene track height
563
+ if genes_df is not None:
564
+ chrom_str = normalize_chrom(chrom)
565
+ region_genes = genes_df[
566
+ (genes_df["chr"].astype(str).str.replace("chr", "", regex=False) == chrom_str)
567
+ & (genes_df["end"] >= start)
568
+ & (genes_df["start"] <= end)
569
+ ]
570
+ if not region_genes.empty:
571
+ temp_positions = assign_gene_positions(
572
+ region_genes.sort_values("start"), start, end
573
+ )
574
+ n_gene_rows = max(temp_positions) + 1 if temp_positions else 1
575
+ else:
576
+ n_gene_rows = 1
577
+ gene_track_height = 1.0 + (n_gene_rows - 1) * 0.5
578
+ else:
579
+ gene_track_height = 0
580
+
581
+ # Calculate total panels and heights
582
+ n_panels = n_gwas + (1 if eqtl_df is not None else 0) + (1 if genes_df is not None else 0)
583
+ height_ratios = [panel_height] * n_gwas
584
+ if eqtl_df is not None:
585
+ height_ratios.append(eqtl_height)
586
+ if genes_df is not None:
587
+ height_ratios.append(gene_track_height)
588
+
589
+ # Calculate figure height
590
+ total_height = figsize[1] if figsize[1] else sum(height_ratios)
591
+ actual_figsize = (figsize[0], total_height)
592
+
593
+ logger.debug(f"Creating stacked plot with {n_panels} panels for chr{chrom}:{start}-{end}")
594
+
595
+ # Prevent auto-display in interactive environments
596
+ plt.ioff()
597
+
598
+ # Load recombination data if needed
599
+ if show_recombination and recomb_df is None:
600
+ recomb_df = self._get_recomb_for_region(chrom, start, end)
601
+
602
+ # Create figure
603
+ fig, axes = plt.subplots(
604
+ n_panels,
605
+ 1,
606
+ figsize=actual_figsize,
607
+ height_ratios=height_ratios,
608
+ sharex=True,
609
+ gridspec_kw={"hspace": 0.05},
610
+ )
611
+ if n_panels == 1:
612
+ axes = [axes]
613
+
614
+ # Plot each GWAS panel
615
+ for i, (gwas_df, lead_pos) in enumerate(zip(gwas_dfs, lead_positions)):
616
+ ax = axes[i]
617
+ df = gwas_df.copy()
618
+ df["neglog10p"] = -np.log10(df[p_col].clip(lower=1e-300))
619
+
620
+ # Calculate LD if reference provided
621
+ ld_col = None
622
+ if ld_reference_files and ld_reference_files[i] and lead_pos:
623
+ lead_snp_row = df[df[pos_col] == lead_pos]
624
+ if not lead_snp_row.empty and rs_col in df.columns:
625
+ lead_snp_id = lead_snp_row[rs_col].iloc[0]
626
+ ld_df = calculate_ld(
627
+ bfile_path=ld_reference_files[i],
628
+ lead_snp=lead_snp_id,
629
+ window_kb=max((end - start) // 1000, 500),
630
+ plink_path=self.plink_path,
631
+ species=self.species,
632
+ )
633
+ if not ld_df.empty:
634
+ df = df.merge(ld_df, left_on=rs_col, right_on="SNP", how="left")
635
+ ld_col = "R2"
636
+
637
+ # Plot association
638
+ self._plot_association(ax, df, pos_col, ld_col, lead_pos)
639
+
640
+ # Add significance line
641
+ ax.axhline(y=self._genomewide_line, color="grey", linestyle="--", linewidth=1, zorder=1)
642
+
643
+ # Add SNP labels
644
+ if snp_labels and rs_col in df.columns and label_top_n > 0 and not df.empty:
645
+ add_snp_labels(
646
+ ax, df, pos_col=pos_col, neglog10p_col="neglog10p",
647
+ rs_col=rs_col, label_top_n=label_top_n, genes_df=genes_df, chrom=chrom,
648
+ )
649
+
650
+ # Add recombination overlay (only on first panel)
651
+ if i == 0 and recomb_df is not None and not recomb_df.empty:
652
+ add_recombination_overlay(ax, recomb_df, start, end)
653
+
654
+ # Format axes
655
+ ax.set_ylabel(r"$-\log_{10}$ P")
656
+ ax.set_xlim(start, end)
657
+ ax.spines["top"].set_visible(False)
658
+ ax.spines["right"].set_visible(False)
659
+
660
+ # Add panel label
661
+ if panel_labels and i < len(panel_labels):
662
+ ax.annotate(
663
+ panel_labels[i],
664
+ xy=(0.02, 0.95),
665
+ xycoords="axes fraction",
666
+ fontsize=11,
667
+ fontweight="bold",
668
+ va="top",
669
+ ha="left",
670
+ )
671
+
672
+ # Add LD legend (only on first panel)
673
+ if i == 0 and ld_col is not None and ld_col in df.columns:
674
+ self._add_ld_legend(ax)
675
+
676
+ # Plot eQTL panel if provided
677
+ panel_idx = n_gwas
678
+ if eqtl_df is not None:
679
+ ax = axes[panel_idx]
680
+ eqtl_data = eqtl_df.copy()
681
+
682
+ # Filter by gene if specified
683
+ if eqtl_gene and "gene" in eqtl_data.columns:
684
+ eqtl_data = eqtl_data[eqtl_data["gene"] == eqtl_gene]
685
+
686
+ # Filter by region
687
+ if "pos" in eqtl_data.columns:
688
+ eqtl_data = eqtl_data[(eqtl_data["pos"] >= start) & (eqtl_data["pos"] <= end)]
689
+
690
+ if not eqtl_data.empty:
691
+ eqtl_data["neglog10p"] = -np.log10(eqtl_data["p_value"].clip(lower=1e-300))
692
+
693
+ # Plot as diamonds (different from GWAS circles)
694
+ ax.scatter(
695
+ eqtl_data["pos"],
696
+ eqtl_data["neglog10p"],
697
+ c="#FF6B6B",
698
+ s=60,
699
+ marker="D",
700
+ edgecolor="black",
701
+ linewidth=0.5,
702
+ zorder=2,
703
+ label=f"eQTL ({eqtl_gene})" if eqtl_gene else "eQTL",
704
+ )
705
+ ax.legend(loc="upper left", fontsize=9)
706
+
707
+ ax.set_ylabel(r"$-\log_{10}$ P (eQTL)")
708
+ ax.axhline(y=self._genomewide_line, color="grey", linestyle="--", linewidth=1)
709
+ ax.spines["top"].set_visible(False)
710
+ ax.spines["right"].set_visible(False)
711
+ panel_idx += 1
712
+
713
+ # Plot gene track
714
+ if genes_df is not None:
715
+ gene_ax = axes[panel_idx]
716
+ plot_gene_track(gene_ax, genes_df, chrom, start, end, exons_df)
717
+ gene_ax.set_xlabel(f"Chromosome {chrom} (Mb)")
718
+ gene_ax.spines["top"].set_visible(False)
719
+ gene_ax.spines["right"].set_visible(False)
720
+ gene_ax.spines["left"].set_visible(False)
721
+ else:
722
+ # Set x-label on bottom panel
723
+ axes[-1].set_xlabel(f"Chromosome {chrom} (Mb)")
724
+
725
+ # Format x-axis
726
+ axes[0].xaxis.set_major_formatter(FuncFormatter(lambda x, _: f"{x / 1e6:.2f}"))
727
+ axes[0].xaxis.set_major_locator(MaxNLocator(nbins=6))
728
+
729
+ # Adjust layout
730
+ fig.subplots_adjust(left=0.08, right=0.95, top=0.95, bottom=0.08, hspace=0.05)
731
+ plt.ion()
732
+
733
+ return fig