pylocuszoom 0.8.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pylocuszoom/__init__.py CHANGED
@@ -51,6 +51,8 @@ from .colors import (
51
51
  get_phewas_category_palette,
52
52
  )
53
53
 
54
+ # Configuration classes (internal - use kwargs directly on plot()/plot_stacked())
55
+ # from .config import PlotConfig, StackedPlotConfig # Internal use only
54
56
  # Ensembl integration
55
57
  from .ensembl import (
56
58
  clear_ensembl_cache,
@@ -62,7 +64,6 @@ from .ensembl import (
62
64
 
63
65
  # eQTL support
64
66
  from .eqtl import (
65
- EQTLValidationError,
66
67
  calculate_colocalization_overlap,
67
68
  filter_eqtl_by_gene,
68
69
  filter_eqtl_by_region,
@@ -71,9 +72,19 @@ from .eqtl import (
71
72
  validate_eqtl_df,
72
73
  )
73
74
 
75
+ # Exception hierarchy
76
+ from .exceptions import (
77
+ BackendError,
78
+ DataDownloadError,
79
+ EQTLValidationError,
80
+ FinemappingValidationError,
81
+ LoaderValidationError,
82
+ PyLocusZoomError,
83
+ ValidationError,
84
+ )
85
+
74
86
  # Fine-mapping/SuSiE support
75
87
  from .finemapping import (
76
- FinemappingValidationError,
77
88
  filter_by_credible_set,
78
89
  filter_finemapping_by_region,
79
90
  get_credible_sets,
@@ -134,11 +145,8 @@ from .recombination import (
134
145
  load_recombination_map,
135
146
  )
136
147
 
137
- # Schema validation
138
- from .schemas import LoaderValidationError
139
-
140
148
  # Validation utilities
141
- from .utils import ValidationError, to_pandas
149
+ from .utils import to_pandas
142
150
 
143
151
  __all__ = [
144
152
  # Core
@@ -187,8 +195,12 @@ __all__ = [
187
195
  # Logging
188
196
  "enable_logging",
189
197
  "disable_logging",
190
- # Validation & Utils
198
+ # Exceptions
199
+ "PyLocusZoomError",
191
200
  "ValidationError",
201
+ "BackendError",
202
+ "DataDownloadError",
203
+ # Utils
192
204
  "to_pandas",
193
205
  # PheWAS
194
206
  "validate_phewas_df",
pylocuszoom/config.py ADDED
@@ -0,0 +1,365 @@
1
+ """Pydantic configuration classes for pyLocusZoom plot methods.
2
+
3
+ This module provides typed, validated configuration objects that replace
4
+ the parameter explosion in plot methods. Each config is immutable (frozen)
5
+ to prevent accidental modification.
6
+
7
+ Example:
8
+ >>> from pylocuszoom.config import RegionConfig, DisplayConfig, PlotConfig
9
+ >>> region = RegionConfig(chrom=1, start=1000000, end=2000000)
10
+ >>> display = DisplayConfig(snp_labels=False, label_top_n=3)
11
+ >>>
12
+ >>> # Using composite PlotConfig with factory method
13
+ >>> config = PlotConfig.from_kwargs(chrom=1, start=1000000, end=2000000)
14
+ """
15
+
16
+ from typing import List, Optional, Tuple
17
+
18
+ from pydantic import BaseModel, ConfigDict, Field, model_validator
19
+
20
+
21
class RegionConfig(BaseModel):
    """Immutable description of the genomic window to plot.

    Attributes:
        chrom: Chromosome number; values below 1 are rejected.
        start: First base-pair position of the window; must not be negative.
        end: Last base-pair position of the window; must be positive and
            strictly greater than ``start``.
    """

    model_config = ConfigDict(frozen=True)

    chrom: int = Field(..., ge=1, description="Chromosome number")
    start: int = Field(..., ge=0, description="Start position (bp)")
    end: int = Field(..., gt=0, description="End position (bp)")

    @model_validator(mode="after")
    def validate_region(self) -> "RegionConfig":
        """Reject empty or inverted windows (``start`` must be below ``end``)."""
        # Per-field bounds are handled by the Field constraints above; this
        # hook only checks how the two coordinates relate to each other.
        if self.end > self.start:
            return self
        raise ValueError(f"start ({self.start}) must be < end ({self.end})")
42
+
43
+
44
class ColumnConfig(BaseModel):
    """Immutable mapping from logical GWAS fields to DataFrame column names.

    Attributes:
        pos_col: Name of the column holding genomic positions
            (defaults to ``"ps"``).
        p_col: Name of the column holding p-values
            (defaults to ``"p_wald"``).
        rs_col: Name of the column holding SNP identifiers
            (defaults to ``"rs"``).
    """

    model_config = ConfigDict(frozen=True)

    pos_col: str = Field(
        default="ps",
        description="Position column name",
    )
    p_col: str = Field(
        default="p_wald",
        description="P-value column name",
    )
    rs_col: str = Field(
        default="rs",
        description="SNP ID column name",
    )
58
+
59
+
60
class DisplayConfig(BaseModel):
    """Immutable bundle of visual toggles applied when drawing a plot.

    Attributes:
        snp_labels: Turn SNP labels on or off (on by default).
        label_top_n: How many of the top SNPs receive a label; negative
            values are rejected (defaults to 5).
        show_recombination: Turn the recombination-rate overlay on or off
            (on by default).
        figsize: Figure dimensions as ``(width, height)`` in inches
            (defaults to ``(12.0, 8.0)``).
    """

    model_config = ConfigDict(frozen=True)

    snp_labels: bool = Field(
        default=True,
        description="Show SNP labels",
    )
    label_top_n: int = Field(
        default=5,
        ge=0,
        description="Number of top SNPs to label",
    )
    show_recombination: bool = Field(
        default=True,
        description="Show recombination overlay",
    )
    figsize: Tuple[float, float] = Field(
        default=(12.0, 8.0),
        description="Figure size (width, height)",
    )
80
+
81
+
82
class LDConfig(BaseModel):
    """Immutable linkage-disequilibrium (LD) settings.

    The three fields combine into three usage modes:

    1. No LD colouring: leave every field as ``None`` (the default).
    2. Pre-computed LD: set ``ld_col`` to the column holding R^2 values.
    3. Calculated LD: set both ``lead_pos`` and ``ld_reference_file``.

    Attributes:
        lead_pos: Position of the lead/index SNP to highlight; must be at
            least 1 when given.
        ld_reference_file: Path to the PLINK binary fileset used for LD
            calculation.
        ld_col: Name of the column holding pre-computed LD (R^2) values.
    """

    model_config = ConfigDict(frozen=True)

    lead_pos: Optional[int] = Field(
        default=None,
        ge=1,
        description="Lead SNP position",
    )
    ld_reference_file: Optional[str] = Field(
        default=None,
        description="PLINK binary fileset path",
    )
    ld_col: Optional[str] = Field(
        default=None,
        description="Pre-computed LD column name",
    )

    @model_validator(mode="after")
    def validate_ld_config(self) -> "LDConfig":
        """Return the model unchanged; this hook performs no checks itself.

        The rule "``ld_reference_file`` needs a lead position" is enforced by
        the composite configs instead (``PlotConfig`` and
        ``StackedPlotConfig``). Stacked plots may supply a ``lead_positions``
        list in place of ``lead_pos`` (broadcast mode), and that cannot be
        judged from inside this class.
        """
        return self
120
+
121
+
122
class PlotConfig(BaseModel):
    """Immutable, validated parameter set for the ``plot()`` method.

    Groups the region, column, display and LD sub-configs into one object.
    Build it either from ready-made sub-configs or through
    :meth:`from_kwargs`, which takes the flat keyword arguments used by
    ``plot()``.

    Attributes:
        region: Genomic region specification (required).
        columns: DataFrame column name mappings.
        display: Display and visual options.
        ld: Linkage disequilibrium configuration.

    Example:
        >>> # Nested construction
        >>> config = PlotConfig(
        ...     region=RegionConfig(chrom=1, start=1000000, end=2000000),
        ...     display=DisplayConfig(snp_labels=False),
        ... )
        >>>
        >>> # Flat keyword construction
        >>> config = PlotConfig.from_kwargs(
        ...     chrom=1, start=1000000, end=2000000,
        ...     snp_labels=False, lead_pos=1500000,
        ... )
    """

    model_config = ConfigDict(frozen=True)

    region: RegionConfig
    columns: ColumnConfig = Field(default_factory=ColumnConfig)
    display: DisplayConfig = Field(default_factory=DisplayConfig)
    ld: LDConfig = Field(default_factory=LDConfig)

    @model_validator(mode="after")
    def validate_ld_requires_lead_pos(self) -> "PlotConfig":
        """Require ``lead_pos`` whenever an LD reference file is configured."""
        ld_settings = self.ld
        has_reference = ld_settings.ld_reference_file is not None
        if has_reference and ld_settings.lead_pos is None:
            raise ValueError("lead_pos is required when ld_reference_file is provided")
        return self

    @classmethod
    def from_kwargs(
        cls,
        *,
        # Region params (required)
        chrom: int,
        start: int,
        end: int,
        # Column params
        pos_col: str = "ps",
        p_col: str = "p_wald",
        rs_col: str = "rs",
        # Display params
        snp_labels: bool = True,
        label_top_n: int = 5,
        show_recombination: bool = True,
        figsize: Tuple[float, float] = (12.0, 8.0),
        # LD params
        lead_pos: Optional[int] = None,
        ld_reference_file: Optional[str] = None,
        ld_col: Optional[str] = None,
    ) -> "PlotConfig":
        """Build a PlotConfig from the flat keyword arguments of ``plot()``.

        Each argument is routed to the sub-config that owns it, so callers
        can keep passing plain keywords instead of nested config objects.

        Args:
            chrom: Chromosome number.
            start: Start position (bp).
            end: End position (bp).
            pos_col: Column name for position.
            p_col: Column name for p-value.
            rs_col: Column name for SNP ID.
            snp_labels: Whether to show SNP labels.
            label_top_n: Number of top SNPs to label.
            show_recombination: Whether to show recombination overlay.
            figsize: Figure size (width, height).
            lead_pos: Position of lead SNP.
            ld_reference_file: PLINK binary fileset path.
            ld_col: Pre-computed LD column name.

        Returns:
            PlotConfig holding the four nested sub-configs.

        Raises:
            ValidationError: If any parameter fails validation.
        """
        # Sub-configs are built in region -> columns -> display -> ld order,
        # so the first invalid group is the one that reports an error.
        region_cfg = RegionConfig(chrom=chrom, start=start, end=end)
        column_cfg = ColumnConfig(pos_col=pos_col, p_col=p_col, rs_col=rs_col)
        display_cfg = DisplayConfig(
            snp_labels=snp_labels,
            label_top_n=label_top_n,
            show_recombination=show_recombination,
            figsize=figsize,
        )
        ld_cfg = LDConfig(
            lead_pos=lead_pos,
            ld_reference_file=ld_reference_file,
            ld_col=ld_col,
        )
        return cls(
            region=region_cfg,
            columns=column_cfg,
            display=display_cfg,
            ld=ld_cfg,
        )
226
+
227
+
228
class StackedPlotConfig(BaseModel):
    """Immutable, validated parameter set for the ``plot_stacked()`` method.

    Follows the same composition as ``PlotConfig`` and adds list-valued
    options that carry one entry per stacked panel.

    Attributes:
        region: Genomic region specification (required).
        columns: DataFrame column name mappings.
        display: Display and visual options.
        ld: Linkage disequilibrium configuration (single file for broadcast).
        lead_positions: Lead SNP positions, one per panel.
        panel_labels: Panel labels, one per panel.
        ld_reference_files: PLINK filesets, one per panel.

    Example:
        >>> config = StackedPlotConfig.from_kwargs(
        ...     chrom=1, start=1000000, end=2000000,
        ...     lead_positions=[1500000, 1600000],
        ...     panel_labels=["Study A", "Study B"],
        ... )
    """

    model_config = ConfigDict(frozen=True)

    region: RegionConfig
    columns: ColumnConfig = Field(default_factory=ColumnConfig)
    display: DisplayConfig = Field(default_factory=DisplayConfig)
    ld: LDConfig = Field(default_factory=LDConfig)

    # Per-panel options specific to stacked plots
    lead_positions: Optional[List[int]] = Field(
        default=None,
        description="Lead SNP positions (one per panel)",
    )
    panel_labels: Optional[List[str]] = Field(
        default=None,
        description="Panel labels (one per panel)",
    )
    ld_reference_files: Optional[List[str]] = Field(
        default=None,
        description="PLINK filesets (one per panel)",
    )

    @model_validator(mode="after")
    def validate_broadcast_ld(self) -> "StackedPlotConfig":
        """Require ``lead_positions`` when one LD file is broadcast to all panels.

        Broadcast mode means ``ld.ld_reference_file`` is set while
        ``ld.lead_pos`` is not; the per-panel reference SNPs must then come
        from ``lead_positions``.
        """
        ld_settings = self.ld
        is_broadcast = (
            ld_settings.ld_reference_file is not None
            and ld_settings.lead_pos is None
        )
        if is_broadcast and self.lead_positions is None:
            raise ValueError(
                "lead_positions is required when ld_reference_file is provided "
                "for broadcast (one lead position per panel)"
            )
        return self

    @classmethod
    def from_kwargs(
        cls,
        *,
        # Region params (required)
        chrom: int,
        start: int,
        end: int,
        # Column params
        pos_col: str = "ps",
        p_col: str = "p_wald",
        rs_col: str = "rs",
        # Display params
        snp_labels: bool = True,
        label_top_n: int = 3,  # Default for stacked is 3 (less crowded)
        show_recombination: bool = True,
        figsize: Tuple[float, float] = (12.0, 8.0),
        # LD params (single for broadcast)
        ld_reference_file: Optional[str] = None,
        ld_col: Optional[str] = None,
        # Stacked-specific list params
        lead_positions: Optional[List[int]] = None,
        panel_labels: Optional[List[str]] = None,
        ld_reference_files: Optional[List[str]] = None,
    ) -> "StackedPlotConfig":
        """Build a StackedPlotConfig from the flat keywords of ``plot_stacked()``.

        Each argument is routed to the sub-config that owns it. No
        ``lead_pos`` is passed to the LD sub-config here; lead SNPs for
        stacked plots are given through ``lead_positions``.

        Args:
            chrom: Chromosome number.
            start: Start position (bp).
            end: End position (bp).
            pos_col: Column name for position.
            p_col: Column name for p-value.
            rs_col: Column name for SNP ID.
            snp_labels: Whether to show SNP labels.
            label_top_n: Number of top SNPs to label (default 3 for stacked).
            show_recombination: Whether to show recombination overlay.
            figsize: Figure size (width, height).
            ld_reference_file: Single PLINK fileset (broadcast to all panels).
            ld_col: Pre-computed LD column name.
            lead_positions: List of lead SNP positions.
            panel_labels: List of panel labels.
            ld_reference_files: List of PLINK filesets.

        Returns:
            StackedPlotConfig holding the nested sub-configs and list options.

        Raises:
            ValidationError: If any parameter fails validation.
        """
        # Sub-configs are built in region -> columns -> display -> ld order,
        # so the first invalid group is the one that reports an error.
        region_cfg = RegionConfig(chrom=chrom, start=start, end=end)
        column_cfg = ColumnConfig(pos_col=pos_col, p_col=p_col, rs_col=rs_col)
        display_cfg = DisplayConfig(
            snp_labels=snp_labels,
            label_top_n=label_top_n,
            show_recombination=show_recombination,
            figsize=figsize,
        )
        ld_cfg = LDConfig(
            ld_reference_file=ld_reference_file,
            ld_col=ld_col,
        )
        return cls(
            region=region_cfg,
            columns=column_cfg,
            display=display_cfg,
            ld=ld_cfg,
            lead_positions=lead_positions,
            panel_labels=panel_labels,
            ld_reference_files=ld_reference_files,
        )
356
+
357
+
358
+ __all__ = [
359
+ "RegionConfig",
360
+ "ColumnConfig",
361
+ "DisplayConfig",
362
+ "LDConfig",
363
+ "PlotConfig",
364
+ "StackedPlotConfig",
365
+ ]
pylocuszoom/eqtl.py CHANGED
@@ -9,20 +9,15 @@ from typing import List, Optional
9
9
  import numpy as np
10
10
  import pandas as pd
11
11
 
12
+ from .exceptions import EQTLValidationError, ValidationError
12
13
  from .logging import logger
13
- from .utils import ValidationError, filter_by_region
14
+ from .utils import filter_by_region
14
15
  from .validation import DataFrameValidator
15
16
 
16
17
  REQUIRED_EQTL_COLS = ["pos", "p_value"]
17
18
  OPTIONAL_EQTL_COLS = ["gene", "effect_size", "rs", "se"]
18
19
 
19
20
 
20
- class EQTLValidationError(ValueError):
21
- """Raised when eQTL DataFrame validation fails."""
22
-
23
- pass
24
-
25
-
26
21
  def validate_eqtl_df(
27
22
  df: pd.DataFrame,
28
23
  pos_col: str = "pos",
@@ -42,6 +37,7 @@ def validate_eqtl_df(
42
37
  (
43
38
  DataFrameValidator(df, "eQTL DataFrame")
44
39
  .require_columns([pos_col, p_col])
40
+ .require_numeric([p_col])
45
41
  .validate()
46
42
  )
47
43
  except ValidationError as e:
pylocuszoom/exceptions.py ADDED
@@ -0,0 +1,33 @@
1
+ """Exception hierarchy for pyLocusZoom.
2
+
3
+ All pyLocusZoom exceptions inherit from PyLocusZoomError, enabling users to
4
+ catch all library errors with `except PyLocusZoomError`.
5
+ """
6
+
7
+
8
class PyLocusZoomError(Exception):
    """Root of the pyLocusZoom exception tree.

    Catching this type handles every error class defined in this module.
    """


class ValidationError(PyLocusZoomError, ValueError):
    """Input failed validation.

    Also derives from ValueError, so handlers written as
    ``except ValueError`` still catch it.
    """


class EQTLValidationError(ValidationError):
    """An eQTL DataFrame failed validation."""


class FinemappingValidationError(ValidationError):
    """A fine-mapping DataFrame failed validation."""


class LoaderValidationError(ValidationError):
    """Loaded data failed validation."""


class BackendError(PyLocusZoomError):
    """A backend operation failed."""


class DataDownloadError(PyLocusZoomError, RuntimeError):
    """A data download operation failed.

    Also derives from RuntimeError, so handlers written as
    ``except RuntimeError`` catch it as well.
    """
pylocuszoom/finemapping.py CHANGED
@@ -8,8 +8,9 @@ from typing import List, Optional
8
8
 
9
9
  import pandas as pd
10
10
 
11
+ from .exceptions import FinemappingValidationError, ValidationError
11
12
  from .logging import logger
12
- from .utils import ValidationError, filter_by_region
13
+ from .utils import filter_by_region
13
14
  from .validation import DataFrameValidator
14
15
 
15
16
  # Required columns for fine-mapping data
@@ -17,12 +18,6 @@ REQUIRED_FINEMAPPING_COLS = ["pos", "pip"]
17
18
  OPTIONAL_FINEMAPPING_COLS = ["rs", "cs", "cs_id", "effect", "se"]
18
19
 
19
20
 
20
- class FinemappingValidationError(ValueError):
21
- """Raised when fine-mapping DataFrame validation fails."""
22
-
23
- pass
24
-
25
-
26
21
  def validate_finemapping_df(
27
22
  df: pd.DataFrame,
28
23
  pos_col: str = "pos",
pylocuszoom/forest.py CHANGED
@@ -31,5 +31,6 @@ def validate_forest_df(
31
31
  DataFrameValidator(df, "Forest plot DataFrame")
32
32
  .require_columns([study_col, effect_col, ci_lower_col, ci_upper_col])
33
33
  .require_numeric([effect_col, ci_lower_col, ci_upper_col])
34
+ .require_ci_ordering(ci_lower_col, effect_col, ci_upper_col)
34
35
  .validate()
35
36
  )
pylocuszoom/gene_track.py CHANGED
@@ -48,22 +48,23 @@ def assign_gene_positions(genes_df: pd.DataFrame, start: int, end: int) -> List[
48
48
  List of integer row indices (0, 1, 2, ...) for each gene.
49
49
  """
50
50
  positions = []
51
- occupied = [] # List of (end_pos, row)
51
+ # Track the rightmost end position for each row (including label buffer)
52
+ row_ends: dict[int, int] = {} # row -> rightmost end position
52
53
  region_width = end - start
54
+ label_buffer = region_width * 0.08 # Extra space for labels
53
55
 
54
56
  for _, gene in genes_df.iterrows():
55
57
  gene_start = max(gene["start"], start)
56
58
  gene_end = min(gene["end"], end)
57
59
 
58
- # Find first available row with buffer for label spacing
60
+ # Find first available row where gene doesn't overlap
59
61
  row = 0
60
- label_buffer = region_width * 0.08 # Extra space for labels
61
- for occ_end, occ_row in occupied:
62
- if occ_row == row and occ_end > gene_start - label_buffer:
63
- row = occ_row + 1
62
+ while row in row_ends and row_ends[row] > gene_start - label_buffer:
63
+ row += 1
64
64
 
65
65
  positions.append(row)
66
- occupied.append((gene_end, row))
66
+ # Update the row's end position (including buffer for next gene check)
67
+ row_ends[row] = gene_end
67
68
 
68
69
  return positions
69
70
 
pylocuszoom/plotter.py CHANGED
@@ -15,6 +15,7 @@ from typing import Any, List, Optional, Tuple
15
15
  import matplotlib.pyplot as plt
16
16
  import numpy as np
17
17
  import pandas as pd
18
+ import requests
18
19
 
19
20
  from .backends import BackendType, get_backend
20
21
  from .backends.hover import HoverConfig, HoverDataBuilder
@@ -30,6 +31,7 @@ from .colors import (
30
31
  get_ld_color_palette,
31
32
  get_phewas_category_palette,
32
33
  )
34
+ from .config import PlotConfig, StackedPlotConfig
33
35
  from .ensembl import get_genes_for_region
34
36
  from .eqtl import validate_eqtl_df
35
37
  from .finemapping import (
@@ -171,9 +173,17 @@ class LocusZoomPlotter:
171
173
  # Download
172
174
  try:
173
175
  return download_canine_recombination_maps()
174
- except Exception as e:
176
+ except (requests.RequestException, OSError, IOError) as e:
177
+ # Expected network/file errors - graceful fallback
175
178
  logger.warning(f"Could not download recombination maps: {e}")
176
179
  return None
180
+ except Exception as e:
181
+ # JUSTIFICATION: Download failure should not prevent plotting.
182
+ # We catch broadly here because graceful degradation is acceptable
183
+ # for optional recombination map downloads. Error-level logging
184
+ # ensures the issue is visible.
185
+ logger.error(f"Unexpected error downloading recombination maps: {e}")
186
+ return None
177
187
  elif self.recomb_data_dir:
178
188
  return Path(self.recomb_data_dir)
179
189
  return None
@@ -207,53 +217,76 @@ class LocusZoomPlotter:
207
217
  def plot(
208
218
  self,
209
219
  gwas_df: pd.DataFrame,
220
+ *,
210
221
  chrom: int,
211
222
  start: int,
212
223
  end: int,
224
+ pos_col: str = "ps",
225
+ p_col: str = "p_wald",
226
+ rs_col: str = "rs",
227
+ snp_labels: bool = True,
228
+ label_top_n: int = 5,
229
+ show_recombination: bool = True,
230
+ figsize: Tuple[float, float] = (12.0, 8.0),
213
231
  lead_pos: Optional[int] = None,
214
232
  ld_reference_file: Optional[str] = None,
215
233
  ld_col: Optional[str] = None,
216
234
  genes_df: Optional[pd.DataFrame] = None,
217
235
  exons_df: Optional[pd.DataFrame] = None,
218
236
  recomb_df: Optional[pd.DataFrame] = None,
219
- show_recombination: bool = True,
220
- snp_labels: bool = True,
221
- label_top_n: int = 5,
222
- pos_col: str = "ps",
223
- p_col: str = "p_wald",
224
- rs_col: str = "rs",
225
- figsize: Tuple[int, int] = (12, 8),
226
237
  ) -> Any:
227
238
  """Create a regional association plot.
228
239
 
229
240
  Args:
230
241
  gwas_df: GWAS results DataFrame.
231
242
  chrom: Chromosome number.
232
- start: Start position of the region.
233
- end: End position of the region.
234
- lead_pos: Position of the lead/index SNP to highlight.
235
- ld_reference_file: PLINK binary fileset for LD calculation.
236
- If provided with lead_pos, calculates LD on the fly.
237
- ld_col: Column name for pre-computed LD (R²) values.
238
- Use this if LD was calculated externally.
243
+ start: Start position in base pairs.
244
+ end: End position in base pairs.
245
+ pos_col: Column name for genomic position.
246
+ p_col: Column name for p-value.
247
+ rs_col: Column name for SNP identifier.
248
+ snp_labels: Whether to show SNP labels on plot.
249
+ label_top_n: Number of top SNPs to label.
250
+ show_recombination: Whether to show recombination rate overlay.
251
+ figsize: Figure size as (width, height) in inches.
252
+ lead_pos: Position of lead/index SNP to highlight.
253
+ ld_reference_file: Path to PLINK binary fileset for LD calculation.
254
+ ld_col: Column name for pre-computed LD (R^2) values.
239
255
  genes_df: Gene annotations with chr, start, end, gene_name.
240
256
  exons_df: Exon annotations with chr, start, end, gene_name.
241
257
  recomb_df: Pre-loaded recombination rate data.
242
258
  If None and show_recombination=True, loads from species default.
243
- show_recombination: Whether to show recombination rate overlay.
244
- snp_labels: Whether to label top SNPs.
245
- label_top_n: Number of top SNPs to label.
246
- pos_col: Column name for position.
247
- p_col: Column name for p-value.
248
- rs_col: Column name for SNP ID.
249
- figsize: Figure size.
250
259
 
251
260
  Returns:
252
- Matplotlib Figure object.
261
+ Figure object (type depends on backend).
253
262
 
254
263
  Raises:
255
- ValidationError: If required DataFrame columns are missing.
264
+ ValidationError: If parameters or DataFrame columns are invalid.
265
+
266
+ Example:
267
+ >>> fig = plotter.plot(
268
+ ... gwas_df,
269
+ ... chrom=1, start=1000000, end=2000000,
270
+ ... lead_pos=1500000, snp_labels=True,
271
+ ... )
256
272
  """
273
+ # Validate parameters via Pydantic
274
+ PlotConfig.from_kwargs(
275
+ chrom=chrom,
276
+ start=start,
277
+ end=end,
278
+ pos_col=pos_col,
279
+ p_col=p_col,
280
+ rs_col=rs_col,
281
+ snp_labels=snp_labels,
282
+ label_top_n=label_top_n,
283
+ show_recombination=show_recombination,
284
+ figsize=figsize,
285
+ lead_pos=lead_pos,
286
+ ld_reference_file=ld_reference_file,
287
+ ld_col=ld_col,
288
+ )
289
+
257
290
  # Validate inputs
258
291
  validate_gwas_df(gwas_df, pos_col=pos_col, p_col=p_col)
259
292
 
@@ -282,6 +315,23 @@ class LocusZoomPlotter:
282
315
 
283
316
  # Prepare data
284
317
  df = gwas_df.copy()
318
+
319
+ # Validate p-values and warn about issues
320
+ p_values = df[p_col]
321
+ nan_count = p_values.isna().sum()
322
+ if nan_count > 0:
323
+ logger.warning(
324
+ f"GWAS data contains {nan_count} NaN p-values which will be excluded"
325
+ )
326
+ invalid_count = ((p_values < 0) | (p_values > 1)).sum()
327
+ if invalid_count > 0:
328
+ logger.warning(
329
+ f"GWAS data contains {invalid_count} p-values outside [0, 1] range"
330
+ )
331
+ clipped_count = (p_values < 1e-300).sum()
332
+ if clipped_count > 0:
333
+ logger.debug(f"Clipping {clipped_count} p-values below 1e-300 to 1e-300")
334
+
285
335
  df["neglog10p"] = -np.log10(df[p_col].clip(lower=1e-300))
286
336
 
287
337
  # Calculate LD if reference file provided
@@ -364,10 +414,12 @@ class LocusZoomPlotter:
364
414
  )
365
415
  self._backend.set_xlabel(gene_ax, f"Chromosome {chrom} (Mb)")
366
416
  self._backend.hide_spines(gene_ax, ["top", "right", "left"])
417
+ # Format both axes for interactive backends (they don't share x-axis)
418
+ self._backend.format_xaxis_mb(gene_ax)
367
419
  else:
368
420
  self._backend.set_xlabel(ax, f"Chromosome {chrom} (Mb)")
369
421
 
370
- # Format x-axis with Mb labels
422
+ # Format x-axis with Mb labels (association axis always needs formatting)
371
423
  self._backend.format_xaxis_mb(ax)
372
424
 
373
425
  # Adjust layout
@@ -516,18 +568,29 @@ class LocusZoomPlotter:
516
568
  return
517
569
 
518
570
  # Create secondary y-axis
519
- yaxis_name = self._backend.create_twin_axis(ax)
520
-
521
- # For plotly, yaxis_name is a tuple (fig, row, secondary_y)
522
- # For bokeh, yaxis_name is just a string
523
- if isinstance(yaxis_name, tuple):
524
- _, _, secondary_y = yaxis_name
571
+ twin_result = self._backend.create_twin_axis(ax)
572
+
573
+ # Matplotlib returns the twin Axes object itself - use it for drawing
574
+ # Plotly returns tuple (fig, row, secondary_y_name)
575
+ # Bokeh returns string "secondary"
576
+ from matplotlib.axes import Axes
577
+
578
+ if isinstance(twin_result, Axes):
579
+ # Matplotlib: use the twin axis for all secondary axis operations
580
+ secondary_ax = twin_result
581
+ secondary_y = None # Not used for matplotlib
582
+ elif isinstance(twin_result, tuple):
583
+ # Plotly: use original ax, specify y-axis via yaxis_name
584
+ secondary_ax = ax
585
+ _, _, secondary_y = twin_result
525
586
  else:
526
- secondary_y = yaxis_name
587
+ # Bokeh: use original ax, specify y-axis via yaxis_name
588
+ secondary_ax = ax
589
+ secondary_y = twin_result
527
590
 
528
591
  # Plot fill under curve
529
592
  self._backend.fill_between_secondary(
530
- ax,
593
+ secondary_ax,
531
594
  region_recomb["pos"],
532
595
  0,
533
596
  region_recomb["rate"],
@@ -538,7 +601,7 @@ class LocusZoomPlotter:
538
601
 
539
602
  # Plot recombination rate line
540
603
  self._backend.line_secondary(
541
- ax,
604
+ secondary_ax,
542
605
  region_recomb["pos"],
543
606
  region_recomb["rate"],
544
607
  color=RECOMB_COLOR,
@@ -550,10 +613,10 @@ class LocusZoomPlotter:
550
613
  # Set y-axis limits and label
551
614
  max_rate = region_recomb["rate"].max()
552
615
  self._backend.set_secondary_ylim(
553
- ax, 0, max(max_rate * 1.2, 20), yaxis_name=secondary_y
616
+ secondary_ax, 0, max(max_rate * 1.2, 20), yaxis_name=secondary_y
554
617
  )
555
618
  self._backend.set_secondary_ylabel(
556
- ax,
619
+ secondary_ax,
557
620
  "Recombination rate (cM/Mb)",
558
621
  color=RECOMB_COLOR,
559
622
  fontsize=9,
@@ -664,14 +727,22 @@ class LocusZoomPlotter:
664
727
  def plot_stacked(
665
728
  self,
666
729
  gwas_dfs: List[pd.DataFrame],
730
+ *,
667
731
  chrom: int,
668
732
  start: int,
669
733
  end: int,
734
+ pos_col: str = "ps",
735
+ p_col: str = "p_wald",
736
+ rs_col: str = "rs",
737
+ snp_labels: bool = True,
738
+ label_top_n: int = 3,
739
+ show_recombination: bool = True,
740
+ figsize: Tuple[float, float] = (12.0, 8.0),
741
+ ld_reference_file: Optional[str] = None,
742
+ ld_col: Optional[str] = None,
670
743
  lead_positions: Optional[List[int]] = None,
671
744
  panel_labels: Optional[List[str]] = None,
672
- ld_reference_file: Optional[str] = None,
673
745
  ld_reference_files: Optional[List[str]] = None,
674
- ld_col: Optional[str] = None,
675
746
  genes_df: Optional[pd.DataFrame] = None,
676
747
  exons_df: Optional[pd.DataFrame] = None,
677
748
  eqtl_df: Optional[pd.DataFrame] = None,
@@ -679,13 +750,6 @@ class LocusZoomPlotter:
679
750
  finemapping_df: Optional[pd.DataFrame] = None,
680
751
  finemapping_cs_col: Optional[str] = "cs",
681
752
  recomb_df: Optional[pd.DataFrame] = None,
682
- show_recombination: bool = True,
683
- snp_labels: bool = True,
684
- label_top_n: int = 3,
685
- pos_col: str = "ps",
686
- p_col: str = "p_wald",
687
- rs_col: str = "rs",
688
- figsize: Tuple[float, Optional[float]] = (12, None),
689
753
  ) -> Any:
690
754
  """Create stacked regional association plots for multiple GWAS.
691
755
 
@@ -695,30 +759,28 @@ class LocusZoomPlotter:
695
759
  Args:
696
760
  gwas_dfs: List of GWAS results DataFrames to stack.
697
761
  chrom: Chromosome number.
698
- start: Start position of the region.
699
- end: End position of the region.
700
- lead_positions: List of lead SNP positions (one per GWAS).
701
- If None, auto-detects from lowest p-value.
702
- panel_labels: Labels for each panel (e.g., phenotype names).
703
- ld_reference_file: Single PLINK fileset for all panels.
762
+ start: Start position in base pairs.
763
+ end: End position in base pairs.
764
+ pos_col: Column name for genomic position.
765
+ p_col: Column name for p-value.
766
+ rs_col: Column name for SNP identifier.
767
+ snp_labels: Whether to show SNP labels on plot.
768
+ label_top_n: Number of top SNPs to label (default 3 for stacked).
769
+ show_recombination: Whether to show recombination rate overlay.
770
+ figsize: Figure size as (width, height) in inches.
771
+ ld_reference_file: Single PLINK fileset (broadcast to all panels).
772
+ ld_col: Column name for pre-computed LD (R^2) values.
773
+ lead_positions: List of lead SNP positions (one per panel).
774
+ panel_labels: List of panel labels (one per panel).
704
775
  ld_reference_files: List of PLINK filesets (one per panel).
705
- ld_col: Column name for pre-computed LD (R²) values in each DataFrame.
706
- Use this if LD was calculated externally.
707
776
  genes_df: Gene annotations for bottom track.
708
777
  exons_df: Exon annotations for gene track.
709
778
  eqtl_df: eQTL data to display as additional panel.
710
779
  eqtl_gene: Filter eQTL data to this target gene.
711
780
  finemapping_df: Fine-mapping/SuSiE results with pos and pip columns.
712
781
  Displayed as PIP line with optional credible set coloring.
713
- finemapping_cs_col: Column name for credible set assignment in finemapping_df.
782
+ finemapping_cs_col: Column name for credible set assignment.
714
783
  recomb_df: Pre-loaded recombination rate data.
715
- show_recombination: Whether to show recombination overlay.
716
- snp_labels: Whether to label top SNPs.
717
- label_top_n: Number of top SNPs to label per panel.
718
- pos_col: Column name for position.
719
- p_col: Column name for p-value.
720
- rs_col: Column name for SNP ID.
721
- figsize: Figure size (width, height). If height is None, auto-calculates.
722
784
 
723
785
  Returns:
724
786
  Figure object (type depends on backend).
@@ -728,9 +790,27 @@ class LocusZoomPlotter:
728
790
  ... [gwas_height, gwas_bmi, gwas_whr],
729
791
  ... chrom=1, start=1000000, end=2000000,
730
792
  ... panel_labels=["Height", "BMI", "WHR"],
731
- ... genes_df=genes_df,
732
793
  ... )
733
794
  """
795
+ # Validate parameters via Pydantic
796
+ StackedPlotConfig.from_kwargs(
797
+ chrom=chrom,
798
+ start=start,
799
+ end=end,
800
+ pos_col=pos_col,
801
+ p_col=p_col,
802
+ rs_col=rs_col,
803
+ snp_labels=snp_labels,
804
+ label_top_n=label_top_n,
805
+ show_recombination=show_recombination,
806
+ figsize=figsize,
807
+ ld_reference_file=ld_reference_file,
808
+ ld_col=ld_col,
809
+ lead_positions=lead_positions,
810
+ panel_labels=panel_labels,
811
+ ld_reference_files=ld_reference_files,
812
+ )
813
+
734
814
  n_gwas = len(gwas_dfs)
735
815
  if n_gwas == 0:
736
816
  raise ValueError("At least one GWAS DataFrame required")
@@ -766,8 +846,16 @@ class LocusZoomPlotter:
766
846
  for df in gwas_dfs:
767
847
  region_df = df[(df[pos_col] >= start) & (df[pos_col] <= end)]
768
848
  if not region_df.empty:
769
- lead_idx = region_df[p_col].idxmin()
770
- lead_positions.append(int(region_df.loc[lead_idx, pos_col]))
849
+ # Filter out NaN p-values for lead SNP detection
850
+ valid_p = region_df[p_col].dropna()
851
+ if valid_p.empty:
852
+ logger.warning(
853
+ "All p-values in region are NaN, cannot determine lead SNP"
854
+ )
855
+ lead_positions.append(None)
856
+ else:
857
+ lead_idx = valid_p.idxmin()
858
+ lead_positions.append(int(region_df.loc[lead_idx, pos_col]))
771
859
  else:
772
860
  lead_positions.append(None)
773
861
 
@@ -990,40 +1078,41 @@ class LocusZoomPlotter:
990
1078
  has_effect = "effect_size" in eqtl_data.columns
991
1079
 
992
1080
  if has_effect:
993
- # Plot triangles by effect direction (batch by sign for efficiency)
1081
+ # Vectorized plotting: split by sign, assign colors in bulk
994
1082
  pos_effects = eqtl_data[eqtl_data["effect_size"] >= 0]
995
1083
  neg_effects = eqtl_data[eqtl_data["effect_size"] < 0]
996
1084
 
997
- # Plot positive effects (up triangles)
998
- for _, row in pos_effects.iterrows():
999
- row_df = pd.DataFrame([row])
1085
+ # Vectorized color assignment using apply
1086
+ if not pos_effects.empty:
1087
+ pos_colors = pos_effects["effect_size"].apply(get_eqtl_color)
1000
1088
  self._backend.scatter(
1001
1089
  ax,
1002
- pd.Series([row["pos"]]),
1003
- pd.Series([row["neglog10p"]]),
1004
- colors=get_eqtl_color(row["effect_size"]),
1090
+ pos_effects["pos"],
1091
+ pos_effects["neglog10p"],
1092
+ colors=pos_colors.tolist(),
1005
1093
  sizes=50,
1006
1094
  marker="^",
1007
1095
  edgecolor="black",
1008
1096
  linewidth=0.5,
1009
1097
  zorder=2,
1010
- hover_data=eqtl_hover_builder.build_dataframe(row_df),
1098
+ hover_data=eqtl_hover_builder.build_dataframe(pos_effects),
1011
1099
  )
1012
- # Plot negative effects (down triangles)
1013
- for _, row in neg_effects.iterrows():
1014
- row_df = pd.DataFrame([row])
1100
+
1101
+ if not neg_effects.empty:
1102
+ neg_colors = neg_effects["effect_size"].apply(get_eqtl_color)
1015
1103
  self._backend.scatter(
1016
1104
  ax,
1017
- pd.Series([row["pos"]]),
1018
- pd.Series([row["neglog10p"]]),
1019
- colors=get_eqtl_color(row["effect_size"]),
1105
+ neg_effects["pos"],
1106
+ neg_effects["neglog10p"],
1107
+ colors=neg_colors.tolist(),
1020
1108
  sizes=50,
1021
1109
  marker="v",
1022
1110
  edgecolor="black",
1023
1111
  linewidth=0.5,
1024
1112
  zorder=2,
1025
- hover_data=eqtl_hover_builder.build_dataframe(row_df),
1113
+ hover_data=eqtl_hover_builder.build_dataframe(neg_effects),
1026
1114
  )
1115
+
1027
1116
  # Add eQTL effect legend (all backends)
1028
1117
  self._backend.add_eqtl_legend(
1029
1118
  ax, EQTL_POSITIVE_BINS, EQTL_NEGATIVE_BINS
@@ -1144,18 +1233,37 @@ class LocusZoomPlotter:
1144
1233
  # Plot points by category
1145
1234
  if categories:
1146
1235
  for cat in categories:
1147
- cat_data = df[df[category_col] == cat]
1236
+ # Handle NaN category: NaN == NaN is False in pandas
1237
+ if pd.isna(cat):
1238
+ cat_data = df[df[category_col].isna()]
1239
+ else:
1240
+ cat_data = df[df[category_col] == cat]
1148
1241
  # Use upward triangles for positive effects, circles otherwise
1149
1242
  if effect_col and effect_col in cat_data.columns:
1150
- for _, row in cat_data.iterrows():
1151
- marker = "^" if row[effect_col] >= 0 else "v"
1243
+ # Vectorized: split by effect sign, 2 scatter calls per category
1244
+ pos_data = cat_data[cat_data[effect_col] >= 0]
1245
+ neg_data = cat_data[cat_data[effect_col] < 0]
1246
+
1247
+ if not pos_data.empty:
1152
1248
  self._backend.scatter(
1153
1249
  ax,
1154
- pd.Series([row["neglog10p"]]),
1155
- pd.Series([row["y_pos"]]),
1250
+ pos_data["neglog10p"],
1251
+ pos_data["y_pos"],
1156
1252
  colors=palette[cat],
1157
1253
  sizes=60,
1158
- marker=marker,
1254
+ marker="^",
1255
+ edgecolor="black",
1256
+ linewidth=0.5,
1257
+ zorder=2,
1258
+ )
1259
+ if not neg_data.empty:
1260
+ self._backend.scatter(
1261
+ ax,
1262
+ neg_data["neglog10p"],
1263
+ neg_data["y_pos"],
1264
+ colors=palette[cat],
1265
+ sizes=60,
1266
+ marker="v",
1159
1267
  edgecolor="black",
1160
1268
  linewidth=0.5,
1161
1269
  zorder=2,
pylocuszoom/schemas.py CHANGED
@@ -10,12 +10,7 @@ from typing import Optional, Union
10
10
  import pandas as pd
11
11
  from pydantic import BaseModel, ConfigDict, field_validator, model_validator
12
12
 
13
-
14
- class LoaderValidationError(Exception):
15
- """Raised when loaded data fails validation."""
16
-
17
- pass
18
-
13
+ from .exceptions import LoaderValidationError
19
14
 
20
15
  # =============================================================================
21
16
  # GWAS Validation
pylocuszoom/utils.py CHANGED
@@ -8,6 +8,8 @@ from typing import TYPE_CHECKING, Any, List, Optional, Union
8
8
 
9
9
  import pandas as pd
10
10
 
11
+ from .exceptions import ValidationError
12
+
11
13
  if TYPE_CHECKING:
12
14
  from pyspark.sql import DataFrame as SparkDataFrame
13
15
 
@@ -15,10 +17,6 @@ if TYPE_CHECKING:
15
17
  DataFrameLike = Union[pd.DataFrame, "SparkDataFrame", Any]
16
18
 
17
19
 
18
- class ValidationError(ValueError):
19
- """Raised when input validation fails."""
20
-
21
-
22
20
  def is_spark_dataframe(df: Any) -> bool:
23
21
  """Check if object is a PySpark DataFrame.
24
22
 
pylocuszoom/validation.py CHANGED
@@ -159,6 +159,57 @@ class DataFrameValidator:
159
159
 
160
160
  return self
161
161
 
162
+ def require_ci_ordering(
163
+ self,
164
+ ci_lower_col: str,
165
+ effect_col: str,
166
+ ci_upper_col: str,
167
+ ) -> "DataFrameValidator":
168
+ """Check that confidence intervals are properly ordered.
169
+
170
+ Validates that ci_lower <= effect <= ci_upper for all rows.
171
+ Invalid ordering would produce negative error bar lengths.
172
+
173
+ Args:
174
+ ci_lower_col: Column name for lower CI bound.
175
+ effect_col: Column name for effect size (point estimate).
176
+ ci_upper_col: Column name for upper CI bound.
177
+
178
+ Returns:
179
+ Self for method chaining.
180
+ """
181
+ # Skip if any column is missing
182
+ for col in [ci_lower_col, effect_col, ci_upper_col]:
183
+ if col not in self._df.columns:
184
+ return self
185
+
186
+ lower = self._df[ci_lower_col]
187
+ effect = self._df[effect_col]
188
+ upper = self._df[ci_upper_col]
189
+
190
+ # Check ci_lower <= effect
191
+ lower_gt_effect = (lower > effect).sum()
192
+ if lower_gt_effect > 0:
193
+ self._errors.append(
194
+ f"{lower_gt_effect} rows have {ci_lower_col} > {effect_col}"
195
+ )
196
+
197
+ # Check effect <= ci_upper
198
+ effect_gt_upper = (effect > upper).sum()
199
+ if effect_gt_upper > 0:
200
+ self._errors.append(
201
+ f"{effect_gt_upper} rows have {effect_col} > {ci_upper_col}"
202
+ )
203
+
204
+ # Check ci_lower <= ci_upper (implicit from above, but explicit is clearer)
205
+ lower_gt_upper = (lower > upper).sum()
206
+ if lower_gt_upper > 0:
207
+ self._errors.append(
208
+ f"{lower_gt_upper} rows have {ci_lower_col} > {ci_upper_col}"
209
+ )
210
+
211
+ return self
212
+
162
213
  def validate(self) -> None:
163
214
  """Raise ValidationError if any validation rules failed.
164
215
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pylocuszoom
3
- Version: 0.8.0
3
+ Version: 1.0.0
4
4
  Summary: Publication-ready regional association plots with LD coloring, gene tracks, and recombination overlays
5
5
  Project-URL: Homepage, https://github.com/michael-denyer/pylocuszoom
6
6
  Project-URL: Documentation, https://github.com/michael-denyer/pylocuszoom#readme
@@ -109,15 +109,14 @@ from pylocuszoom import LocusZoomPlotter
109
109
  # Initialize plotter (loads reference data for canine)
110
110
  plotter = LocusZoomPlotter(species="canine")
111
111
 
112
- # Create regional plot
112
+ # Plot with parameters passed directly
113
113
  fig = plotter.plot(
114
- gwas_df, # DataFrame with ps, p_wald, rs columns
114
+ gwas_df, # DataFrame with ps, p_wald, rs columns
115
115
  chrom=1,
116
116
  start=1000000,
117
117
  end=2000000,
118
- lead_pos=1500000, # Highlight lead SNP
118
+ lead_pos=1500000, # Highlight lead SNP
119
119
  )
120
-
121
120
  fig.savefig("regional_plot.png", dpi=150)
122
121
  ```
123
122
 
@@ -137,9 +136,7 @@ fig = plotter.plot(
137
136
  start=1000000,
138
137
  end=2000000,
139
138
  lead_pos=1500000,
140
- ld_reference_file="genotypes.bed", # For LD calculation
141
- genes_df=genes_df, # Gene annotations
142
- exons_df=exons_df, # Exon annotations
139
+ ld_reference_file="genotypes", # PLINK fileset (without extension)
143
140
  show_recombination=True, # Overlay recombination rate
144
141
  snp_labels=True, # Label top SNPs
145
142
  label_top_n=5, # How many to label
@@ -147,6 +144,8 @@ fig = plotter.plot(
147
144
  p_col="p_wald", # Column name for p-value
148
145
  rs_col="rs", # Column name for SNP ID
149
146
  figsize=(12, 8),
147
+ genes_df=genes_df, # Gene annotations
148
+ exons_df=exons_df, # Exon annotations
150
149
  )
151
150
  ```
152
151
 
@@ -163,6 +162,8 @@ Recombination maps are automatically lifted over from CanFam3.1 to CanFam4 coord
163
162
  ## Using with Other Species
164
163
 
165
164
  ```python
165
+ from pylocuszoom import LocusZoomPlotter
166
+
166
167
  # Feline (LD and gene tracks, user provides recombination data)
167
168
  plotter = LocusZoomPlotter(species="feline")
168
169
 
@@ -172,10 +173,12 @@ plotter = LocusZoomPlotter(
172
173
  recomb_data_dir="/path/to/recomb_maps/",
173
174
  )
174
175
 
175
- # Or provide data per-plot
176
+ # Provide data per-plot
176
177
  fig = plotter.plot(
177
178
  gwas_df,
178
- chrom=1, start=1000000, end=2000000,
179
+ chrom=1,
180
+ start=1000000,
181
+ end=2000000,
179
182
  recomb_df=my_recomb_dataframe,
180
183
  genes_df=my_genes_df,
181
184
  )
@@ -186,6 +189,8 @@ fig = plotter.plot(
186
189
  pyLocusZoom can automatically fetch gene annotations from Ensembl for any species:
187
190
 
188
191
  ```python
192
+ from pylocuszoom import LocusZoomPlotter
193
+
189
194
  # Enable automatic gene fetching
190
195
  plotter = LocusZoomPlotter(species="human", auto_genes=True)
191
196
 
@@ -201,6 +206,8 @@ Data is cached locally for fast subsequent plots. Maximum region size is 5Mb (En
201
206
  pyLocusZoom supports multiple rendering backends (set at initialization):
202
207
 
203
208
  ```python
209
+ from pylocuszoom import LocusZoomPlotter
210
+
204
211
  # Static publication-quality plot (default)
205
212
  plotter = LocusZoomPlotter(species="canine", backend="matplotlib")
206
213
  fig = plotter.plot(gwas_df, chrom=1, start=1000000, end=2000000)
@@ -229,6 +236,10 @@ fig = plotter.plot(gwas_df, chrom=1, start=1000000, end=2000000)
229
236
  Compare multiple GWAS results vertically with shared x-axis:
230
237
 
231
238
  ```python
239
+ from pylocuszoom import LocusZoomPlotter
240
+
241
+ plotter = LocusZoomPlotter(species="canine")
242
+
232
243
  fig = plotter.plot_stacked(
233
244
  [gwas_height, gwas_bmi, gwas_whr],
234
245
  chrom=1,
@@ -247,15 +258,21 @@ fig = plotter.plot_stacked(
247
258
  Add expression QTL data as a separate panel:
248
259
 
249
260
  ```python
261
+ from pylocuszoom import LocusZoomPlotter
262
+
250
263
  eqtl_df = pd.DataFrame({
251
264
  "pos": [1000500, 1001200, 1002000],
252
265
  "p_value": [1e-6, 1e-4, 0.01],
253
266
  "gene": ["BRCA1", "BRCA1", "BRCA1"],
254
267
  })
255
268
 
269
+ plotter = LocusZoomPlotter(species="canine")
270
+
256
271
  fig = plotter.plot_stacked(
257
272
  [gwas_df],
258
- chrom=1, start=1000000, end=2000000,
273
+ chrom=1,
274
+ start=1000000,
275
+ end=2000000,
259
276
  eqtl_df=eqtl_df,
260
277
  eqtl_gene="BRCA1",
261
278
  genes_df=genes_df,
@@ -270,15 +287,21 @@ fig = plotter.plot_stacked(
270
287
  Visualize SuSiE or other fine-mapping results with credible set coloring:
271
288
 
272
289
  ```python
290
+ from pylocuszoom import LocusZoomPlotter
291
+
273
292
  finemapping_df = pd.DataFrame({
274
293
  "pos": [1000500, 1001200, 1002000, 1003500],
275
294
  "pip": [0.85, 0.12, 0.02, 0.45], # Posterior inclusion probability
276
295
  "cs": [1, 1, 0, 2], # Credible set assignment (0 = not in CS)
277
296
  })
278
297
 
298
+ plotter = LocusZoomPlotter(species="canine")
299
+
279
300
  fig = plotter.plot_stacked(
280
301
  [gwas_df],
281
- chrom=1, start=1000000, end=2000000,
302
+ chrom=1,
303
+ start=1000000,
304
+ end=2000000,
282
305
  finemapping_df=finemapping_df,
283
306
  finemapping_cs_col="cs",
284
307
  genes_df=genes_df,
@@ -414,7 +437,7 @@ gwas_df = pd.DataFrame({
414
437
  |--------|------|----------|-------------|
415
438
  | `chr` | str or int | Yes | Chromosome identifier. Accepts "1", "chr1", or 1. The "chr" prefix is stripped for matching. |
416
439
  | `start` | int | Yes | Gene start position (bp, 1-based). Transcript start for strand-aware genes. |
417
- | `end` | int | Yes | Gene end position (bp, 1-based). Must be start. |
440
+ | `end` | int | Yes | Gene end position (bp, 1-based). Must be >= start. |
418
441
  | `gene_name` | str | Yes | Gene symbol displayed in track (e.g., "BRCA1", "TP53"). Keep short for readability. |
419
442
 
420
443
  Example:
@@ -516,6 +539,7 @@ Optional:
516
539
  ## Documentation
517
540
 
518
541
  - [User Guide](docs/USER_GUIDE.md) - Comprehensive documentation with API reference
542
+ - [Code Map](docs/CODEMAP.md) - Architecture diagram with source code links
519
543
  - [Architecture](docs/ARCHITECTURE.md) - Design decisions and component overview
520
544
  - [Example Notebook](examples/getting_started.ipynb) - Interactive tutorial
521
545
  - [CHANGELOG](CHANGELOG.md) - Version history
@@ -1,21 +1,23 @@
1
- pylocuszoom/__init__.py,sha256=UtrNrjV0b0frxv3Zl4jw5D8aTMbNSE55j-PPkd8rz28,5585
1
+ pylocuszoom/__init__.py,sha256=DNdSi6JbIQeGr6yt4G_z9NcZoY0P9ivLVbaLaOlLbRM,5894
2
2
  pylocuszoom/colors.py,sha256=B28rfhWwGZ-e6Q-F43iXxC6NZpjUo0yWk4S_-vp9ZvU,7686
3
+ pylocuszoom/config.py,sha256=qjIEodI-RY71RVyQ5QmE6WXcPXU4Re_xEWiDlkEww3g,13266
3
4
  pylocuszoom/ensembl.py,sha256=q767o86FdcKn4V9aK48ESFwNI7ATlaX5tnwjZReYMEw,14436
4
- pylocuszoom/eqtl.py,sha256=OrpWbFMR1wKMCmfQiC-2sqYx-99TT2i1cStIrPWIUOs,5948
5
- pylocuszoom/finemapping.py,sha256=ZPcnc9E6N41Su8222wCqBkB3bhhyfASvj9u9Ot4td4o,5898
6
- pylocuszoom/forest.py,sha256=302gULz9I0UiwqgcB18R756OOl1aa54OsPYHc6TnxGY,1092
7
- pylocuszoom/gene_track.py,sha256=PkBwfqByVxhXlAPco9-d4P5X7cTg2rrOnw7BJVx48ow,17818
5
+ pylocuszoom/eqtl.py,sha256=9hGcFARABQRCMN3rco0pVlFJdmlh4SLBBKSgOvdIH_U,5924
6
+ pylocuszoom/exceptions.py,sha256=nd-rWMUodW62WVV4TfcYVPQcb66xV6v9FA-_4xHb5VY,926
7
+ pylocuszoom/finemapping.py,sha256=VYQs4o4dVREXicueT1anzuENiFZk6YXb6HpbwyF0FD0,5828
8
+ pylocuszoom/forest.py,sha256=K-wBinxBOqIzsNMtZJ587e_oMhUXIXEqmEzVTUbmHSY,1161
9
+ pylocuszoom/gene_track.py,sha256=nbQEC3bbqukhCosPFny5ajv6hjkV-EZe7rKbsSoGs8g,17933
8
10
  pylocuszoom/labels.py,sha256=Ams5WVZFNVT692BRiQ5wZcdbdNEAm5xtgRwmF5u0s_A,3492
9
11
  pylocuszoom/ld.py,sha256=64xIulpDVvbMSryWUPoCQ99Odcjwf1wejpwVr_30MLU,6412
10
12
  pylocuszoom/loaders.py,sha256=KpWPBO0BCb2yrGTtgdiOqOuhx2YLmjK_ywmpr3onnx8,25156
11
13
  pylocuszoom/logging.py,sha256=nZHEkbnjp8zoyWj_S-Hy9UQvUYLoMoxyiOWRozBT2dg,4987
12
14
  pylocuszoom/phewas.py,sha256=6g2LmwA5kmxYlHgPxJvuXIMerEqfqgsrth110Y3CgVU,968
13
- pylocuszoom/plotter.py,sha256=gFywhaHPuXlbKPxWaWfw7Wrw8kqPMUPzKMgDcRB6wu8,50709
15
+ pylocuszoom/plotter.py,sha256=7rWsBXbLg-WSjmK474FU5KbzviidXC-cJGFkMMHomAg,54980
14
16
  pylocuszoom/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
17
  pylocuszoom/recombination.py,sha256=97GGBLDLTlQSRMp5sLOna3mCeRxeJiiWPHrw4dBRjQs,14546
16
- pylocuszoom/schemas.py,sha256=vABBBlAR1vUP6BIewZ8E-TYpacccrcxavrdIDVCrQB4,11916
17
- pylocuszoom/utils.py,sha256=_rI6ov0MbsWlZGJ7ni-V4387cirmJCX6IF2JAYhBx6A,6929
18
- pylocuszoom/validation.py,sha256=UInqlhOWhWaCT_mrO7O7SfB1DNIYkjvEMudy4YjtUBg,5698
18
+ pylocuszoom/schemas.py,sha256=XxeivyRm5LGDwJw4GToxzOSdyx1yXvFYk3xgeFJ6VW0,11858
19
+ pylocuszoom/utils.py,sha256=Z2P__Eau3ilF2ftuAZBm11EZ1NqCFQzfr4br9jCiJmg,6887
20
+ pylocuszoom/validation.py,sha256=3D9axjUvNXWW3Mk7dwRG38-di2P0zDpVVGF5WNSfZbk,7403
19
21
  pylocuszoom/backends/__init__.py,sha256=xefVj3jVxmYwVLLY5AZtFqTPMehQxZ2qGd-Pk7_V_Bk,4267
20
22
  pylocuszoom/backends/base.py,sha256=PBdm9t4f_qFDMkYR5z3edW4DvpuQSCAXuaxs2qjAeH0,21034
21
23
  pylocuszoom/backends/bokeh_backend.py,sha256=11zRhXH2guUHiaYXyd7l2IDAv6uawdRAv6dyGPkHmJc,25512
@@ -23,7 +25,7 @@ pylocuszoom/backends/hover.py,sha256=Hjm_jcxJL8dDxO_Ye7jeWAUcHKlbH6oO8ZfGJ2MzIFM
23
25
  pylocuszoom/backends/matplotlib_backend.py,sha256=098ITnvNrBTaEztqez_7D0sZ_rKAYIxS6EDR5Yxt8is,20924
24
26
  pylocuszoom/backends/plotly_backend.py,sha256=A6ZuHw0wVZaIIA6FgYJ4SH-Sz59tHOtnGUl-e-2VzZM,30574
25
27
  pylocuszoom/reference_data/__init__.py,sha256=qqHqAUt1jebGlCN3CjqW3Z-_coHVNo5K3a3bb9o83hA,109
26
- pylocuszoom-0.8.0.dist-info/METADATA,sha256=VqHRvFL1Wq5OJO3B727Rl0H8UfbBPaxVIJUOSA22s5A,17866
27
- pylocuszoom-0.8.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
28
- pylocuszoom-0.8.0.dist-info/licenses/LICENSE.md,sha256=U2y_hv8RcN5lECA3uK88irU3ODUE1TDAPictcmnP0Q4,698
29
- pylocuszoom-0.8.0.dist-info/RECORD,,
28
+ pylocuszoom-1.0.0.dist-info/METADATA,sha256=OMU09xbn6MMuvw8rPy19aMUiN40rp9Vl69QvqU7nwc4,18390
29
+ pylocuszoom-1.0.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
30
+ pylocuszoom-1.0.0.dist-info/licenses/LICENSE.md,sha256=U2y_hv8RcN5lECA3uK88irU3ODUE1TDAPictcmnP0Q4,698
31
+ pylocuszoom-1.0.0.dist-info/RECORD,,