pylocuszoom 0.8.0__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pylocuszoom/__init__.py +27 -7
- pylocuszoom/_plotter_utils.py +66 -0
- pylocuszoom/backends/base.py +56 -0
- pylocuszoom/backends/bokeh_backend.py +141 -29
- pylocuszoom/backends/matplotlib_backend.py +60 -0
- pylocuszoom/backends/plotly_backend.py +297 -88
- pylocuszoom/config.py +365 -0
- pylocuszoom/ensembl.py +6 -11
- pylocuszoom/eqtl.py +3 -7
- pylocuszoom/exceptions.py +33 -0
- pylocuszoom/finemapping.py +2 -7
- pylocuszoom/forest.py +1 -0
- pylocuszoom/gene_track.py +10 -31
- pylocuszoom/labels.py +6 -2
- pylocuszoom/manhattan.py +246 -0
- pylocuszoom/manhattan_plotter.py +760 -0
- pylocuszoom/plotter.py +401 -327
- pylocuszoom/qq.py +123 -0
- pylocuszoom/recombination.py +7 -7
- pylocuszoom/schemas.py +1 -6
- pylocuszoom/stats_plotter.py +319 -0
- pylocuszoom/utils.py +2 -4
- pylocuszoom/validation.py +51 -0
- {pylocuszoom-0.8.0.dist-info → pylocuszoom-1.1.0.dist-info}/METADATA +159 -25
- pylocuszoom-1.1.0.dist-info/RECORD +36 -0
- pylocuszoom-0.8.0.dist-info/RECORD +0 -29
- {pylocuszoom-0.8.0.dist-info → pylocuszoom-1.1.0.dist-info}/WHEEL +0 -0
- {pylocuszoom-0.8.0.dist-info → pylocuszoom-1.1.0.dist-info}/licenses/LICENSE.md +0 -0
pylocuszoom/config.py
ADDED
|
@@ -0,0 +1,365 @@
|
|
|
1
|
+
"""Pydantic configuration classes for pyLocusZoom plot methods.
|
|
2
|
+
|
|
3
|
+
This module provides typed, validated configuration objects that replace
|
|
4
|
+
the parameter explosion in plot methods. Each config is immutable (frozen)
|
|
5
|
+
to prevent accidental modification.
|
|
6
|
+
|
|
7
|
+
Example:
|
|
8
|
+
>>> from pylocuszoom.config import RegionConfig, DisplayConfig, PlotConfig
|
|
9
|
+
>>> region = RegionConfig(chrom=1, start=1000000, end=2000000)
|
|
10
|
+
>>> display = DisplayConfig(snp_labels=False, label_top_n=3)
|
|
11
|
+
>>>
|
|
12
|
+
>>> # Using composite PlotConfig with factory method
|
|
13
|
+
>>> config = PlotConfig.from_kwargs(chrom=1, start=1000000, end=2000000)
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from typing import List, Optional, Tuple
|
|
17
|
+
|
|
18
|
+
from pydantic import BaseModel, ConfigDict, Field, model_validator
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class RegionConfig(BaseModel):
|
|
22
|
+
"""Genomic region specification.
|
|
23
|
+
|
|
24
|
+
Attributes:
|
|
25
|
+
chrom: Chromosome number (must be >= 1).
|
|
26
|
+
start: Start position in base pairs (must be >= 0).
|
|
27
|
+
end: End position in base pairs (must be > start).
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
model_config = ConfigDict(frozen=True)
|
|
31
|
+
|
|
32
|
+
chrom: int = Field(..., ge=1, description="Chromosome number")
|
|
33
|
+
start: int = Field(..., ge=0, description="Start position (bp)")
|
|
34
|
+
end: int = Field(..., gt=0, description="End position (bp)")
|
|
35
|
+
|
|
36
|
+
@model_validator(mode="after")
|
|
37
|
+
def validate_region(self) -> "RegionConfig":
|
|
38
|
+
"""Validate that start < end."""
|
|
39
|
+
if self.start >= self.end:
|
|
40
|
+
raise ValueError(f"start ({self.start}) must be < end ({self.end})")
|
|
41
|
+
return self
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class ColumnConfig(BaseModel):
|
|
45
|
+
"""DataFrame column name mappings for GWAS data.
|
|
46
|
+
|
|
47
|
+
Attributes:
|
|
48
|
+
pos_col: Column name for genomic position.
|
|
49
|
+
p_col: Column name for p-value.
|
|
50
|
+
rs_col: Column name for SNP identifier.
|
|
51
|
+
"""
|
|
52
|
+
|
|
53
|
+
model_config = ConfigDict(frozen=True)
|
|
54
|
+
|
|
55
|
+
pos_col: str = Field(default="ps", description="Position column name")
|
|
56
|
+
p_col: str = Field(default="p_wald", description="P-value column name")
|
|
57
|
+
rs_col: str = Field(default="rs", description="SNP ID column name")
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class DisplayConfig(BaseModel):
|
|
61
|
+
"""Display and visual options for plots.
|
|
62
|
+
|
|
63
|
+
Attributes:
|
|
64
|
+
snp_labels: Whether to show SNP labels on plot.
|
|
65
|
+
label_top_n: Number of top SNPs to label.
|
|
66
|
+
show_recombination: Whether to show recombination rate overlay.
|
|
67
|
+
figsize: Figure size as (width, height) in inches.
|
|
68
|
+
"""
|
|
69
|
+
|
|
70
|
+
model_config = ConfigDict(frozen=True)
|
|
71
|
+
|
|
72
|
+
snp_labels: bool = Field(default=True, description="Show SNP labels")
|
|
73
|
+
label_top_n: int = Field(default=5, ge=0, description="Number of top SNPs to label")
|
|
74
|
+
show_recombination: bool = Field(
|
|
75
|
+
default=True, description="Show recombination overlay"
|
|
76
|
+
)
|
|
77
|
+
figsize: Tuple[float, float] = Field(
|
|
78
|
+
default=(12.0, 8.0), description="Figure size (width, height)"
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class LDConfig(BaseModel):
|
|
83
|
+
"""Linkage disequilibrium configuration.
|
|
84
|
+
|
|
85
|
+
Supports three modes:
|
|
86
|
+
1. No LD coloring: All fields None (default)
|
|
87
|
+
2. Pre-computed LD: Provide ld_col for column with R^2 values
|
|
88
|
+
3. Calculate LD: Provide lead_pos and ld_reference_file
|
|
89
|
+
|
|
90
|
+
Attributes:
|
|
91
|
+
lead_pos: Position of lead/index SNP to highlight.
|
|
92
|
+
ld_reference_file: Path to PLINK binary fileset for LD calculation.
|
|
93
|
+
ld_col: Column name for pre-computed LD (R^2) values.
|
|
94
|
+
"""
|
|
95
|
+
|
|
96
|
+
model_config = ConfigDict(frozen=True)
|
|
97
|
+
|
|
98
|
+
lead_pos: Optional[int] = Field(default=None, ge=1, description="Lead SNP position")
|
|
99
|
+
ld_reference_file: Optional[str] = Field(
|
|
100
|
+
default=None, description="PLINK binary fileset path"
|
|
101
|
+
)
|
|
102
|
+
ld_col: Optional[str] = Field(
|
|
103
|
+
default=None, description="Pre-computed LD column name"
|
|
104
|
+
)
|
|
105
|
+
|
|
106
|
+
@model_validator(mode="after")
|
|
107
|
+
def validate_ld_config(self) -> "LDConfig":
|
|
108
|
+
"""Validate LD configuration consistency.
|
|
109
|
+
|
|
110
|
+
When ld_reference_file is provided, lead_pos is required to identify
|
|
111
|
+
the index SNP for LD calculation.
|
|
112
|
+
|
|
113
|
+
Note: For StackedPlotConfig, ld_reference_file may be provided without
|
|
114
|
+
lead_pos when lead_positions list is used (broadcast mode). This is
|
|
115
|
+
validated at the StackedPlotConfig level, not here.
|
|
116
|
+
"""
|
|
117
|
+
# Validation moved to StackedPlotConfig.validate_broadcast_ld
|
|
118
|
+
# to allow broadcast mode where lead_positions list is used instead
|
|
119
|
+
return self
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
class PlotConfig(BaseModel):
|
|
123
|
+
"""Composite configuration for plot() method.
|
|
124
|
+
|
|
125
|
+
Composes all sub-configs into a single validated configuration object.
|
|
126
|
+
Use either direct construction with nested configs, or the from_kwargs()
|
|
127
|
+
factory method for backward compatibility with existing code.
|
|
128
|
+
|
|
129
|
+
Attributes:
|
|
130
|
+
region: Genomic region specification (required).
|
|
131
|
+
columns: DataFrame column name mappings.
|
|
132
|
+
display: Display and visual options.
|
|
133
|
+
ld: Linkage disequilibrium configuration.
|
|
134
|
+
|
|
135
|
+
Example:
|
|
136
|
+
>>> # Direct construction
|
|
137
|
+
>>> config = PlotConfig(
|
|
138
|
+
... region=RegionConfig(chrom=1, start=1000000, end=2000000),
|
|
139
|
+
... display=DisplayConfig(snp_labels=False),
|
|
140
|
+
... )
|
|
141
|
+
>>>
|
|
142
|
+
>>> # Factory method (backward compatible with plot() signature)
|
|
143
|
+
>>> config = PlotConfig.from_kwargs(
|
|
144
|
+
... chrom=1, start=1000000, end=2000000,
|
|
145
|
+
... snp_labels=False, lead_pos=1500000,
|
|
146
|
+
... )
|
|
147
|
+
"""
|
|
148
|
+
|
|
149
|
+
model_config = ConfigDict(frozen=True)
|
|
150
|
+
|
|
151
|
+
region: RegionConfig
|
|
152
|
+
columns: ColumnConfig = Field(default_factory=ColumnConfig)
|
|
153
|
+
display: DisplayConfig = Field(default_factory=DisplayConfig)
|
|
154
|
+
ld: LDConfig = Field(default_factory=LDConfig)
|
|
155
|
+
|
|
156
|
+
@model_validator(mode="after")
|
|
157
|
+
def validate_ld_requires_lead_pos(self) -> "PlotConfig":
|
|
158
|
+
"""Validate that LD reference file has lead_pos for single plots."""
|
|
159
|
+
if self.ld.ld_reference_file is not None and self.ld.lead_pos is None:
|
|
160
|
+
raise ValueError("lead_pos is required when ld_reference_file is provided")
|
|
161
|
+
return self
|
|
162
|
+
|
|
163
|
+
@classmethod
|
|
164
|
+
def from_kwargs(
|
|
165
|
+
cls,
|
|
166
|
+
*,
|
|
167
|
+
# Region params (required)
|
|
168
|
+
chrom: int,
|
|
169
|
+
start: int,
|
|
170
|
+
end: int,
|
|
171
|
+
# Column params
|
|
172
|
+
pos_col: str = "ps",
|
|
173
|
+
p_col: str = "p_wald",
|
|
174
|
+
rs_col: str = "rs",
|
|
175
|
+
# Display params
|
|
176
|
+
snp_labels: bool = True,
|
|
177
|
+
label_top_n: int = 5,
|
|
178
|
+
show_recombination: bool = True,
|
|
179
|
+
figsize: Tuple[float, float] = (12.0, 8.0),
|
|
180
|
+
# LD params
|
|
181
|
+
lead_pos: Optional[int] = None,
|
|
182
|
+
ld_reference_file: Optional[str] = None,
|
|
183
|
+
ld_col: Optional[str] = None,
|
|
184
|
+
) -> "PlotConfig":
|
|
185
|
+
"""Create PlotConfig from flat keyword arguments.
|
|
186
|
+
|
|
187
|
+
Factory method that accepts parameters matching the plot() method
|
|
188
|
+
signature, enabling backward compatibility with existing code.
|
|
189
|
+
|
|
190
|
+
Args:
|
|
191
|
+
chrom: Chromosome number.
|
|
192
|
+
start: Start position (bp).
|
|
193
|
+
end: End position (bp).
|
|
194
|
+
pos_col: Column name for position.
|
|
195
|
+
p_col: Column name for p-value.
|
|
196
|
+
rs_col: Column name for SNP ID.
|
|
197
|
+
snp_labels: Whether to show SNP labels.
|
|
198
|
+
label_top_n: Number of top SNPs to label.
|
|
199
|
+
show_recombination: Whether to show recombination overlay.
|
|
200
|
+
figsize: Figure size (width, height).
|
|
201
|
+
lead_pos: Position of lead SNP.
|
|
202
|
+
ld_reference_file: PLINK binary fileset path.
|
|
203
|
+
ld_col: Pre-computed LD column name.
|
|
204
|
+
|
|
205
|
+
Returns:
|
|
206
|
+
PlotConfig with nested config objects.
|
|
207
|
+
|
|
208
|
+
Raises:
|
|
209
|
+
ValidationError: If parameters are invalid.
|
|
210
|
+
"""
|
|
211
|
+
return cls(
|
|
212
|
+
region=RegionConfig(chrom=chrom, start=start, end=end),
|
|
213
|
+
columns=ColumnConfig(pos_col=pos_col, p_col=p_col, rs_col=rs_col),
|
|
214
|
+
display=DisplayConfig(
|
|
215
|
+
snp_labels=snp_labels,
|
|
216
|
+
label_top_n=label_top_n,
|
|
217
|
+
show_recombination=show_recombination,
|
|
218
|
+
figsize=figsize,
|
|
219
|
+
),
|
|
220
|
+
ld=LDConfig(
|
|
221
|
+
lead_pos=lead_pos,
|
|
222
|
+
ld_reference_file=ld_reference_file,
|
|
223
|
+
ld_col=ld_col,
|
|
224
|
+
),
|
|
225
|
+
)
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
class StackedPlotConfig(BaseModel):
|
|
229
|
+
"""Composite configuration for plot_stacked() method.
|
|
230
|
+
|
|
231
|
+
Extends PlotConfig pattern with list-based parameters for stacked plots.
|
|
232
|
+
Supports multiple lead positions, panel labels, and LD reference files.
|
|
233
|
+
|
|
234
|
+
Attributes:
|
|
235
|
+
region: Genomic region specification (required).
|
|
236
|
+
columns: DataFrame column name mappings.
|
|
237
|
+
display: Display and visual options.
|
|
238
|
+
ld: Linkage disequilibrium configuration (single file for broadcast).
|
|
239
|
+
lead_positions: List of lead SNP positions (one per panel).
|
|
240
|
+
panel_labels: List of panel labels (one per panel).
|
|
241
|
+
ld_reference_files: List of PLINK filesets (one per panel).
|
|
242
|
+
|
|
243
|
+
Example:
|
|
244
|
+
>>> config = StackedPlotConfig.from_kwargs(
|
|
245
|
+
... chrom=1, start=1000000, end=2000000,
|
|
246
|
+
... lead_positions=[1500000, 1600000],
|
|
247
|
+
... panel_labels=["Study A", "Study B"],
|
|
248
|
+
... )
|
|
249
|
+
"""
|
|
250
|
+
|
|
251
|
+
model_config = ConfigDict(frozen=True)
|
|
252
|
+
|
|
253
|
+
region: RegionConfig
|
|
254
|
+
columns: ColumnConfig = Field(default_factory=ColumnConfig)
|
|
255
|
+
display: DisplayConfig = Field(default_factory=DisplayConfig)
|
|
256
|
+
ld: LDConfig = Field(default_factory=LDConfig)
|
|
257
|
+
|
|
258
|
+
# Stacked-specific list parameters
|
|
259
|
+
lead_positions: Optional[List[int]] = Field(
|
|
260
|
+
default=None, description="Lead SNP positions (one per panel)"
|
|
261
|
+
)
|
|
262
|
+
panel_labels: Optional[List[str]] = Field(
|
|
263
|
+
default=None, description="Panel labels (one per panel)"
|
|
264
|
+
)
|
|
265
|
+
ld_reference_files: Optional[List[str]] = Field(
|
|
266
|
+
default=None, description="PLINK filesets (one per panel)"
|
|
267
|
+
)
|
|
268
|
+
|
|
269
|
+
@model_validator(mode="after")
|
|
270
|
+
def validate_broadcast_ld(self) -> "StackedPlotConfig":
|
|
271
|
+
"""Validate broadcast LD configuration for stacked plots.
|
|
272
|
+
|
|
273
|
+
When ld_reference_file is provided for broadcast, lead_positions must
|
|
274
|
+
be provided to specify the reference SNP for each panel.
|
|
275
|
+
"""
|
|
276
|
+
if self.ld.ld_reference_file is not None and self.ld.lead_pos is None:
|
|
277
|
+
# Broadcast mode: ld_reference_file without lead_pos in LDConfig
|
|
278
|
+
# Requires lead_positions list instead
|
|
279
|
+
if self.lead_positions is None:
|
|
280
|
+
raise ValueError(
|
|
281
|
+
"lead_positions is required when ld_reference_file is provided "
|
|
282
|
+
"for broadcast (one lead position per panel)"
|
|
283
|
+
)
|
|
284
|
+
return self
|
|
285
|
+
|
|
286
|
+
@classmethod
|
|
287
|
+
def from_kwargs(
|
|
288
|
+
cls,
|
|
289
|
+
*,
|
|
290
|
+
# Region params (required)
|
|
291
|
+
chrom: int,
|
|
292
|
+
start: int,
|
|
293
|
+
end: int,
|
|
294
|
+
# Column params
|
|
295
|
+
pos_col: str = "ps",
|
|
296
|
+
p_col: str = "p_wald",
|
|
297
|
+
rs_col: str = "rs",
|
|
298
|
+
# Display params
|
|
299
|
+
snp_labels: bool = True,
|
|
300
|
+
label_top_n: int = 3, # Default for stacked is 3 (less crowded)
|
|
301
|
+
show_recombination: bool = True,
|
|
302
|
+
figsize: Tuple[float, float] = (12.0, 8.0),
|
|
303
|
+
# LD params (single for broadcast)
|
|
304
|
+
ld_reference_file: Optional[str] = None,
|
|
305
|
+
ld_col: Optional[str] = None,
|
|
306
|
+
# Stacked-specific list params
|
|
307
|
+
lead_positions: Optional[List[int]] = None,
|
|
308
|
+
panel_labels: Optional[List[str]] = None,
|
|
309
|
+
ld_reference_files: Optional[List[str]] = None,
|
|
310
|
+
) -> "StackedPlotConfig":
|
|
311
|
+
"""Create StackedPlotConfig from flat keyword arguments.
|
|
312
|
+
|
|
313
|
+
Factory method that accepts parameters matching the plot_stacked()
|
|
314
|
+
method signature, enabling backward compatibility.
|
|
315
|
+
|
|
316
|
+
Args:
|
|
317
|
+
chrom: Chromosome number.
|
|
318
|
+
start: Start position (bp).
|
|
319
|
+
end: End position (bp).
|
|
320
|
+
pos_col: Column name for position.
|
|
321
|
+
p_col: Column name for p-value.
|
|
322
|
+
rs_col: Column name for SNP ID.
|
|
323
|
+
snp_labels: Whether to show SNP labels.
|
|
324
|
+
label_top_n: Number of top SNPs to label (default 3 for stacked).
|
|
325
|
+
show_recombination: Whether to show recombination overlay.
|
|
326
|
+
figsize: Figure size (width, height).
|
|
327
|
+
ld_reference_file: Single PLINK fileset (broadcast to all panels).
|
|
328
|
+
ld_col: Pre-computed LD column name.
|
|
329
|
+
lead_positions: List of lead SNP positions.
|
|
330
|
+
panel_labels: List of panel labels.
|
|
331
|
+
ld_reference_files: List of PLINK filesets.
|
|
332
|
+
|
|
333
|
+
Returns:
|
|
334
|
+
StackedPlotConfig with nested config objects.
|
|
335
|
+
|
|
336
|
+
Raises:
|
|
337
|
+
ValidationError: If parameters are invalid.
|
|
338
|
+
"""
|
|
339
|
+
return cls(
|
|
340
|
+
region=RegionConfig(chrom=chrom, start=start, end=end),
|
|
341
|
+
columns=ColumnConfig(pos_col=pos_col, p_col=p_col, rs_col=rs_col),
|
|
342
|
+
display=DisplayConfig(
|
|
343
|
+
snp_labels=snp_labels,
|
|
344
|
+
label_top_n=label_top_n,
|
|
345
|
+
show_recombination=show_recombination,
|
|
346
|
+
figsize=figsize,
|
|
347
|
+
),
|
|
348
|
+
ld=LDConfig(
|
|
349
|
+
ld_reference_file=ld_reference_file,
|
|
350
|
+
ld_col=ld_col,
|
|
351
|
+
),
|
|
352
|
+
lead_positions=lead_positions,
|
|
353
|
+
panel_labels=panel_labels,
|
|
354
|
+
ld_reference_files=ld_reference_files,
|
|
355
|
+
)
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
__all__ = [
|
|
359
|
+
"RegionConfig",
|
|
360
|
+
"ColumnConfig",
|
|
361
|
+
"DisplayConfig",
|
|
362
|
+
"LDConfig",
|
|
363
|
+
"PlotConfig",
|
|
364
|
+
"StackedPlotConfig",
|
|
365
|
+
]
|
pylocuszoom/ensembl.py
CHANGED
|
@@ -18,7 +18,7 @@ import pandas as pd
|
|
|
18
18
|
import requests
|
|
19
19
|
|
|
20
20
|
from .logging import logger
|
|
21
|
-
from .utils import ValidationError
|
|
21
|
+
from .utils import ValidationError, normalize_chrom
|
|
22
22
|
|
|
23
23
|
# Ensembl API limits regions to 5Mb
|
|
24
24
|
ENSEMBL_MAX_REGION_SIZE = 5_000_000
|
|
@@ -47,11 +47,6 @@ ENSEMBL_MAX_RETRIES = 3
|
|
|
47
47
|
ENSEMBL_RETRY_DELAY = 1.0 # seconds, doubles on each retry
|
|
48
48
|
|
|
49
49
|
|
|
50
|
-
def _normalize_chrom(chrom: str | int) -> str:
|
|
51
|
-
"""Normalize chromosome name by removing 'chr' prefix."""
|
|
52
|
-
return str(chrom).replace("chr", "")
|
|
53
|
-
|
|
54
|
-
|
|
55
50
|
def _validate_region_size(start: int, end: int, context: str) -> None:
|
|
56
51
|
"""Validate region size is within Ensembl API limits.
|
|
57
52
|
|
|
@@ -129,7 +124,7 @@ def get_cached_genes(
|
|
|
129
124
|
DataFrame if cache hit, None if cache miss.
|
|
130
125
|
"""
|
|
131
126
|
ensembl_species = get_ensembl_species_name(species)
|
|
132
|
-
chrom_str = _normalize_chrom(chrom)
|
|
127
|
+
chrom_str = normalize_chrom(chrom)
|
|
133
128
|
cache_key = _cache_key(ensembl_species, chrom_str, start, end)
|
|
134
129
|
|
|
135
130
|
species_dir = cache_dir / ensembl_species
|
|
@@ -161,7 +156,7 @@ def save_cached_genes(
|
|
|
161
156
|
end: Region end position.
|
|
162
157
|
"""
|
|
163
158
|
ensembl_species = get_ensembl_species_name(species)
|
|
164
|
-
chrom_str = _normalize_chrom(chrom)
|
|
159
|
+
chrom_str = normalize_chrom(chrom)
|
|
165
160
|
cache_key = _cache_key(ensembl_species, chrom_str, start, end)
|
|
166
161
|
|
|
167
162
|
species_dir = cache_dir / ensembl_species
|
|
@@ -266,7 +261,7 @@ def fetch_genes_from_ensembl(
|
|
|
266
261
|
_validate_region_size(start, end, "genes_df")
|
|
267
262
|
|
|
268
263
|
ensembl_species = get_ensembl_species_name(species)
|
|
269
|
-
chrom_str = _normalize_chrom(chrom)
|
|
264
|
+
chrom_str = normalize_chrom(chrom)
|
|
270
265
|
|
|
271
266
|
# Build region string
|
|
272
267
|
region = f"{chrom_str}:{start}-{end}"
|
|
@@ -334,7 +329,7 @@ def fetch_exons_from_ensembl(
|
|
|
334
329
|
_validate_region_size(start, end, "exons_df")
|
|
335
330
|
|
|
336
331
|
ensembl_species = get_ensembl_species_name(species)
|
|
337
|
-
chrom_str = _normalize_chrom(chrom)
|
|
332
|
+
chrom_str = normalize_chrom(chrom)
|
|
338
333
|
region = f"{chrom_str}:{start}-{end}"
|
|
339
334
|
|
|
340
335
|
url = f"{ENSEMBL_REST_URL}/overlap/region/{ensembl_species}/{region}"
|
|
@@ -408,7 +403,7 @@ def get_genes_for_region(
|
|
|
408
403
|
if cache_dir is None:
|
|
409
404
|
cache_dir = get_ensembl_cache_dir()
|
|
410
405
|
|
|
411
|
-
chrom_str = _normalize_chrom(chrom)
|
|
406
|
+
chrom_str = normalize_chrom(chrom)
|
|
412
407
|
|
|
413
408
|
# Check cache first
|
|
414
409
|
if use_cache:
|
pylocuszoom/eqtl.py
CHANGED
|
@@ -9,20 +9,15 @@ from typing import List, Optional
|
|
|
9
9
|
import numpy as np
|
|
10
10
|
import pandas as pd
|
|
11
11
|
|
|
12
|
+
from .exceptions import EQTLValidationError, ValidationError
|
|
12
13
|
from .logging import logger
|
|
13
|
-
from .utils import
|
|
14
|
+
from .utils import filter_by_region
|
|
14
15
|
from .validation import DataFrameValidator
|
|
15
16
|
|
|
16
17
|
REQUIRED_EQTL_COLS = ["pos", "p_value"]
|
|
17
18
|
OPTIONAL_EQTL_COLS = ["gene", "effect_size", "rs", "se"]
|
|
18
19
|
|
|
19
20
|
|
|
20
|
-
class EQTLValidationError(ValueError):
|
|
21
|
-
"""Raised when eQTL DataFrame validation fails."""
|
|
22
|
-
|
|
23
|
-
pass
|
|
24
|
-
|
|
25
|
-
|
|
26
21
|
def validate_eqtl_df(
|
|
27
22
|
df: pd.DataFrame,
|
|
28
23
|
pos_col: str = "pos",
|
|
@@ -42,6 +37,7 @@ def validate_eqtl_df(
|
|
|
42
37
|
(
|
|
43
38
|
DataFrameValidator(df, "eQTL DataFrame")
|
|
44
39
|
.require_columns([pos_col, p_col])
|
|
40
|
+
.require_numeric([p_col])
|
|
45
41
|
.validate()
|
|
46
42
|
)
|
|
47
43
|
except ValidationError as e:
|
pylocuszoom/exceptions.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""Exception hierarchy for pyLocusZoom.
|
|
2
|
+
|
|
3
|
+
All pyLocusZoom exceptions inherit from PyLocusZoomError, enabling users to
|
|
4
|
+
catch all library errors with `except PyLocusZoomError`.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class PyLocusZoomError(Exception):
|
|
9
|
+
"""Base exception for all pyLocusZoom errors."""
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ValidationError(PyLocusZoomError, ValueError):
|
|
13
|
+
"""Raised when input validation fails. Inherits ValueError for backward compat."""
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class EQTLValidationError(ValidationError):
|
|
17
|
+
"""Raised when eQTL DataFrame validation fails."""
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class FinemappingValidationError(ValidationError):
|
|
21
|
+
"""Raised when fine-mapping DataFrame validation fails."""
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class LoaderValidationError(ValidationError):
|
|
25
|
+
"""Raised when loaded data fails validation."""
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class BackendError(PyLocusZoomError):
|
|
29
|
+
"""Raised when backend operations fail."""
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class DataDownloadError(PyLocusZoomError, RuntimeError):
|
|
33
|
+
"""Raised when data download operations fail."""
|
pylocuszoom/finemapping.py
CHANGED
|
@@ -8,8 +8,9 @@ from typing import List, Optional
|
|
|
8
8
|
|
|
9
9
|
import pandas as pd
|
|
10
10
|
|
|
11
|
+
from .exceptions import FinemappingValidationError, ValidationError
|
|
11
12
|
from .logging import logger
|
|
12
|
-
from .utils import
|
|
13
|
+
from .utils import filter_by_region
|
|
13
14
|
from .validation import DataFrameValidator
|
|
14
15
|
|
|
15
16
|
# Required columns for fine-mapping data
|
|
@@ -17,12 +18,6 @@ REQUIRED_FINEMAPPING_COLS = ["pos", "pip"]
|
|
|
17
18
|
OPTIONAL_FINEMAPPING_COLS = ["rs", "cs", "cs_id", "effect", "se"]
|
|
18
19
|
|
|
19
20
|
|
|
20
|
-
class FinemappingValidationError(ValueError):
|
|
21
|
-
"""Raised when fine-mapping DataFrame validation fails."""
|
|
22
|
-
|
|
23
|
-
pass
|
|
24
|
-
|
|
25
|
-
|
|
26
21
|
def validate_finemapping_df(
|
|
27
22
|
df: pd.DataFrame,
|
|
28
23
|
pos_col: str = "pos",
|
pylocuszoom/forest.py
CHANGED
|
@@ -31,5 +31,6 @@ def validate_forest_df(
|
|
|
31
31
|
DataFrameValidator(df, "Forest plot DataFrame")
|
|
32
32
|
.require_columns([study_col, effect_col, ci_lower_col, ci_upper_col])
|
|
33
33
|
.require_numeric([effect_col, ci_lower_col, ci_upper_col])
|
|
34
|
+
.require_ci_ordering(ci_lower_col, effect_col, ci_upper_col)
|
|
34
35
|
.validate()
|
|
35
36
|
)
|
pylocuszoom/gene_track.py
CHANGED
|
@@ -48,22 +48,23 @@ def assign_gene_positions(genes_df: pd.DataFrame, start: int, end: int) -> List[
|
|
|
48
48
|
List of integer row indices (0, 1, 2, ...) for each gene.
|
|
49
49
|
"""
|
|
50
50
|
positions = []
|
|
51
|
-
|
|
51
|
+
# Track the rightmost end position for each row (including label buffer)
|
|
52
|
+
row_ends: dict[int, int] = {} # row -> rightmost end position
|
|
52
53
|
region_width = end - start
|
|
54
|
+
label_buffer = region_width * 0.08 # Extra space for labels
|
|
53
55
|
|
|
54
56
|
for _, gene in genes_df.iterrows():
|
|
55
57
|
gene_start = max(gene["start"], start)
|
|
56
58
|
gene_end = min(gene["end"], end)
|
|
57
59
|
|
|
58
|
-
# Find first available row
|
|
60
|
+
# Find first available row where gene doesn't overlap
|
|
59
61
|
row = 0
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
if occ_row == row and occ_end > gene_start - label_buffer:
|
|
63
|
-
row = occ_row + 1
|
|
62
|
+
while row in row_ends and row_ends[row] > gene_start - label_buffer:
|
|
63
|
+
row += 1
|
|
64
64
|
|
|
65
65
|
positions.append(row)
|
|
66
|
-
|
|
66
|
+
# Update the row's end position (including buffer for next gene check)
|
|
67
|
+
row_ends[row] = gene_end
|
|
67
68
|
|
|
68
69
|
return positions
|
|
69
70
|
|
|
@@ -174,17 +175,6 @@ def _draw_strand_arrows_matplotlib(
|
|
|
174
175
|
gene_start, gene_end, region_width, strand
|
|
175
176
|
)
|
|
176
177
|
|
|
177
|
-
# Draw connecting line between arrow centers
|
|
178
|
-
if len(arrow_tip_positions) > 1:
|
|
179
|
-
ax.plot(
|
|
180
|
-
[arrow_tip_positions[0], arrow_tip_positions[-1]],
|
|
181
|
-
[y_gene, y_gene],
|
|
182
|
-
color=arrow_color,
|
|
183
|
-
linewidth=1.0,
|
|
184
|
-
zorder=4,
|
|
185
|
-
solid_capstyle="butt",
|
|
186
|
-
)
|
|
187
|
-
|
|
188
178
|
for tip_x in arrow_tip_positions:
|
|
189
179
|
if strand == "+":
|
|
190
180
|
base_x = tip_x - tri_width
|
|
@@ -223,17 +213,6 @@ def _draw_strand_arrows_generic(
|
|
|
223
213
|
gene_start, gene_end, region_width, strand
|
|
224
214
|
)
|
|
225
215
|
|
|
226
|
-
# Draw connecting line between arrow centers
|
|
227
|
-
if len(arrow_tip_positions) > 1:
|
|
228
|
-
backend.line(
|
|
229
|
-
ax,
|
|
230
|
-
x=pd.Series([arrow_tip_positions[0], arrow_tip_positions[-1]]),
|
|
231
|
-
y=pd.Series([y_gene, y_gene]),
|
|
232
|
-
color=arrow_color,
|
|
233
|
-
linewidth=1.0,
|
|
234
|
-
zorder=4,
|
|
235
|
-
)
|
|
236
|
-
|
|
237
216
|
for tip_x in arrow_tip_positions:
|
|
238
217
|
if strand == "+":
|
|
239
218
|
base_x = tip_x - tri_width
|
|
@@ -405,7 +384,7 @@ def plot_gene_track(
|
|
|
405
384
|
gene_name,
|
|
406
385
|
ha="center",
|
|
407
386
|
va="bottom",
|
|
408
|
-
fontsize=
|
|
387
|
+
fontsize=9,
|
|
409
388
|
color="#000000",
|
|
410
389
|
fontweight="medium",
|
|
411
390
|
style="italic",
|
|
@@ -552,7 +531,7 @@ def plot_gene_track_generic(
|
|
|
552
531
|
label_pos,
|
|
553
532
|
y_label,
|
|
554
533
|
gene_name,
|
|
555
|
-
fontsize=
|
|
534
|
+
fontsize=9,
|
|
556
535
|
ha="center",
|
|
557
536
|
va="bottom",
|
|
558
537
|
color="#000000",
|
pylocuszoom/labels.py
CHANGED
|
@@ -11,6 +11,8 @@ import pandas as pd
|
|
|
11
11
|
from matplotlib.axes import Axes
|
|
12
12
|
from matplotlib.text import Annotation
|
|
13
13
|
|
|
14
|
+
from pylocuszoom.logging import logger
|
|
15
|
+
|
|
14
16
|
|
|
15
17
|
def add_snp_labels(
|
|
16
18
|
ax: Axes,
|
|
@@ -111,7 +113,9 @@ def add_snp_labels(
|
|
|
111
113
|
expand_points=(1.5, 1.5),
|
|
112
114
|
)
|
|
113
115
|
except ImportError:
|
|
114
|
-
|
|
115
|
-
|
|
116
|
+
logger.warning(
|
|
117
|
+
"adjustText not installed - SNP labels may overlap. "
|
|
118
|
+
"Install with: pip install adjustText"
|
|
119
|
+
)
|
|
116
120
|
|
|
117
121
|
return texts
|