pylocuszoom 0.6.0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pylocuszoom/__init__.py +34 -7
- pylocuszoom/backends/__init__.py +116 -17
- pylocuszoom/backends/base.py +363 -60
- pylocuszoom/backends/bokeh_backend.py +77 -15
- pylocuszoom/backends/hover.py +198 -0
- pylocuszoom/backends/matplotlib_backend.py +263 -3
- pylocuszoom/backends/plotly_backend.py +73 -16
- pylocuszoom/config.py +365 -0
- pylocuszoom/ensembl.py +476 -0
- pylocuszoom/eqtl.py +17 -25
- pylocuszoom/exceptions.py +33 -0
- pylocuszoom/finemapping.py +18 -32
- pylocuszoom/forest.py +10 -11
- pylocuszoom/gene_track.py +169 -142
- pylocuszoom/loaders.py +3 -1
- pylocuszoom/phewas.py +10 -11
- pylocuszoom/plotter.py +311 -277
- pylocuszoom/recombination.py +19 -3
- pylocuszoom/schemas.py +1 -6
- pylocuszoom/utils.py +54 -4
- pylocuszoom/validation.py +223 -0
- {pylocuszoom-0.6.0.dist-info → pylocuszoom-1.0.0.dist-info}/METADATA +82 -37
- pylocuszoom-1.0.0.dist-info/RECORD +31 -0
- pylocuszoom-0.6.0.dist-info/RECORD +0 -26
- {pylocuszoom-0.6.0.dist-info → pylocuszoom-1.0.0.dist-info}/WHEEL +0 -0
- {pylocuszoom-0.6.0.dist-info → pylocuszoom-1.0.0.dist-info}/licenses/LICENSE.md +0 -0
pylocuszoom/config.py
ADDED
|
@@ -0,0 +1,365 @@
|
|
|
1
|
+
"""Pydantic configuration classes for pyLocusZoom plot methods.
|
|
2
|
+
|
|
3
|
+
This module provides typed, validated configuration objects that replace
|
|
4
|
+
the parameter explosion in plot methods. Each config is immutable (frozen)
|
|
5
|
+
to prevent accidental modification.
|
|
6
|
+
|
|
7
|
+
Example:
|
|
8
|
+
>>> from pylocuszoom.config import RegionConfig, DisplayConfig, PlotConfig
|
|
9
|
+
>>> region = RegionConfig(chrom=1, start=1000000, end=2000000)
|
|
10
|
+
>>> display = DisplayConfig(snp_labels=False, label_top_n=3)
|
|
11
|
+
>>>
|
|
12
|
+
>>> # Using composite PlotConfig with factory method
|
|
13
|
+
>>> config = PlotConfig.from_kwargs(chrom=1, start=1000000, end=2000000)
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from typing import List, Optional, Tuple
|
|
17
|
+
|
|
18
|
+
from pydantic import BaseModel, ConfigDict, Field, model_validator
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class RegionConfig(BaseModel):
    """Immutable description of the genomic window to plot.

    Attributes:
        chrom: Chromosome number; values below 1 are rejected.
        start: Region start in base pairs; negative values are rejected.
        end: Region end in base pairs; must be positive and strictly
            greater than ``start``.
    """

    # Frozen: attribute assignment after construction raises.
    model_config = ConfigDict(frozen=True)

    chrom: int = Field(..., ge=1, description="Chromosome number")
    start: int = Field(..., ge=0, description="Start position (bp)")
    end: int = Field(..., gt=0, description="End position (bp)")

    @model_validator(mode="after")
    def validate_region(self) -> "RegionConfig":
        """Reject empty or inverted regions.

        Returns:
            The validated model itself.

        Raises:
            ValueError: If ``start`` is not strictly less than ``end``.
        """
        if self.end > self.start:
            return self
        raise ValueError(f"start ({self.start}) must be < end ({self.end})")
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class ColumnConfig(BaseModel):
    """Immutable mapping from logical GWAS fields to DataFrame column names.

    Attributes:
        pos_col: Name of the column holding the genomic position
            (default ``"ps"``).
        p_col: Name of the column holding the p-value
            (default ``"p_wald"``).
        rs_col: Name of the column holding the SNP identifier
            (default ``"rs"``).
    """

    # Frozen: attribute assignment after construction raises.
    model_config = ConfigDict(frozen=True)

    pos_col: str = Field("ps", description="Position column name")
    p_col: str = Field("p_wald", description="P-value column name")
    rs_col: str = Field("rs", description="SNP ID column name")
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class DisplayConfig(BaseModel):
    """Immutable bundle of visual options for a plot.

    Attributes:
        snp_labels: Show SNP labels on the plot (default ``True``).
        label_top_n: How many top SNPs to label; must be >= 0
            (default ``5``).
        show_recombination: Show the recombination rate overlay
            (default ``True``).
        figsize: Figure size as ``(width, height)``
            (default ``(12.0, 8.0)``).
    """

    # Frozen: attribute assignment after construction raises.
    model_config = ConfigDict(frozen=True)

    snp_labels: bool = Field(True, description="Show SNP labels")
    label_top_n: int = Field(
        5,
        ge=0,
        description="Number of top SNPs to label",
    )
    show_recombination: bool = Field(True, description="Show recombination overlay")
    figsize: Tuple[float, float] = Field(
        (12.0, 8.0),
        description="Figure size (width, height)",
    )
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class LDConfig(BaseModel):
    """Immutable linkage disequilibrium (LD) settings.

    The fields combine into three usage modes:

    * no LD colouring -- leave every field at ``None`` (the default);
    * pre-computed LD -- set ``ld_col`` to the column holding R^2 values;
    * calculated LD -- set ``lead_pos`` together with ``ld_reference_file``.

    Attributes:
        lead_pos: Position of the lead/index SNP to highlight; must be >= 1
            when given.
        ld_reference_file: Path to a PLINK binary fileset used for LD
            calculation.
        ld_col: Name of the column with pre-computed LD (R^2) values.
    """

    # Frozen: attribute assignment after construction raises.
    model_config = ConfigDict(frozen=True)

    lead_pos: Optional[int] = Field(None, ge=1, description="Lead SNP position")
    ld_reference_file: Optional[str] = Field(
        None,
        description="PLINK binary fileset path",
    )
    ld_col: Optional[str] = Field(
        None,
        description="Pre-computed LD column name",
    )

    @model_validator(mode="after")
    def validate_ld_config(self) -> "LDConfig":
        """Return the model unchanged; no cross-field checks happen here.

        The rule "``ld_reference_file`` needs a lead position" is
        deliberately not enforced at this level, because stacked plots may
        supply a ``lead_positions`` list instead of ``lead_pos`` (broadcast
        mode). The composite configs enforce it instead: see
        ``PlotConfig.validate_ld_requires_lead_pos`` and
        ``StackedPlotConfig.validate_broadcast_ld``.

        Returns:
            The model itself.
        """
        # Intentionally a no-op so that broadcast mode remains constructible.
        return self
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
class PlotConfig(BaseModel):
    """Top-level, immutable configuration for the plot() method.

    Groups the region, column, display and LD sub-configs into one validated
    object. Build it either from already-constructed sub-configs or from flat
    keyword arguments via :meth:`from_kwargs`.

    Attributes:
        region: Genomic region specification (required).
        columns: DataFrame column name mappings.
        display: Display and visual options.
        ld: Linkage disequilibrium configuration.

    Example:
        >>> # From nested configs
        >>> config = PlotConfig(
        ...     region=RegionConfig(chrom=1, start=1000000, end=2000000),
        ...     display=DisplayConfig(snp_labels=False),
        ... )
        >>>
        >>> # From flat keyword arguments
        >>> config = PlotConfig.from_kwargs(
        ...     chrom=1, start=1000000, end=2000000,
        ...     snp_labels=False, lead_pos=1500000,
        ... )
    """

    # Frozen: attribute assignment after construction raises.
    model_config = ConfigDict(frozen=True)

    region: RegionConfig
    columns: ColumnConfig = Field(default_factory=ColumnConfig)
    display: DisplayConfig = Field(default_factory=DisplayConfig)
    ld: LDConfig = Field(default_factory=LDConfig)

    @model_validator(mode="after")
    def validate_ld_requires_lead_pos(self) -> "PlotConfig":
        """Require ``ld.lead_pos`` whenever ``ld.ld_reference_file`` is set.

        Returns:
            The validated model itself.

        Raises:
            ValueError: If a reference file is given without a lead position.
        """
        ld_settings = self.ld
        if ld_settings.ld_reference_file is None or ld_settings.lead_pos is not None:
            return self
        raise ValueError("lead_pos is required when ld_reference_file is provided")

    @classmethod
    def from_kwargs(
        cls,
        *,
        # Region params (required)
        chrom: int,
        start: int,
        end: int,
        # Column params
        pos_col: str = "ps",
        p_col: str = "p_wald",
        rs_col: str = "rs",
        # Display params
        snp_labels: bool = True,
        label_top_n: int = 5,
        show_recombination: bool = True,
        figsize: Tuple[float, float] = (12.0, 8.0),
        # LD params
        lead_pos: Optional[int] = None,
        ld_reference_file: Optional[str] = None,
        ld_col: Optional[str] = None,
    ) -> "PlotConfig":
        """Build a PlotConfig from flat, keyword-only arguments.

        Sorts the flat arguments into the four nested sub-configs so callers
        that still pass individual keywords do not have to construct them.

        Args:
            chrom: Chromosome number.
            start: Start position (bp).
            end: End position (bp).
            pos_col: Column name for position.
            p_col: Column name for p-value.
            rs_col: Column name for SNP ID.
            snp_labels: Whether to show SNP labels.
            label_top_n: Number of top SNPs to label.
            show_recombination: Whether to show recombination overlay.
            figsize: Figure size (width, height).
            lead_pos: Position of lead SNP.
            ld_reference_file: PLINK binary fileset path.
            ld_col: Pre-computed LD column name.

        Returns:
            PlotConfig with nested config objects.

        Raises:
            ValidationError: If parameters are invalid.
        """
        # Sub-configs are built in field order so the first invalid group
        # is the one reported.
        region_cfg = RegionConfig(chrom=chrom, start=start, end=end)
        column_cfg = ColumnConfig(pos_col=pos_col, p_col=p_col, rs_col=rs_col)
        display_cfg = DisplayConfig(
            snp_labels=snp_labels,
            label_top_n=label_top_n,
            show_recombination=show_recombination,
            figsize=figsize,
        )
        ld_cfg = LDConfig(
            lead_pos=lead_pos,
            ld_reference_file=ld_reference_file,
            ld_col=ld_col,
        )
        return cls(
            region=region_cfg,
            columns=column_cfg,
            display=display_cfg,
            ld=ld_cfg,
        )
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
class StackedPlotConfig(BaseModel):
    """Top-level, immutable configuration for the plot_stacked() method.

    Carries the same four sub-configs as PlotConfig plus list-valued options
    for stacked plots: lead positions, panel labels and LD reference files.

    Attributes:
        region: Genomic region specification (required).
        columns: DataFrame column name mappings.
        display: Display and visual options.
        ld: Linkage disequilibrium configuration (single file for broadcast).
        lead_positions: List of lead SNP positions (one per panel).
        panel_labels: List of panel labels (one per panel).
        ld_reference_files: List of PLINK filesets (one per panel).

    Example:
        >>> config = StackedPlotConfig.from_kwargs(
        ...     chrom=1, start=1000000, end=2000000,
        ...     lead_positions=[1500000, 1600000],
        ...     panel_labels=["Study A", "Study B"],
        ... )
    """

    # Frozen: attribute assignment after construction raises.
    model_config = ConfigDict(frozen=True)

    region: RegionConfig
    columns: ColumnConfig = Field(default_factory=ColumnConfig)
    display: DisplayConfig = Field(default_factory=DisplayConfig)
    ld: LDConfig = Field(default_factory=LDConfig)

    # Stacked-specific list parameters
    lead_positions: Optional[List[int]] = Field(
        None,
        description="Lead SNP positions (one per panel)",
    )
    panel_labels: Optional[List[str]] = Field(
        None,
        description="Panel labels (one per panel)",
    )
    ld_reference_files: Optional[List[str]] = Field(
        None,
        description="PLINK filesets (one per panel)",
    )

    @model_validator(mode="after")
    def validate_broadcast_ld(self) -> "StackedPlotConfig":
        """Require ``lead_positions`` when a single LD file is broadcast.

        Broadcast mode is ``ld.ld_reference_file`` set while ``ld.lead_pos``
        is ``None``; in that case the ``lead_positions`` list must be given.

        Returns:
            The validated model itself.

        Raises:
            ValueError: If broadcast mode is used without ``lead_positions``.
        """
        ld_settings = self.ld
        is_broadcast = (
            ld_settings.ld_reference_file is not None
            and ld_settings.lead_pos is None
        )
        if is_broadcast and self.lead_positions is None:
            raise ValueError(
                "lead_positions is required when ld_reference_file is provided "
                "for broadcast (one lead position per panel)"
            )
        return self

    @classmethod
    def from_kwargs(
        cls,
        *,
        # Region params (required)
        chrom: int,
        start: int,
        end: int,
        # Column params
        pos_col: str = "ps",
        p_col: str = "p_wald",
        rs_col: str = "rs",
        # Display params
        snp_labels: bool = True,
        label_top_n: int = 3,  # Stacked panels default to 3 (less crowded)
        show_recombination: bool = True,
        figsize: Tuple[float, float] = (12.0, 8.0),
        # LD params (single for broadcast)
        ld_reference_file: Optional[str] = None,
        ld_col: Optional[str] = None,
        # Stacked-specific list params
        lead_positions: Optional[List[int]] = None,
        panel_labels: Optional[List[str]] = None,
        ld_reference_files: Optional[List[str]] = None,
    ) -> "StackedPlotConfig":
        """Build a StackedPlotConfig from flat, keyword-only arguments.

        Sorts the flat arguments into the nested sub-configs and passes the
        list-valued options through unchanged. There is no ``lead_pos``
        argument here; the nested LDConfig keeps its default for that field.

        Args:
            chrom: Chromosome number.
            start: Start position (bp).
            end: End position (bp).
            pos_col: Column name for position.
            p_col: Column name for p-value.
            rs_col: Column name for SNP ID.
            snp_labels: Whether to show SNP labels.
            label_top_n: Number of top SNPs to label (default 3 for stacked).
            show_recombination: Whether to show recombination overlay.
            figsize: Figure size (width, height).
            ld_reference_file: Single PLINK fileset (broadcast to all panels).
            ld_col: Pre-computed LD column name.
            lead_positions: List of lead SNP positions.
            panel_labels: List of panel labels.
            ld_reference_files: List of PLINK filesets.

        Returns:
            StackedPlotConfig with nested config objects.

        Raises:
            ValidationError: If parameters are invalid.
        """
        # Sub-configs are built in field order so the first invalid group
        # is the one reported.
        region_cfg = RegionConfig(chrom=chrom, start=start, end=end)
        column_cfg = ColumnConfig(pos_col=pos_col, p_col=p_col, rs_col=rs_col)
        display_cfg = DisplayConfig(
            snp_labels=snp_labels,
            label_top_n=label_top_n,
            show_recombination=show_recombination,
            figsize=figsize,
        )
        ld_cfg = LDConfig(
            ld_reference_file=ld_reference_file,
            ld_col=ld_col,
        )
        return cls(
            region=region_cfg,
            columns=column_cfg,
            display=display_cfg,
            ld=ld_cfg,
            lead_positions=lead_positions,
            panel_labels=panel_labels,
            ld_reference_files=ld_reference_files,
        )
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
# Public API of this module: the leaf configs first, then the two composite
# configs that are built from them.
__all__ = [
    # Leaf configs
    "RegionConfig",
    "ColumnConfig",
    "DisplayConfig",
    "LDConfig",
    # Composite configs
    "PlotConfig",
    "StackedPlotConfig",
]
|