pylocuszoom 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pylocuszoom/__init__.py +52 -1
- pylocuszoom/backends/base.py +45 -0
- pylocuszoom/backends/bokeh_backend.py +138 -48
- pylocuszoom/backends/matplotlib_backend.py +104 -0
- pylocuszoom/backends/plotly_backend.py +212 -64
- pylocuszoom/colors.py +3 -1
- pylocuszoom/gene_track.py +1 -0
- pylocuszoom/loaders.py +862 -0
- pylocuszoom/plotter.py +84 -113
- pylocuszoom/py.typed +0 -0
- pylocuszoom/recombination.py +4 -4
- pylocuszoom/schemas.py +395 -0
- {pylocuszoom-0.3.0.dist-info → pylocuszoom-0.5.0.dist-info}/METADATA +104 -24
- pylocuszoom-0.5.0.dist-info/RECORD +24 -0
- pylocuszoom-0.3.0.dist-info/RECORD +0 -21
- {pylocuszoom-0.3.0.dist-info → pylocuszoom-0.5.0.dist-info}/WHEEL +0 -0
- {pylocuszoom-0.3.0.dist-info → pylocuszoom-0.5.0.dist-info}/licenses/LICENSE.md +0 -0
pylocuszoom/schemas.py
ADDED
|
@@ -0,0 +1,395 @@
|
|
|
1
|
+
"""Pydantic validation schemas for loaded data.
|
|
2
|
+
|
|
3
|
+
Provides validation models for GWAS, eQTL, fine-mapping, and gene annotation
|
|
4
|
+
DataFrames to ensure data quality before plotting.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Optional, Union
|
|
9
|
+
|
|
10
|
+
import pandas as pd
|
|
11
|
+
from pydantic import BaseModel, ConfigDict, field_validator, model_validator
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class LoaderValidationError(Exception):
    """Signal that loaded data did not pass validation.

    Raised by the DataFrame and file-path validators in this module.
    """
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# =============================================================================
|
|
21
|
+
# GWAS Validation
|
|
22
|
+
# =============================================================================
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class GWASRowModel(BaseModel):
    """Schema for one row of GWAS results.

    ``ps`` (position) and ``p_wald`` (p-value) are required; ``rs`` and
    ``chr`` are optional and default to ``None``.
    """

    # Fields beyond the ones declared below are accepted rather than rejected.
    model_config = ConfigDict(extra="allow")

    ps: int
    p_wald: float
    rs: Optional[str] = None
    chr: Optional[Union[str, int]] = None

    @field_validator("ps")
    @classmethod
    def position_positive(cls, v: int) -> int:
        """Accept only strictly positive positions."""
        if v > 0:
            return v
        raise ValueError(f"Position must be positive, got {v}")

    @field_validator("p_wald")
    @classmethod
    def pvalue_in_range(cls, v: float) -> float:
        """Accept only p-values in the half-open interval (0, 1]."""
        # Chained comparison is False for NaN, so NaN is rejected as well.
        if 0 < v <= 1:
            return v
        raise ValueError(f"P-value must be in range (0, 1], got {v}")
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def validate_gwas_dataframe(
    df: pd.DataFrame,
    pos_col: str = "ps",
    p_col: str = "p_wald",
    rs_col: str = "rs",
    strict: bool = False,
) -> pd.DataFrame:
    """Validate a GWAS DataFrame.

    Verifies that the position and p-value columns exist, are numeric,
    hold no missing values, and lie in their valid ranges (positions > 0,
    p-values in (0, 1]). Missing columns are reported immediately; all
    other problems are collected and reported together.

    Args:
        df: DataFrame to validate.
        pos_col: Column name for position.
        p_col: Column name for p-value.
        rs_col: Column name for SNP ID. Accepted for interface
            compatibility; this function does not inspect that column.
        strict: Accepted for interface compatibility. The checks below are
            vectorised over all rows regardless of this flag.

    Returns:
        The input DataFrame, unmodified, when every check passes.

    Raises:
        LoaderValidationError: If a required column is missing, or if any
            dtype, range, or missing-value check fails.
    """
    errors = [
        f"Missing required column: '{col}'"
        for col in (pos_col, p_col)
        if col not in df.columns
    ]

    # Without the required columns none of the later checks can run.
    if errors:
        raise LoaderValidationError(
            "GWAS validation failed:\n - " + "\n - ".join(errors)
        )

    # Check data types
    pos_is_numeric = pd.api.types.is_numeric_dtype(df[pos_col])
    p_is_numeric = pd.api.types.is_numeric_dtype(df[p_col])

    if not pos_is_numeric:
        errors.append(f"Column '{pos_col}' must be numeric, got {df[pos_col].dtype}")

    if not p_is_numeric:
        errors.append(f"Column '{p_col}' must be numeric, got {df[p_col].dtype}")

    # Check value ranges. These comparisons are only attempted on numeric
    # columns: comparing e.g. a string column with 0 raises TypeError, which
    # would escape as a raw exception instead of a LoaderValidationError.
    if pos_is_numeric:
        non_positive = df[pos_col] <= 0
        if non_positive.any():
            n_invalid = non_positive.sum()
            errors.append(f"Column '{pos_col}' has {n_invalid} non-positive values")

    if p_is_numeric:
        out_of_range = (df[p_col] <= 0) | (df[p_col] > 1)
        if out_of_range.any():
            n_invalid = out_of_range.sum()
            errors.append(
                f"Column '{p_col}' has {n_invalid} values outside range (0, 1]"
            )

    # Check for NaN in required columns (NaN never trips the range checks
    # above because comparisons with NaN evaluate to False).
    pos_missing = df[pos_col].isna()
    if pos_missing.any():
        n_na = pos_missing.sum()
        errors.append(f"Column '{pos_col}' has {n_na} missing values")

    p_missing = df[p_col].isna()
    if p_missing.any():
        n_na = p_missing.sum()
        errors.append(f"Column '{p_col}' has {n_na} missing values")

    if errors:
        raise LoaderValidationError(
            "GWAS validation failed:\n - " + "\n - ".join(errors)
        )

    return df
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
# =============================================================================
|
|
121
|
+
# eQTL Validation
|
|
122
|
+
# =============================================================================
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
class EQTLRowModel(BaseModel):
    """Schema for one row of eQTL results.

    ``pos``, ``p_value`` and ``gene`` are required; ``effect`` is optional
    and defaults to ``None``.
    """

    # Fields beyond the ones declared below are accepted rather than rejected.
    model_config = ConfigDict(extra="allow")

    pos: int
    p_value: float
    gene: str
    effect: Optional[float] = None

    @field_validator("pos")
    @classmethod
    def position_positive(cls, v: int) -> int:
        """Accept only strictly positive positions."""
        if v > 0:
            return v
        raise ValueError(f"Position must be positive, got {v}")

    @field_validator("p_value")
    @classmethod
    def pvalue_in_range(cls, v: float) -> float:
        """Accept only p-values in the half-open interval (0, 1]."""
        # Chained comparison is False for NaN, so NaN is rejected as well.
        if 0 < v <= 1:
            return v
        raise ValueError(f"P-value must be in range (0, 1], got {v}")
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def validate_eqtl_dataframe(
    df: pd.DataFrame,
    strict: bool = False,
) -> pd.DataFrame:
    """Validate an eQTL DataFrame.

    Requires the columns ``pos``, ``p_value`` and ``gene``. ``pos`` must be
    numeric and strictly positive; ``p_value`` must be numeric and within
    (0, 1]. Range checks are comparison based, so NaN entries are not
    flagged by them.

    Args:
        df: DataFrame to validate.
        strict: Accepted by the signature; not consulted by this function.

    Returns:
        The input DataFrame, unmodified, when every check passes.

    Raises:
        LoaderValidationError: If a required column is missing or a dtype
            or range check fails.
    """
    absent = [
        f"Missing required column: '{name}'"
        for name in ("pos", "p_value", "gene")
        if name not in df.columns
    ]
    if absent:
        raise LoaderValidationError(
            "eQTL validation failed:\n - " + "\n - ".join(absent)
        )

    problems = []

    # Position: dtype first, range only when the dtype is usable.
    positions = df["pos"]
    if pd.api.types.is_numeric_dtype(positions):
        non_positive = positions <= 0
        if non_positive.any():
            problems.append(
                f"Column 'pos' has {non_positive.sum()} non-positive values"
            )
    else:
        problems.append(f"Column 'pos' must be numeric, got {positions.dtype}")

    # P-value: same two-step pattern, with the (0, 1] interval.
    pvalues = df["p_value"]
    if pd.api.types.is_numeric_dtype(pvalues):
        outside = (pvalues <= 0) | (pvalues > 1)
        if outside.any():
            problems.append(
                f"Column 'p_value' has {outside.sum()} values outside range (0, 1]"
            )
    else:
        problems.append(f"Column 'p_value' must be numeric, got {pvalues.dtype}")

    if problems:
        raise LoaderValidationError(
            "eQTL validation failed:\n - " + "\n - ".join(problems)
        )

    return df
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
# =============================================================================
|
|
203
|
+
# Fine-mapping Validation
|
|
204
|
+
# =============================================================================
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
class FinemappingRowModel(BaseModel):
    """Schema for one row of fine-mapping results.

    ``pos`` and ``pip`` are required; ``cs`` is optional and defaults to
    ``None``.
    """

    # Fields beyond the ones declared below are accepted rather than rejected.
    model_config = ConfigDict(extra="allow")

    pos: int
    pip: float
    cs: Optional[int] = None

    @field_validator("pos")
    @classmethod
    def position_positive(cls, v: int) -> int:
        """Accept only strictly positive positions."""
        if v > 0:
            return v
        raise ValueError(f"Position must be positive, got {v}")

    @field_validator("pip")
    @classmethod
    def pip_in_range(cls, v: float) -> float:
        """Accept only PIP values in the closed interval [0, 1]."""
        # Chained comparison is False for NaN, so NaN is rejected as well.
        if 0 <= v <= 1:
            return v
        raise ValueError(f"PIP must be in range [0, 1], got {v}")
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def validate_finemapping_dataframe(
    df: pd.DataFrame,
    cs_col: str = "cs",
    strict: bool = False,
) -> pd.DataFrame:
    """Validate a fine-mapping DataFrame.

    Requires the columns ``pos`` and ``pip``. ``pos`` must be numeric and
    strictly positive; ``pip`` must be numeric and within [0, 1]. Range
    checks are comparison based, so NaN entries are not flagged by them.

    Args:
        df: DataFrame to validate.
        cs_col: Column name for credible set. Accepted by the signature;
            this function does not inspect that column.
        strict: Accepted by the signature; not consulted by this function.

    Returns:
        The input DataFrame, unmodified, when every check passes.

    Raises:
        LoaderValidationError: If a required column is missing or a dtype
            or range check fails.
    """
    absent = [
        f"Missing required column: '{name}'"
        for name in ("pos", "pip")
        if name not in df.columns
    ]
    if absent:
        raise LoaderValidationError(
            "Fine-mapping validation failed:\n - " + "\n - ".join(absent)
        )

    problems = []

    # Position: dtype first, range only when the dtype is usable.
    positions = df["pos"]
    if pd.api.types.is_numeric_dtype(positions):
        non_positive = positions <= 0
        if non_positive.any():
            problems.append(
                f"Column 'pos' has {non_positive.sum()} non-positive values"
            )
    else:
        problems.append(f"Column 'pos' must be numeric, got {positions.dtype}")

    # PIP: a probability, so both interval ends are inclusive.
    pips = df["pip"]
    if pd.api.types.is_numeric_dtype(pips):
        outside = (pips < 0) | (pips > 1)
        if outside.any():
            problems.append(
                f"Column 'pip' has {outside.sum()} values outside range [0, 1]"
            )
    else:
        problems.append(f"Column 'pip' must be numeric, got {pips.dtype}")

    if problems:
        raise LoaderValidationError(
            "Fine-mapping validation failed:\n - " + "\n - ".join(problems)
        )

    return df
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
# =============================================================================
|
|
286
|
+
# Gene Annotation Validation
|
|
287
|
+
# =============================================================================
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
class GeneRowModel(BaseModel):
    """Schema for one gene-annotation row.

    ``chr``, ``start``, ``end`` and ``gene_name`` are required; ``strand``
    is optional and defaults to ``None``.
    """

    # Fields beyond the ones declared below are accepted rather than rejected.
    model_config = ConfigDict(extra="allow")

    chr: Union[str, int]
    start: int
    end: int
    gene_name: str
    strand: Optional[str] = None

    @field_validator("start", "end")
    @classmethod
    def position_positive(cls, v: int) -> int:
        """Accept only non-negative coordinates (zero is allowed)."""
        if v >= 0:
            return v
        raise ValueError(f"Position must be non-negative, got {v}")

    @model_validator(mode="after")
    def start_before_end(self):
        """Require ``start`` to be less than or equal to ``end``."""
        if self.start <= self.end:
            return self
        raise ValueError(f"Start ({self.start}) must be <= end ({self.end})")
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
def validate_genes_dataframe(
    df: pd.DataFrame,
    strict: bool = False,
) -> pd.DataFrame:
    """Validate a genes DataFrame.

    Requires the columns ``chr``, ``start``, ``end`` and ``gene_name``.
    ``start`` and ``end`` must be numeric, ``start`` must be non-negative,
    and ``end`` must not be smaller than ``start``. Missing columns are
    reported immediately; other problems are collected and reported
    together.

    Args:
        df: DataFrame to validate.
        strict: Accepted for interface compatibility. The checks below are
            vectorised over all rows regardless of this flag.

    Returns:
        The input DataFrame, unmodified, when every check passes.

    Raises:
        LoaderValidationError: If a required column is missing or a dtype
            or range check fails.
    """
    errors = [
        f"Missing required column: '{col}'"
        for col in ("chr", "start", "end", "gene_name")
        if col not in df.columns
    ]

    # Without the required columns none of the later checks can run.
    if errors:
        raise LoaderValidationError(
            "Gene annotation validation failed:\n - " + "\n - ".join(errors)
        )

    # Check data types
    start_is_numeric = pd.api.types.is_numeric_dtype(df["start"])
    end_is_numeric = pd.api.types.is_numeric_dtype(df["end"])

    if not start_is_numeric:
        errors.append(f"Column 'start' must be numeric, got {df['start'].dtype}")

    if not end_is_numeric:
        errors.append(f"Column 'end' must be numeric, got {df['end'].dtype}")

    # Check ranges. Comparisons are only attempted on numeric columns:
    # comparing e.g. a string column with a number raises TypeError, which
    # would escape as a raw exception instead of a LoaderValidationError.
    if start_is_numeric:
        negative = df["start"] < 0
        if negative.any():
            n_invalid = negative.sum()
            errors.append(f"Column 'start' has {n_invalid} negative values")

    if start_is_numeric and end_is_numeric:
        inverted = df["end"] < df["start"]
        if inverted.any():
            n_invalid = inverted.sum()
            errors.append(f"Found {n_invalid} genes where end < start")

    if errors:
        raise LoaderValidationError(
            "Gene annotation validation failed:\n - " + "\n - ".join(errors)
        )

    return df
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
# =============================================================================
|
|
371
|
+
# File Path Validation
|
|
372
|
+
# =============================================================================
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
def validate_file_path(filepath: Union[str, Path]) -> Path:
    """Validate that a file path exists, is a regular file, and is readable.

    Args:
        filepath: Path to validate.

    Returns:
        Validated Path object.

    Raises:
        LoaderValidationError: If the path doesn't exist, isn't a file, or
            isn't readable by the current process.
    """
    # Local import keeps this block self-contained; ``os`` is standard library.
    import os

    path = Path(filepath)

    if not path.exists():
        raise LoaderValidationError(f"File not found: {path}")

    if not path.is_file():
        raise LoaderValidationError(f"Not a file: {path}")

    # Enforce the documented "is readable" contract so permission problems
    # surface here as a validation error rather than later during parsing.
    if not os.access(path, os.R_OK):
        raise LoaderValidationError(f"File is not readable: {path}")

    return path
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pylocuszoom
|
|
3
|
-
Version: 0.
|
|
4
|
-
Summary:
|
|
3
|
+
Version: 0.5.0
|
|
4
|
+
Summary: Publication-ready regional association plots with LD coloring, gene tracks, and recombination overlays
|
|
5
5
|
Project-URL: Homepage, https://github.com/michael-denyer/pylocuszoom
|
|
6
6
|
Project-URL: Documentation, https://github.com/michael-denyer/pylocuszoom#readme
|
|
7
7
|
Project-URL: Repository, https://github.com/michael-denyer/pylocuszoom
|
|
@@ -27,6 +27,7 @@ Requires-Dist: matplotlib>=3.5.0
|
|
|
27
27
|
Requires-Dist: numpy>=1.21.0
|
|
28
28
|
Requires-Dist: pandas>=1.4.0
|
|
29
29
|
Requires-Dist: plotly>=5.0.0
|
|
30
|
+
Requires-Dist: pydantic>=2.0.0
|
|
30
31
|
Requires-Dist: pyliftover>=0.4
|
|
31
32
|
Provides-Extra: all
|
|
32
33
|
Requires-Dist: pyspark>=3.0.0; extra == 'all'
|
|
@@ -38,39 +39,44 @@ Provides-Extra: spark
|
|
|
38
39
|
Requires-Dist: pyspark>=3.0.0; extra == 'spark'
|
|
39
40
|
Description-Content-Type: text/markdown
|
|
40
41
|
|
|
41
|
-
# pyLocusZoom
|
|
42
|
-
|
|
43
42
|
[](https://github.com/michael-denyer/pyLocusZoom/actions/workflows/ci.yml)
|
|
44
|
-
[](https://codecov.io/gh/michael-denyer/pyLocusZoom)
|
|
44
|
+
[](https://pypi.org/project/pylocuszoom/)
|
|
45
|
+
[](https://anaconda.org/bioconda/pylocuszoom)
|
|
46
|
+
[](https://www.gnu.org/licenses/gpl-3.0)
|
|
45
47
|
[](https://www.python.org/downloads/)
|
|
46
48
|
[](https://github.com/astral-sh/ruff)
|
|
47
|
-
|
|
48
49
|
[](https://matplotlib.org/)
|
|
49
50
|
[](https://plotly.com/python/)
|
|
50
51
|
[](https://bokeh.org/)
|
|
51
52
|
[](https://pandas.pydata.org/)
|
|
52
|
-
|
|
53
53
|
<img src="logo.svg" alt="pyLocusZoom logo" width="120" align="right">
|
|
54
|
+
# pyLocusZoom
|
|
54
55
|
|
|
55
|
-
|
|
56
|
+
Publication-ready regional association plots with LD coloring, gene tracks, and recombination overlays.
|
|
56
57
|
|
|
57
58
|
Inspired by [LocusZoom](http://locuszoom.org/) and [locuszoomr](https://github.com/myles-lewis/locuszoomr).
|
|
58
59
|
|
|
59
60
|
## Features
|
|
60
61
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
- **
|
|
64
|
-
- **
|
|
65
|
-
- **
|
|
66
|
-
- **
|
|
67
|
-
- **
|
|
68
|
-
- **
|
|
69
|
-
- **eQTL overlay**: Expression QTL data as separate panel
|
|
70
|
-
- **PySpark support**: Handles large-scale genomics DataFrames
|
|
62
|
+
1. **Regional association plot**:
|
|
63
|
+
|
|
64
|
+
- **Multi-species support**: Built-in reference data for *Canis lupus familiaris* (CanFam3.1/CanFam4) and *Felis catus* (FelCat9), or optionally provide your own for any species
|
|
65
|
+
- **LD coloring**: SNPs colored by linkage disequilibrium (R²) with lead variant
|
|
66
|
+
- **Gene tracks**: Annotated gene/exon positions below the association plot
|
|
67
|
+
- **Recombination rate**: Overlay showing recombination rate across region (*Canis lupus familiaris* only)
|
|
68
|
+
- **SNP labels (matplotlib)**: Automatic labeling of lead SNPs with RS ID
|
|
69
|
+
- **Tooltips (Bokeh and Plotly)**: Mouseover for detailed SNP data
|
|
71
70
|
|
|
72
71
|

|
|
73
72
|
|
|
73
|
+
2. **Stacked plots**: Compare multiple GWAS/phenotypes vertically
|
|
74
|
+
3. **eQTL plot**: Expression QTL data aligned with association plots and gene tracks
|
|
75
|
+
4. **Fine-mapping plots**: Visualize SuSiE credible sets with posterior inclusion probabilities
|
|
76
|
+
5. **Multiple charting libraries**: matplotlib (static), plotly (interactive), bokeh (dashboards)
|
|
77
|
+
6. **Pandas and PySpark support**: Works with both Pandas and PySpark DataFrames for large-scale genomics data
|
|
78
|
+
7. **Convenience data file loaders**: Load and validate common GWAS, eQTL and fine-mapping file formats
|
|
79
|
+
|
|
74
80
|
## Installation
|
|
75
81
|
|
|
76
82
|
```bash
|
|
@@ -186,13 +192,13 @@ fig.write_html("plot.html")
|
|
|
186
192
|
fig = plotter.plot(gwas_df, chrom=1, start=1000000, end=2000000, backend="bokeh")
|
|
187
193
|
```
|
|
188
194
|
|
|
189
|
-
| Backend | Output | Best For |
|
|
190
|
-
|
|
191
|
-
| `matplotlib` | Static PNG/PDF/SVG | Publications, presentations |
|
|
192
|
-
| `plotly` | Interactive HTML | Web reports, data exploration |
|
|
193
|
-
| `bokeh` | Interactive HTML | Dashboards, web apps |
|
|
195
|
+
| Backend | Output | Best For | Features |
|
|
196
|
+
|---------|--------|----------|----------|
|
|
197
|
+
| `matplotlib` | Static PNG/PDF/SVG | Publications, presentations | Full feature set with SNP labels |
|
|
198
|
+
| `plotly` | Interactive HTML | Web reports, data exploration | Hover tooltips, pan/zoom |
|
|
199
|
+
| `bokeh` | Interactive HTML | Dashboards, web apps | Hover tooltips, pan/zoom |
|
|
194
200
|
|
|
195
|
-
> **Note:** All backends support gene
|
|
201
|
+
> **Note:** All backends support scatter plots, gene tracks, recombination overlay, and LD legend. SNP labels (auto-positioned with adjustText) are matplotlib-only; interactive backends use hover tooltips instead.
|
|
196
202
|
|
|
197
203
|
## Stacked Plots
|
|
198
204
|
|
|
@@ -209,6 +215,8 @@ fig = plotter.plot_stacked(
|
|
|
209
215
|
)
|
|
210
216
|
```
|
|
211
217
|
|
|
218
|
+

|
|
219
|
+
|
|
212
220
|
## eQTL Overlay
|
|
213
221
|
|
|
214
222
|
Add expression QTL data as a separate panel:
|
|
@@ -229,6 +237,30 @@ fig = plotter.plot_stacked(
|
|
|
229
237
|
)
|
|
230
238
|
```
|
|
231
239
|
|
|
240
|
+

|
|
241
|
+
|
|
242
|
+
## Fine-mapping Visualization
|
|
243
|
+
|
|
244
|
+
Visualize SuSiE or other fine-mapping results with credible set coloring:
|
|
245
|
+
|
|
246
|
+
```python
|
|
247
|
+
finemapping_df = pd.DataFrame({
|
|
248
|
+
"pos": [1000500, 1001200, 1002000, 1003500],
|
|
249
|
+
"pip": [0.85, 0.12, 0.02, 0.45], # Posterior inclusion probability
|
|
250
|
+
"cs": [1, 1, 0, 2], # Credible set assignment (0 = not in CS)
|
|
251
|
+
})
|
|
252
|
+
|
|
253
|
+
fig = plotter.plot_stacked(
|
|
254
|
+
[gwas_df],
|
|
255
|
+
chrom=1, start=1000000, end=2000000,
|
|
256
|
+
finemapping_df=finemapping_df,
|
|
257
|
+
finemapping_cs_col="cs",
|
|
258
|
+
genes_df=genes_df,
|
|
259
|
+
)
|
|
260
|
+
```
|
|
261
|
+
|
|
262
|
+

|
|
263
|
+
|
|
232
264
|
## PySpark Support
|
|
233
265
|
|
|
234
266
|
For large-scale genomics data, pass PySpark DataFrames directly:
|
|
@@ -245,6 +277,47 @@ pandas_df = to_pandas(spark_gwas_df, sample_size=100000)
|
|
|
245
277
|
|
|
246
278
|
Install PySpark support: `uv add pylocuszoom[spark]`
|
|
247
279
|
|
|
280
|
+
## Loading Data from Files
|
|
281
|
+
|
|
282
|
+
pyLocusZoom includes loaders for common GWAS, eQTL, and fine-mapping file formats:
|
|
283
|
+
|
|
284
|
+
```python
|
|
285
|
+
from pylocuszoom import (
|
|
286
|
+
# GWAS loaders
|
|
287
|
+
load_gwas, # Auto-detect format
|
|
288
|
+
load_plink_assoc, # PLINK .assoc, .assoc.linear, .qassoc
|
|
289
|
+
load_regenie, # REGENIE .regenie
|
|
290
|
+
load_bolt_lmm, # BOLT-LMM .stats
|
|
291
|
+
load_gemma, # GEMMA .assoc.txt
|
|
292
|
+
load_saige, # SAIGE output
|
|
293
|
+
# eQTL loaders
|
|
294
|
+
load_gtex_eqtl, # GTEx significant pairs
|
|
295
|
+
load_eqtl_catalogue, # eQTL Catalogue format
|
|
296
|
+
# Fine-mapping loaders
|
|
297
|
+
load_susie, # SuSiE output
|
|
298
|
+
load_finemap, # FINEMAP .snp output
|
|
299
|
+
# Gene annotations
|
|
300
|
+
load_gtf, # GTF/GFF3 files
|
|
301
|
+
load_bed, # BED files
|
|
302
|
+
)
|
|
303
|
+
|
|
304
|
+
# Auto-detect GWAS format from filename
|
|
305
|
+
gwas_df = load_gwas("results.assoc.linear")
|
|
306
|
+
|
|
307
|
+
# Or use specific loader
|
|
308
|
+
gwas_df = load_regenie("ukb_results.regenie")
|
|
309
|
+
|
|
310
|
+
# Load gene annotations
|
|
311
|
+
genes_df = load_gtf("genes.gtf", feature_type="gene")
|
|
312
|
+
exons_df = load_gtf("genes.gtf", feature_type="exon")
|
|
313
|
+
|
|
314
|
+
# Load eQTL data
|
|
315
|
+
eqtl_df = load_gtex_eqtl("GTEx.signif_pairs.txt.gz", gene="BRCA1")
|
|
316
|
+
|
|
317
|
+
# Load fine-mapping results
|
|
318
|
+
fm_df = load_susie("susie_output.tsv")
|
|
319
|
+
```
|
|
320
|
+
|
|
248
321
|
## Data Formats
|
|
249
322
|
|
|
250
323
|
### GWAS Results DataFrame
|
|
@@ -371,6 +444,13 @@ plotter = LocusZoomPlotter(log_level="DEBUG")
|
|
|
371
444
|
Optional:
|
|
372
445
|
- pyspark >= 3.0.0 (for PySpark DataFrame support) - `uv add pylocuszoom[spark]`
|
|
373
446
|
|
|
447
|
+
## Documentation
|
|
448
|
+
|
|
449
|
+
- [User Guide](docs/USER_GUIDE.md) - Comprehensive documentation with API reference
|
|
450
|
+
- [Architecture](docs/ARCHITECTURE.md) - Design decisions and component overview
|
|
451
|
+
- [Example Notebook](examples/getting_started.ipynb) - Interactive tutorial
|
|
452
|
+
- [CHANGELOG](CHANGELOG.md) - Version history
|
|
453
|
+
|
|
374
454
|
## License
|
|
375
455
|
|
|
376
456
|
GPL-3.0-or-later
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
pylocuszoom/__init__.py,sha256=kEfcTSdVSQgP85IdHDqCQ-oEdq_-8n_Rg-xWWtHzKYk,4806
|
|
2
|
+
pylocuszoom/colors.py,sha256=IyzB6x5Q3kkulv-AnYoFVgvibgGgQYE27XjPx99BI5E,6624
|
|
3
|
+
pylocuszoom/eqtl.py,sha256=9lZJ8jT1WEj3won6D9B54xdqUvbRvxpOitf97NCUR28,6167
|
|
4
|
+
pylocuszoom/finemapping.py,sha256=PJ4HJYeCaHZecUmADCEGQxKd9HhhjrdIA1H5LQsUmLI,6332
|
|
5
|
+
pylocuszoom/gene_track.py,sha256=VWvPY0SrVFGJprTdttJ72r3JD-r3bdRDr0HDBai0oJw,18692
|
|
6
|
+
pylocuszoom/labels.py,sha256=Ams5WVZFNVT692BRiQ5wZcdbdNEAm5xtgRwmF5u0s_A,3492
|
|
7
|
+
pylocuszoom/ld.py,sha256=64xIulpDVvbMSryWUPoCQ99Odcjwf1wejpwVr_30MLU,6412
|
|
8
|
+
pylocuszoom/loaders.py,sha256=MK0jUpb09CLMuQYzIY2P1FF3hhtTwemLSiWv4RvLVf8,24350
|
|
9
|
+
pylocuszoom/logging.py,sha256=nZHEkbnjp8zoyWj_S-Hy9UQvUYLoMoxyiOWRozBT2dg,4987
|
|
10
|
+
pylocuszoom/plotter.py,sha256=A7phON4VYrzFZM0CjSlWwMPLYJmjGV1JF1uKHD8Ml2A,44205
|
|
11
|
+
pylocuszoom/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
|
+
pylocuszoom/recombination.py,sha256=_W6YVO8a8G8UmGGVda8J_MRI9dOJnffKj8491ILQf3Y,13807
|
|
13
|
+
pylocuszoom/schemas.py,sha256=LRUrtgSYH8CZ7G14VSvSL_Z-p4EQBSv2r5WzyKnROh8,11454
|
|
14
|
+
pylocuszoom/utils.py,sha256=fKNX9WSTbfHR1EpPYijt6ycNjXEjwzunQMHXAvHaK3s,5211
|
|
15
|
+
pylocuszoom/backends/__init__.py,sha256=7dlGvDoqMVK3fCtoMcI9zOP9qO0odQGPwfXhxnLfXfI,1196
|
|
16
|
+
pylocuszoom/backends/base.py,sha256=yLZkr5FRlYHs8L9ViNbTwu8hrBaHoVv_QbMujad2aTc,9793
|
|
17
|
+
pylocuszoom/backends/bokeh_backend.py,sha256=OFx_FISiDFG-A6NXcR8V-2MgkTRq2dXEcpqaWxf0YUg,21528
|
|
18
|
+
pylocuszoom/backends/matplotlib_backend.py,sha256=dUgH3ouQCkh55aufvjNIvkEqMG9oamKKvQYp2AEm4DY,11479
|
|
19
|
+
pylocuszoom/backends/plotly_backend.py,sha256=U3odXYLVCwTC6Xb-NeOs456tlr_qJQxgix7QIjJX-3Q,26922
|
|
20
|
+
pylocuszoom/reference_data/__init__.py,sha256=qqHqAUt1jebGlCN3CjqW3Z-_coHVNo5K3a3bb9o83hA,109
|
|
21
|
+
pylocuszoom-0.5.0.dist-info/METADATA,sha256=_0mgXIY3m1x5ATnQpLfAdRvBjh8iwW_FnX4i-aX4ne8,15228
|
|
22
|
+
pylocuszoom-0.5.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
23
|
+
pylocuszoom-0.5.0.dist-info/licenses/LICENSE.md,sha256=U2y_hv8RcN5lECA3uK88irU3ODUE1TDAPictcmnP0Q4,698
|
|
24
|
+
pylocuszoom-0.5.0.dist-info/RECORD,,
|
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
pylocuszoom/__init__.py,sha256=f8h22YYL3JkDP5P_dJftu98qlJkXvaAeyto5kVc8bzU,3785
|
|
2
|
-
pylocuszoom/colors.py,sha256=XXTCmCFfHrOrSiP6b0V8Kis7Z1tyvGEKJpdv59QDVQ8,6610
|
|
3
|
-
pylocuszoom/eqtl.py,sha256=9lZJ8jT1WEj3won6D9B54xdqUvbRvxpOitf97NCUR28,6167
|
|
4
|
-
pylocuszoom/finemapping.py,sha256=PJ4HJYeCaHZecUmADCEGQxKd9HhhjrdIA1H5LQsUmLI,6332
|
|
5
|
-
pylocuszoom/gene_track.py,sha256=CbKqIIl-3VpqIS0NWQ7p-VazhrgbF0-XDkkvoWx_2jI,18665
|
|
6
|
-
pylocuszoom/labels.py,sha256=Ams5WVZFNVT692BRiQ5wZcdbdNEAm5xtgRwmF5u0s_A,3492
|
|
7
|
-
pylocuszoom/ld.py,sha256=64xIulpDVvbMSryWUPoCQ99Odcjwf1wejpwVr_30MLU,6412
|
|
8
|
-
pylocuszoom/logging.py,sha256=nZHEkbnjp8zoyWj_S-Hy9UQvUYLoMoxyiOWRozBT2dg,4987
|
|
9
|
-
pylocuszoom/plotter.py,sha256=7wEN0b3emb0SM7gYn8bSjXBGNt7npw3y3y5AEC-Ha2k,43660
|
|
10
|
-
pylocuszoom/recombination.py,sha256=Q2tAft54nJWHlZt-vZje1r_5RP7D8_VUy5ab_deYXVw,13749
|
|
11
|
-
pylocuszoom/utils.py,sha256=fKNX9WSTbfHR1EpPYijt6ycNjXEjwzunQMHXAvHaK3s,5211
|
|
12
|
-
pylocuszoom/backends/__init__.py,sha256=7dlGvDoqMVK3fCtoMcI9zOP9qO0odQGPwfXhxnLfXfI,1196
|
|
13
|
-
pylocuszoom/backends/base.py,sha256=YEYMtaqPRTJQI-TPqK62-XPN6WvjVwqP6e6ydULK6H0,8523
|
|
14
|
-
pylocuszoom/backends/bokeh_backend.py,sha256=oOXTOhSx-tNgBzgtYfYvGUgNmuUP2vhCbtEBOZ5YZ18,18496
|
|
15
|
-
pylocuszoom/backends/matplotlib_backend.py,sha256=TIKaT7x0X3QKYUB5076XlG6RC0zbi0hcm3LSU7LGnmw,8521
|
|
16
|
-
pylocuszoom/backends/plotly_backend.py,sha256=ucQLmcz6WAdEvII5n3_rdHffZv5-a8FOp8nBGBng-hk,22222
|
|
17
|
-
pylocuszoom/reference_data/__init__.py,sha256=qqHqAUt1jebGlCN3CjqW3Z-_coHVNo5K3a3bb9o83hA,109
|
|
18
|
-
pylocuszoom-0.3.0.dist-info/METADATA,sha256=P-JbXF9KxFzixMAYKQalKO4F4TwOqxDZD0B0uciyy_c,12068
|
|
19
|
-
pylocuszoom-0.3.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
20
|
-
pylocuszoom-0.3.0.dist-info/licenses/LICENSE.md,sha256=U2y_hv8RcN5lECA3uK88irU3ODUE1TDAPictcmnP0Q4,698
|
|
21
|
-
pylocuszoom-0.3.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|