pylocuszoom 0.2.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package as they appear in the public registry. It is provided for informational purposes only.
- pylocuszoom/__init__.py +52 -1
- pylocuszoom/backends/base.py +47 -0
- pylocuszoom/backends/bokeh_backend.py +323 -61
- pylocuszoom/backends/matplotlib_backend.py +133 -7
- pylocuszoom/backends/plotly_backend.py +423 -33
- pylocuszoom/colors.py +3 -1
- pylocuszoom/finemapping.py +0 -1
- pylocuszoom/gene_track.py +232 -23
- pylocuszoom/loaders.py +862 -0
- pylocuszoom/plotter.py +354 -245
- pylocuszoom/py.typed +0 -0
- pylocuszoom/recombination.py +4 -4
- pylocuszoom/schemas.py +395 -0
- {pylocuszoom-0.2.0.dist-info → pylocuszoom-0.5.0.dist-info}/METADATA +125 -31
- pylocuszoom-0.5.0.dist-info/RECORD +24 -0
- pylocuszoom-0.2.0.dist-info/RECORD +0 -21
- {pylocuszoom-0.2.0.dist-info → pylocuszoom-0.5.0.dist-info}/WHEEL +0 -0
- {pylocuszoom-0.2.0.dist-info → pylocuszoom-0.5.0.dist-info}/licenses/LICENSE.md +0 -0
pylocuszoom/py.typed
ADDED
File without changes
pylocuszoom/recombination.py
CHANGED
@@ -42,7 +42,7 @@ def _normalize_build(build: Optional[str]) -> Optional[str]:
     if build is None:
         return None
     build_lower = build.lower().replace(".", "").replace("_", "")
-    if
+    if any(x in build_lower for x in ("canfam4", "uucfamgsd")):
         return "canfam4"
     if "canfam3" in build_lower:
         return "canfam3"
@@ -158,9 +158,9 @@ def get_default_data_dir() -> Path:
     """Get default directory for recombination map data.

     Returns platform-appropriate cache directory:
-    - macOS:
-    - Linux: ~/.cache/snp-scope-plot
+    - macOS/Linux: ~/.cache/snp-scope-plot (or $XDG_CACHE_HOME if set)
     - Windows: %LOCALAPPDATA%/snp-scope-plot
+    - Databricks: /dbfs/FileStore/reference_data/recombination_maps
     """
     if os.name == "nt":  # Windows
         base = Path(os.environ.get("LOCALAPPDATA", Path.home()))
@@ -207,7 +207,7 @@ def download_canine_recombination_maps(
     # Check if already downloaded
     if output_path.exists() and not force:
         existing_files = list(output_path.glob("chr*_recomb.tsv"))
-        if len(existing_files) >=
+        if len(existing_files) >= 39:  # 38 autosomes + X
             return output_path

     # Create output directory
pylocuszoom/schemas.py
ADDED
@@ -0,0 +1,395 @@
+"""Pydantic validation schemas for loaded data.
+
+Provides validation models for GWAS, eQTL, fine-mapping, and gene annotation
+DataFrames to ensure data quality before plotting.
+"""
+
+from pathlib import Path
+from typing import Optional, Union
+
+import pandas as pd
+from pydantic import BaseModel, ConfigDict, field_validator, model_validator
+
+
+class LoaderValidationError(Exception):
+    """Raised when loaded data fails validation."""
+
+    pass
+
+
+# =============================================================================
+# GWAS Validation
+# =============================================================================
+
+
+class GWASRowModel(BaseModel):
+    """Validation model for a single GWAS row."""
+
+    model_config = ConfigDict(extra="allow")
+
+    ps: int
+    p_wald: float
+    rs: Optional[str] = None
+    chr: Optional[Union[str, int]] = None
+
+    @field_validator("ps")
+    @classmethod
+    def position_positive(cls, v: int) -> int:
+        """Position must be positive."""
+        if v <= 0:
+            raise ValueError(f"Position must be positive, got {v}")
+        return v
+
+    @field_validator("p_wald")
+    @classmethod
+    def pvalue_in_range(cls, v: float) -> float:
+        """P-value must be between 0 and 1."""
+        if not (0 < v <= 1):
+            raise ValueError(f"P-value must be in range (0, 1], got {v}")
+        return v
+
+
+def validate_gwas_dataframe(
+    df: pd.DataFrame,
+    pos_col: str = "ps",
+    p_col: str = "p_wald",
+    rs_col: str = "rs",
+    strict: bool = False,
+) -> pd.DataFrame:
+    """Validate a GWAS DataFrame.
+
+    Args:
+        df: DataFrame to validate.
+        pos_col: Column name for position.
+        p_col: Column name for p-value.
+        rs_col: Column name for SNP ID.
+        strict: If True, validate every row. If False (default), validate schema only.
+
+    Returns:
+        Validated DataFrame.
+
+    Raises:
+        LoaderValidationError: If validation fails.
+    """
+    errors = []
+
+    # Check required columns exist
+    if pos_col not in df.columns:
+        errors.append(f"Missing required column: '{pos_col}'")
+    if p_col not in df.columns:
+        errors.append(f"Missing required column: '{p_col}'")
+
+    if errors:
+        raise LoaderValidationError(
+            "GWAS validation failed:\n - " + "\n - ".join(errors)
+        )
+
+    # Check data types
+    if not pd.api.types.is_numeric_dtype(df[pos_col]):
+        errors.append(f"Column '{pos_col}' must be numeric, got {df[pos_col].dtype}")
+
+    if not pd.api.types.is_numeric_dtype(df[p_col]):
+        errors.append(f"Column '{p_col}' must be numeric, got {df[p_col].dtype}")
+
+    # Check value ranges
+    if (df[pos_col] <= 0).any():
+        n_invalid = (df[pos_col] <= 0).sum()
+        errors.append(f"Column '{pos_col}' has {n_invalid} non-positive values")
+
+    if ((df[p_col] <= 0) | (df[p_col] > 1)).any():
+        n_invalid = ((df[p_col] <= 0) | (df[p_col] > 1)).sum()
+        errors.append(f"Column '{p_col}' has {n_invalid} values outside range (0, 1]")
+
+    # Check for NaN in required columns
+    if df[pos_col].isna().any():
+        n_na = df[pos_col].isna().sum()
+        errors.append(f"Column '{pos_col}' has {n_na} missing values")
+
+    if df[p_col].isna().any():
+        n_na = df[p_col].isna().sum()
+        errors.append(f"Column '{p_col}' has {n_na} missing values")
+
+    if errors:
+        raise LoaderValidationError(
+            "GWAS validation failed:\n - " + "\n - ".join(errors)
+        )
+
+    return df
+
+
+# =============================================================================
+# eQTL Validation
+# =============================================================================
+
+
+class EQTLRowModel(BaseModel):
+    """Validation model for a single eQTL row."""
+
+    model_config = ConfigDict(extra="allow")
+
+    pos: int
+    p_value: float
+    gene: str
+    effect: Optional[float] = None
+
+    @field_validator("pos")
+    @classmethod
+    def position_positive(cls, v: int) -> int:
+        """Position must be positive."""
+        if v <= 0:
+            raise ValueError(f"Position must be positive, got {v}")
+        return v
+
+    @field_validator("p_value")
+    @classmethod
+    def pvalue_in_range(cls, v: float) -> float:
+        """P-value must be between 0 and 1."""
+        if not (0 < v <= 1):
+            raise ValueError(f"P-value must be in range (0, 1], got {v}")
+        return v
+
+
+def validate_eqtl_dataframe(
+    df: pd.DataFrame,
+    strict: bool = False,
+) -> pd.DataFrame:
+    """Validate an eQTL DataFrame.
+
+    Args:
+        df: DataFrame to validate.
+        strict: If True, validate every row.
+
+    Returns:
+        Validated DataFrame.
+
+    Raises:
+        LoaderValidationError: If validation fails.
+    """
+    errors = []
+
+    # Check required columns
+    required = ["pos", "p_value", "gene"]
+    for col in required:
+        if col not in df.columns:
+            errors.append(f"Missing required column: '{col}'")
+
+    if errors:
+        raise LoaderValidationError(
+            "eQTL validation failed:\n - " + "\n - ".join(errors)
+        )
+
+    # Check data types and ranges
+    if not pd.api.types.is_numeric_dtype(df["pos"]):
+        errors.append(f"Column 'pos' must be numeric, got {df['pos'].dtype}")
+    elif (df["pos"] <= 0).any():
+        n_invalid = (df["pos"] <= 0).sum()
+        errors.append(f"Column 'pos' has {n_invalid} non-positive values")
+
+    if not pd.api.types.is_numeric_dtype(df["p_value"]):
+        errors.append(f"Column 'p_value' must be numeric, got {df['p_value'].dtype}")
+    elif ((df["p_value"] <= 0) | (df["p_value"] > 1)).any():
+        n_invalid = ((df["p_value"] <= 0) | (df["p_value"] > 1)).sum()
+        errors.append(f"Column 'p_value' has {n_invalid} values outside range (0, 1]")
+
+    if errors:
+        raise LoaderValidationError(
+            "eQTL validation failed:\n - " + "\n - ".join(errors)
+        )
+
+    return df
+
+
+# =============================================================================
+# Fine-mapping Validation
+# =============================================================================
+
+
+class FinemappingRowModel(BaseModel):
+    """Validation model for a single fine-mapping row."""
+
+    model_config = ConfigDict(extra="allow")
+
+    pos: int
+    pip: float
+    cs: Optional[int] = None
+
+    @field_validator("pos")
+    @classmethod
+    def position_positive(cls, v: int) -> int:
+        """Position must be positive."""
+        if v <= 0:
+            raise ValueError(f"Position must be positive, got {v}")
+        return v
+
+    @field_validator("pip")
+    @classmethod
+    def pip_in_range(cls, v: float) -> float:
+        """PIP must be between 0 and 1."""
+        if not (0 <= v <= 1):
+            raise ValueError(f"PIP must be in range [0, 1], got {v}")
+        return v
+
+
+def validate_finemapping_dataframe(
+    df: pd.DataFrame,
+    cs_col: str = "cs",
+    strict: bool = False,
+) -> pd.DataFrame:
+    """Validate a fine-mapping DataFrame.
+
+    Args:
+        df: DataFrame to validate.
+        cs_col: Column name for credible set.
+        strict: If True, validate every row.
+
+    Returns:
+        Validated DataFrame.
+
+    Raises:
+        LoaderValidationError: If validation fails.
+    """
+    errors = []
+
+    # Check required columns
+    if "pos" not in df.columns:
+        errors.append("Missing required column: 'pos'")
+    if "pip" not in df.columns:
+        errors.append("Missing required column: 'pip'")
+
+    if errors:
+        raise LoaderValidationError(
+            "Fine-mapping validation failed:\n - " + "\n - ".join(errors)
+        )
+
+    # Check data types and ranges
+    if not pd.api.types.is_numeric_dtype(df["pos"]):
+        errors.append(f"Column 'pos' must be numeric, got {df['pos'].dtype}")
+    elif (df["pos"] <= 0).any():
+        n_invalid = (df["pos"] <= 0).sum()
+        errors.append(f"Column 'pos' has {n_invalid} non-positive values")
+
+    if not pd.api.types.is_numeric_dtype(df["pip"]):
+        errors.append(f"Column 'pip' must be numeric, got {df['pip'].dtype}")
+    elif ((df["pip"] < 0) | (df["pip"] > 1)).any():
+        n_invalid = ((df["pip"] < 0) | (df["pip"] > 1)).sum()
+        errors.append(f"Column 'pip' has {n_invalid} values outside range [0, 1]")
+
+    if errors:
+        raise LoaderValidationError(
+            "Fine-mapping validation failed:\n - " + "\n - ".join(errors)
+        )
+
+    return df
+
+
+# =============================================================================
+# Gene Annotation Validation
+# =============================================================================
+
+
+class GeneRowModel(BaseModel):
+    """Validation model for a single gene annotation row."""
+
+    model_config = ConfigDict(extra="allow")
+
+    chr: Union[str, int]
+    start: int
+    end: int
+    gene_name: str
+    strand: Optional[str] = None
+
+    @field_validator("start", "end")
+    @classmethod
+    def position_positive(cls, v: int) -> int:
+        """Position must be positive."""
+        if v < 0:
+            raise ValueError(f"Position must be non-negative, got {v}")
+        return v
+
+    @model_validator(mode="after")
+    def start_before_end(self):
+        """Start must be <= end."""
+        if self.start > self.end:
+            raise ValueError(f"Start ({self.start}) must be <= end ({self.end})")
+        return self
+
+
+def validate_genes_dataframe(
+    df: pd.DataFrame,
+    strict: bool = False,
+) -> pd.DataFrame:
+    """Validate a genes DataFrame.
+
+    Args:
+        df: DataFrame to validate.
+        strict: If True, validate every row.
+
+    Returns:
+        Validated DataFrame.
+
+    Raises:
+        LoaderValidationError: If validation fails.
+    """
+    errors = []
+
+    # Check required columns
+    required = ["chr", "start", "end", "gene_name"]
+    for col in required:
+        if col not in df.columns:
+            errors.append(f"Missing required column: '{col}'")
+
+    if errors:
+        raise LoaderValidationError(
+            "Gene annotation validation failed:\n - " + "\n - ".join(errors)
+        )
+
+    # Check data types
+    if not pd.api.types.is_numeric_dtype(df["start"]):
+        errors.append(f"Column 'start' must be numeric, got {df['start'].dtype}")
+
+    if not pd.api.types.is_numeric_dtype(df["end"]):
+        errors.append(f"Column 'end' must be numeric, got {df['end'].dtype}")
+
+    # Check ranges
+    if (df["start"] < 0).any():
+        n_invalid = (df["start"] < 0).sum()
+        errors.append(f"Column 'start' has {n_invalid} negative values")
+
+    if (df["end"] < df["start"]).any():
+        n_invalid = (df["end"] < df["start"]).sum()
+        errors.append(f"Found {n_invalid} genes where end < start")
+
+    if errors:
+        raise LoaderValidationError(
+            "Gene annotation validation failed:\n - " + "\n - ".join(errors)
+        )
+
+    return df
+
+
+# =============================================================================
+# File Path Validation
+# =============================================================================
+
+
+def validate_file_path(filepath: Union[str, Path]) -> Path:
+    """Validate that a file path exists and is readable.
+
+    Args:
+        filepath: Path to validate.
+
+    Returns:
+        Validated Path object.
+
+    Raises:
+        LoaderValidationError: If file doesn't exist or isn't readable.
+    """
+    path = Path(filepath)
+
+    if not path.exists():
+        raise LoaderValidationError(f"File not found: {path}")
+
+    if not path.is_file():
+        raise LoaderValidationError(f"Not a file: {path}")
+
+    return path
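A minimal usage sketch of the new validation helpers added in `pylocuszoom/schemas.py`, based only on the functions shown above; the DataFrames here are illustrative.

```python
import pandas as pd

from pylocuszoom.schemas import (
    LoaderValidationError,
    validate_finemapping_dataframe,
    validate_gwas_dataframe,
)

# Illustrative data using the default GWAS column names (ps / p_wald).
gwas_df = pd.DataFrame({"ps": [1000500, 1001200], "p_wald": [5e-8, 0.02]})

try:
    validate_gwas_dataframe(gwas_df)
    validate_finemapping_dataframe(
        pd.DataFrame({"pos": [1000500], "pip": [0.85]})
    )
except LoaderValidationError as exc:
    # Each failure lists every problem found (missing columns, bad ranges, NaNs).
    print(f"Validation failed:\n{exc}")
```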
{pylocuszoom-0.2.0.dist-info → pylocuszoom-0.5.0.dist-info}/METADATA
CHANGED
@@ -1,7 +1,7 @@
 Metadata-Version: 2.4
 Name: pylocuszoom
-Version: 0.2.0
-Summary:
+Version: 0.5.0
+Summary: Publication-ready regional association plots with LD coloring, gene tracks, and recombination overlays
 Project-URL: Homepage, https://github.com/michael-denyer/pylocuszoom
 Project-URL: Documentation, https://github.com/michael-denyer/pylocuszoom#readme
 Project-URL: Repository, https://github.com/michael-denyer/pylocuszoom
@@ -19,6 +19,7 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
 Classifier: Topic :: Scientific/Engineering :: Visualization
 Requires-Python: >=3.10
+Requires-Dist: adjusttext>=0.8
 Requires-Dist: bokeh>=3.8.2
 Requires-Dist: kaleido>=0.2.0
 Requires-Dist: loguru>=0.7.0
@@ -26,63 +27,72 @@ Requires-Dist: matplotlib>=3.5.0
 Requires-Dist: numpy>=1.21.0
 Requires-Dist: pandas>=1.4.0
 Requires-Dist: plotly>=5.0.0
+Requires-Dist: pydantic>=2.0.0
 Requires-Dist: pyliftover>=0.4
 Provides-Extra: all
-Requires-Dist: adjusttext>=0.8; extra == 'all'
 Requires-Dist: pyspark>=3.0.0; extra == 'all'
 Provides-Extra: dev
 Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
 Requires-Dist: pytest>=7.0.0; extra == 'dev'
 Requires-Dist: ruff>=0.1.0; extra == 'dev'
-Provides-Extra: labels
-Requires-Dist: adjusttext>=0.8; extra == 'labels'
 Provides-Extra: spark
 Requires-Dist: pyspark>=3.0.0; extra == 'spark'
 Description-Content-Type: text/markdown

-# pyLocusZoom
-
 [](https://github.com/michael-denyer/pyLocusZoom/actions/workflows/ci.yml)
-[](https://codecov.io/gh/michael-denyer/pyLocusZoom)
+[](https://pypi.org/project/pylocuszoom/)
+[](https://anaconda.org/bioconda/pylocuszoom)
+[](https://www.gnu.org/licenses/gpl-3.0)
 [](https://www.python.org/downloads/)
 [](https://github.com/astral-sh/ruff)
-
 [](https://matplotlib.org/)
 [](https://plotly.com/python/)
 [](https://bokeh.org/)
 [](https://pandas.pydata.org/)
-
 <img src="logo.svg" alt="pyLocusZoom logo" width="120" align="right">
+# pyLocusZoom

-
+Publication-ready regional association plots with LD coloring, gene tracks, and recombination overlays.

 Inspired by [LocusZoom](http://locuszoom.org/) and [locuszoomr](https://github.com/myles-lewis/locuszoomr).

 ## Features

-
-
-- **
-- **
-- **
-- **
-- **
-- **
-- **eQTL overlay**: Expression QTL data as separate panel
-- **PySpark support**: Handles large-scale genomics DataFrames
+1. **Regional association plot**:
+
+   - **Multi-species support**: Built-in reference data for *Canis lupus familiaris* (CanFam3.1/CanFam4) and *Felis catus* (FelCat9), or optionally provide your own for any species
+   - **LD coloring**: SNPs colored by linkage disequilibrium (R²) with lead variant
+   - **Gene tracks**: Annotated gene/exon positions below the association plot
+   - **Recombination rate**: Overlay showing recombination rate across region (*Canis lupus familiaris* only)
+   - **SNP labels (matplotlib)**: Automatic labeling of lead SNPs with RS ID
+   - **Tooltips (Bokeh and Plotly)**: Mouseover for detailed SNP data



+2. **Stacked plots**: Compare multiple GWAS/phenotypes vertically
+3. **eQTL plot**: Expression QTL data aligned with association plots and gene tracks
+4. **Fine-mapping plots**: Visualize SuSiE credible sets with posterior inclusion probabilities
+5. **Multiple charting libraries**: matplotlib (static), plotly (interactive), bokeh (dashboards)
+6. **Pandas and PySpark support**: Works with both Pandas and PySpark DataFrames for large-scale genomics data
+7. **Convenience data file loaders**: Load and validate common GWAS, eQTL and fine-mapping file formats
+
 ## Installation

+```bash
+pip install pylocuszoom
+```
+
+Or with uv:
+
 ```bash
 uv add pylocuszoom
 ```

-Or with
+Or with conda (Bioconda):

 ```bash
-
+conda install -c bioconda pylocuszoom
 ```

 ## Quick Start
@@ -165,20 +175,30 @@ fig = plotter.plot(
 )
 ```

-##
+## Backends

-
+pyLocusZoom supports multiple rendering backends:

 ```python
-# Static publication-quality plot (default
-
-fig = plotter.plot(gwas_df, chrom=1, start=1000000, end=2000000)
+# Static publication-quality plot (default)
+fig = plotter.plot(gwas_df, chrom=1, start=1000000, end=2000000, backend="matplotlib")
 fig.savefig("plot.png", dpi=150)
+
+# Interactive Plotly (hover tooltips, pan/zoom)
+fig = plotter.plot(gwas_df, chrom=1, start=1000000, end=2000000, backend="plotly")
+fig.write_html("plot.html")
+
+# Interactive Bokeh (dashboard-ready)
+fig = plotter.plot(gwas_df, chrom=1, start=1000000, end=2000000, backend="bokeh")
 ```

-
-
-
+| Backend | Output | Best For | Features |
+|---------|--------|----------|----------|
+| `matplotlib` | Static PNG/PDF/SVG | Publications, presentations | Full feature set with SNP labels |
+| `plotly` | Interactive HTML | Web reports, data exploration | Hover tooltips, pan/zoom |
+| `bokeh` | Interactive HTML | Dashboards, web apps | Hover tooltips, pan/zoom |
+
+> **Note:** All backends support scatter plots, gene tracks, recombination overlay, and LD legend. SNP labels (auto-positioned with adjustText) are matplotlib-only; interactive backends use hover tooltips instead.

 ## Stacked Plots

@@ -195,6 +215,8 @@ fig = plotter.plot_stacked(
 )
 ```

+
+
 ## eQTL Overlay

 Add expression QTL data as a separate panel:
@@ -215,6 +237,30 @@ fig = plotter.plot_stacked(
 )
 ```

+
+
+## Fine-mapping Visualization
+
+Visualize SuSiE or other fine-mapping results with credible set coloring:
+
+```python
+finemapping_df = pd.DataFrame({
+    "pos": [1000500, 1001200, 1002000, 1003500],
+    "pip": [0.85, 0.12, 0.02, 0.45],  # Posterior inclusion probability
+    "cs": [1, 1, 0, 2],  # Credible set assignment (0 = not in CS)
+})
+
+fig = plotter.plot_stacked(
+    [gwas_df],
+    chrom=1, start=1000000, end=2000000,
+    finemapping_df=finemapping_df,
+    finemapping_cs_col="cs",
+    genes_df=genes_df,
+)
+```
+
+
+
 ## PySpark Support

 For large-scale genomics data, pass PySpark DataFrames directly:
@@ -231,6 +277,47 @@ pandas_df = to_pandas(spark_gwas_df, sample_size=100000)

 Install PySpark support: `uv add pylocuszoom[spark]`

+## Loading Data from Files
+
+pyLocusZoom includes loaders for common GWAS, eQTL, and fine-mapping file formats:
+
+```python
+from pylocuszoom import (
+    # GWAS loaders
+    load_gwas,            # Auto-detect format
+    load_plink_assoc,     # PLINK .assoc, .assoc.linear, .qassoc
+    load_regenie,         # REGENIE .regenie
+    load_bolt_lmm,        # BOLT-LMM .stats
+    load_gemma,           # GEMMA .assoc.txt
+    load_saige,           # SAIGE output
+    # eQTL loaders
+    load_gtex_eqtl,       # GTEx significant pairs
+    load_eqtl_catalogue,  # eQTL Catalogue format
+    # Fine-mapping loaders
+    load_susie,           # SuSiE output
+    load_finemap,         # FINEMAP .snp output
+    # Gene annotations
+    load_gtf,             # GTF/GFF3 files
+    load_bed,             # BED files
+)
+
+# Auto-detect GWAS format from filename
+gwas_df = load_gwas("results.assoc.linear")
+
+# Or use specific loader
+gwas_df = load_regenie("ukb_results.regenie")
+
+# Load gene annotations
+genes_df = load_gtf("genes.gtf", feature_type="gene")
+exons_df = load_gtf("genes.gtf", feature_type="exon")
+
+# Load eQTL data
+eqtl_df = load_gtex_eqtl("GTEx.signif_pairs.txt.gz", gene="BRCA1")
+
+# Load fine-mapping results
+fm_df = load_susie("susie_output.tsv")
+```
+
 ## Data Formats

 ### GWAS Results DataFrame
@@ -357,6 +444,13 @@ plotter = LocusZoomPlotter(log_level="DEBUG")
 Optional:
 - pyspark >= 3.0.0 (for PySpark DataFrame support) - `uv add pylocuszoom[spark]`

+## Documentation
+
+- [User Guide](docs/USER_GUIDE.md) - Comprehensive documentation with API reference
+- [Architecture](docs/ARCHITECTURE.md) - Design decisions and component overview
+- [Example Notebook](examples/getting_started.ipynb) - Interactive tutorial
+- [CHANGELOG](CHANGELOG.md) - Version history
+
 ## License

 GPL-3.0-or-later