pylocuszoom 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pylocuszoom/__init__.py +120 -0
- pylocuszoom/backends/__init__.py +52 -0
- pylocuszoom/backends/base.py +341 -0
- pylocuszoom/backends/bokeh_backend.py +441 -0
- pylocuszoom/backends/matplotlib_backend.py +288 -0
- pylocuszoom/backends/plotly_backend.py +474 -0
- pylocuszoom/colors.py +107 -0
- pylocuszoom/eqtl.py +218 -0
- pylocuszoom/gene_track.py +311 -0
- pylocuszoom/labels.py +118 -0
- pylocuszoom/ld.py +209 -0
- pylocuszoom/logging.py +153 -0
- pylocuszoom/plotter.py +733 -0
- pylocuszoom/recombination.py +432 -0
- pylocuszoom/reference_data/__init__.py +4 -0
- pylocuszoom/utils.py +194 -0
- pylocuszoom-0.1.0.dist-info/METADATA +367 -0
- pylocuszoom-0.1.0.dist-info/RECORD +20 -0
- pylocuszoom-0.1.0.dist-info/WHEEL +4 -0
- pylocuszoom-0.1.0.dist-info/licenses/LICENSE.md +17 -0
pylocuszoom/ld.py
ADDED
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
"""LD (Linkage Disequilibrium) calculation using PLINK.
|
|
2
|
+
|
|
3
|
+
Calculates R² values between a lead SNP and all other SNPs in a region
|
|
4
|
+
using PLINK 1.9's --r2 command.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
import shutil
|
|
9
|
+
import subprocess
|
|
10
|
+
import tempfile
|
|
11
|
+
from typing import Optional
|
|
12
|
+
|
|
13
|
+
import pandas as pd
|
|
14
|
+
|
|
15
|
+
from .logging import logger
|
|
16
|
+
from .utils import validate_plink_files
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def find_plink() -> Optional[str]:
    """Locate a PLINK binary on the current PATH.

    Candidate names are probed in order of preference: ``plink1.9`` first,
    then ``plink``.

    Returns:
        Path of the first candidate that resolves, or None when neither
        name can be found.
    """
    candidates = ("plink1.9", "plink")
    # The generator is lazy, so probing stops at the first hit.
    resolved = (shutil.which(candidate) for candidate in candidates)
    return next((hit for hit in resolved if hit), None)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def build_ld_command(
    plink_path: str,
    bfile_path: str,
    lead_snp: str,
    output_path: str,
    window_kb: int = 500,
    ld_window_r2: float = 0.0,
    species: str = "dog",
    threads: Optional[int] = None,
) -> list:
    """Assemble the PLINK argument vector for an LD (--r2) run.

    Args:
        plink_path: PLINK executable to invoke.
        bfile_path: Prefix of the input binary fileset (.bed/.bim/.fam).
        lead_snp: ID of the SNP every other SNP is compared against.
        output_path: Output prefix (PLINK writes ``<prefix>.ld``).
        window_kb: Window size in kilobases.
        ld_window_r2: Lowest R² that is still reported (0.0 reports all).
        species: 'dog' or 'cat' add a species flag; any other value
            (e.g. None for human) adds none.
        threads: Thread count; ``os.cpu_count()`` (or 1) when None.

    Returns:
        The command as a list of strings, ready for ``subprocess``.
    """
    # PLINK has a --dog shortcut but no --cat, so cat is expressed through
    # --chr-set with 18 autosomes.
    if species == "dog":
        species_args = ["--dog"]
    else:
        species_args = ["--chr-set", "18"] if species == "cat" else []

    n_threads = (os.cpu_count() or 1) if threads is None else threads

    return [
        plink_path,
        *species_args,
        "--bfile", bfile_path,
        "--out", output_path,
        "--r2",
        "--ld-snp", lead_snp,
        "--ld-window-kb", str(window_kb),
        # Lift the default 10-SNP limit on the LD window.
        "--ld-window", "99999",
        "--ld-window-r2", str(ld_window_r2),
        "--threads", str(n_threads),
    ]
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def parse_ld_output(ld_file: str, lead_snp: str) -> pd.DataFrame:
    """Parse a PLINK ``.ld`` output file into SNP/R² pairs.

    The file is expected to be whitespace-separated with the columns
    ``CHR_A BP_A SNP_A CHR_B BP_B SNP_B R2``; ``SNP_B`` holds the partner
    SNPs and ``R2`` their LD with the lead SNP.

    Args:
        ld_file: Path to the .ld output file.
        lead_snp: SNP ID of the lead variant.

    Returns:
        DataFrame with columns ``SNP`` and ``R2``. The lead SNP appears
        exactly once, as the last row, with R2 = 1.0. An empty DataFrame
        with those columns is returned when the file is missing, has no
        content at all, or has a header but no data rows.
    """
    no_ld = pd.DataFrame(columns=["SNP", "R2"])

    if not os.path.exists(ld_file):
        return no_ld

    try:
        ld_df = pd.read_csv(ld_file, sep=r"\s+")
    except pd.errors.EmptyDataError:
        # A zero-byte file has no header to parse; treat it as "no LD".
        return no_ld

    if ld_df.empty:
        return no_ld

    partners = ld_df[["SNP_B", "R2"]].rename(columns={"SNP_B": "SNP"})

    # If the file already carries a row for the lead SNP (e.g. the index
    # variant reported against itself), drop it so that the explicit row
    # added below stays unique; duplicate IDs would multiply rows when the
    # result is merged with GWAS data.
    partners = partners[partners["SNP"].astype(str) != str(lead_snp)]

    lead_row = pd.DataFrame({"SNP": [lead_snp], "R2": [1.0]})
    if partners.empty:
        return lead_row
    return pd.concat([partners, lead_row], ignore_index=True)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def calculate_ld(
    bfile_path: str,
    lead_snp: str,
    window_kb: int = 500,
    plink_path: Optional[str] = None,
    working_dir: Optional[str] = None,
    species: str = "dog",
    threads: Optional[int] = None,
) -> pd.DataFrame:
    """Calculate LD (R²) between a lead SNP and all SNPs in a region.

    Runs PLINK --r2 to compute pairwise LD values, then returns a DataFrame
    that can be merged with GWAS results for regional plot coloring.

    Args:
        bfile_path: Path to PLINK binary fileset (.bed/.bim/.fam prefix).
            Relative paths are resolved against the caller's current
            working directory.
        lead_snp: SNP ID of the lead variant to calculate LD against.
        window_kb: Window size in kilobases around lead SNP.
        plink_path: Path to PLINK executable. Auto-detects if None.
        working_dir: Directory for PLINK output files. A temporary
            directory is created, and removed afterwards, if None.
        species: Species flag ('dog', 'cat', or None for human).
        threads: Number of threads for PLINK.

    Returns:
        DataFrame with columns: SNP (rsid), R2 (LD with lead SNP).
        Returns empty DataFrame if PLINK fails or no LD values found.

    Raises:
        FileNotFoundError: If PLINK executable not found.

    Example:
        >>> ld_df = calculate_ld(
        ...     bfile_path="/path/to/genotypes",
        ...     lead_snp="rs12345",
        ...     window_kb=500,
        ... )
        >>> # Merge with GWAS results for plotting
        >>> gwas_with_ld = gwas_df.merge(ld_df, left_on="rs", right_on="SNP")
    """
    # Find PLINK first (tests mock this to return None)
    if plink_path is None:
        plink_path = find_plink()
    if plink_path is None:
        raise FileNotFoundError(
            "PLINK not found. Install PLINK 1.9 or specify plink_path."
        )

    logger.debug(f"Using PLINK at {plink_path}")

    # Validate PLINK files exist
    validate_plink_files(bfile_path)

    # PLINK is launched with cwd=working_dir, so any relative path would be
    # resolved against that directory instead of the caller's. Anchor them
    # to the caller's working directory up front.
    bfile_path = os.path.abspath(bfile_path)
    if os.path.dirname(plink_path):
        # Bare command names are left alone so PATH lookup still applies.
        plink_path = os.path.abspath(plink_path)

    # Use temp directory if working_dir not specified
    cleanup_working_dir = False
    if working_dir is None:
        working_dir = tempfile.mkdtemp(prefix="snp_scope_ld_")
        cleanup_working_dir = True
    else:
        working_dir = os.path.abspath(working_dir)

    try:
        os.makedirs(working_dir, exist_ok=True)

        # Path separators inside the SNP ID would point the output prefix
        # into a sub-directory that does not exist.
        file_stem = lead_snp
        for separator in (os.sep, os.altsep):
            if separator:
                file_stem = file_stem.replace(separator, "_")
        output_prefix = os.path.join(working_dir, f"ld_{file_stem}")

        # Build and run PLINK command
        cmd = build_ld_command(
            plink_path=plink_path,
            bfile_path=bfile_path,
            lead_snp=lead_snp,
            output_path=output_prefix,
            window_kb=window_kb,
            species=species,
            threads=threads,
        )

        logger.debug(f"Running PLINK command: {' '.join(cmd)}")

        result = subprocess.run(
            cmd,
            cwd=working_dir,
            capture_output=True,
            text=True,
        )

        if result.returncode != 0:
            logger.warning(f"PLINK LD calculation failed: {result.stderr[:200]}")
            return pd.DataFrame(columns=["SNP", "R2"])

        # Parse output
        ld_file = f"{output_prefix}.ld"
        return parse_ld_output(ld_file, lead_snp)

    finally:
        # Clean up temp directory (only if this call created it)
        if cleanup_working_dir and os.path.exists(working_dir):
            shutil.rmtree(working_dir, ignore_errors=True)
|
pylocuszoom/logging.py
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
"""Logging configuration for pylocuszoom.
|
|
2
|
+
|
|
3
|
+
Provides logging with sensible defaults:
|
|
4
|
+
- Logging is enabled by default at INFO level
|
|
5
|
+
- Uses loguru when it is installed, falling back to the standard-library logging module otherwise
|
|
6
|
+
- Users can adjust level via enable_logging() or disable via disable_logging()
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
>>> from pylocuszoom.logging import enable_logging, disable_logging
|
|
10
|
+
>>> enable_logging("DEBUG") # Enable DEBUG level for troubleshooting
|
|
11
|
+
>>> disable_logging() # Suppress all logging output
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import sys
|
|
15
|
+
|
|
16
|
+
# Try to use loguru, fall back to stdlib logging
|
|
17
|
+
try:
|
|
18
|
+
from loguru import logger as _loguru_logger
|
|
19
|
+
|
|
20
|
+
_HAS_LOGURU = True
|
|
21
|
+
except ImportError:
|
|
22
|
+
import logging as _stdlib_logging
|
|
23
|
+
|
|
24
|
+
_HAS_LOGURU = False
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class _LoguruWrapper:
    """Wrapper around loguru logger with enable/disable support.

    The wrapper owns at most one loguru handler at a time (tracked by its
    handler id) and only forwards messages while it is enabled.
    """

    def __init__(self):
        self._enabled = False
        self._handler_id = None
        # Remove default handler.
        # NOTE(review): remove() without an id drops every handler currently
        # registered on the global loguru logger, including any added by the
        # host application before this runs - confirm this is intended.
        _loguru_logger.remove()

    def _detach_handler(self) -> None:
        """Remove the handler owned by this wrapper, if there is one."""
        if self._handler_id is None:
            return
        try:
            _loguru_logger.remove(self._handler_id)
        except ValueError:
            # Handler was already removed (e.g., by another module calling
            # logger.remove())
            pass
        self._handler_id = None

    def enable(self, level: str = "INFO", sink=sys.stderr) -> None:
        """Enable logging at the specified level.

        Any handler installed by a previous call is replaced.

        Args:
            level: Minimum level name passed to loguru.
            sink: Destination passed to loguru (default: stderr).
        """
        self._detach_handler()
        self._handler_id = _loguru_logger.add(
            sink,
            level=level,
            format="<level>{level: <8}</level> | <cyan>pylocuszoom</cyan> | {message}",
            # record["name"] is None when the calling frame has no __name__;
            # treat that as "not from pylocuszoom" rather than failing.
            filter=lambda record: (record["name"] or "").startswith("pylocuszoom"),
        )
        self._enabled = True

    def disable(self) -> None:
        """Disable logging and drop the handler owned by this wrapper."""
        self._detach_handler()
        self._enabled = False

    # The emitters use opt(depth=1) so that the log record is attributed to
    # the caller of the wrapper rather than to this module.

    def debug(self, msg: str, *args, **kwargs) -> None:
        """Log *msg* at DEBUG level if logging is enabled."""
        if self._enabled:
            _loguru_logger.opt(depth=1).debug(msg, *args, **kwargs)

    def info(self, msg: str, *args, **kwargs) -> None:
        """Log *msg* at INFO level if logging is enabled."""
        if self._enabled:
            _loguru_logger.opt(depth=1).info(msg, *args, **kwargs)

    def warning(self, msg: str, *args, **kwargs) -> None:
        """Log *msg* at WARNING level if logging is enabled."""
        if self._enabled:
            _loguru_logger.opt(depth=1).warning(msg, *args, **kwargs)

    def error(self, msg: str, *args, **kwargs) -> None:
        """Log *msg* at ERROR level if logging is enabled."""
        if self._enabled:
            _loguru_logger.opt(depth=1).error(msg, *args, **kwargs)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class _StdlibWrapper:
    """Wrapper around stdlib logging with enable/disable support.

    Used when loguru is not importable. Messages go to the "pylocuszoom"
    stdlib logger through a single stream handler owned by this wrapper.
    """

    # Level names that exist in loguru but not in stdlib logging, mapped to
    # the closest stdlib level so both backends accept the same names.
    _LEVEL_ALIASES = {"TRACE": "DEBUG", "SUCCESS": "INFO"}

    def __init__(self):
        self._logger = _stdlib_logging.getLogger("pylocuszoom")
        self._logger.setLevel(_stdlib_logging.WARNING)
        self._handler = None
        self._enabled = False

    def enable(self, level: str = "INFO", sink=sys.stderr) -> None:
        """Enable logging at the specified level.

        Args:
            level: Level name (case-insensitive), e.g. "DEBUG" or "INFO".
            sink: Stream the handler writes to (default: stderr).

        Raises:
            AttributeError: If *level* is not a known level name. The
                current configuration is left untouched in that case.
        """
        # Resolve the level before touching any state, so a bad name cannot
        # leave the logger half-reconfigured.
        level_name = level.upper()
        level_name = self._LEVEL_ALIASES.get(level_name, level_name)
        level_no = getattr(_stdlib_logging, level_name)

        if self._handler is not None:
            self._logger.removeHandler(self._handler)
        self._handler = _stdlib_logging.StreamHandler(sink)
        self._handler.setFormatter(
            _stdlib_logging.Formatter("%(levelname)-8s | pylocuszoom | %(message)s")
        )
        self._logger.addHandler(self._handler)
        self._logger.setLevel(level_no)
        self._enabled = True

    def disable(self) -> None:
        """Disable logging and reset the logger level to WARNING."""
        if self._handler is not None:
            self._logger.removeHandler(self._handler)
            self._handler = None
        self._logger.setLevel(_stdlib_logging.WARNING)
        self._enabled = False

    def debug(self, msg: str, *args, **kwargs) -> None:
        """Log *msg* at DEBUG level if logging is enabled."""
        if self._enabled:
            self._logger.debug(msg, *args, **kwargs)

    def info(self, msg: str, *args, **kwargs) -> None:
        """Log *msg* at INFO level if logging is enabled."""
        if self._enabled:
            self._logger.info(msg, *args, **kwargs)

    def warning(self, msg: str, *args, **kwargs) -> None:
        """Log *msg* at WARNING level if logging is enabled."""
        if self._enabled:
            self._logger.warning(msg, *args, **kwargs)

    def error(self, msg: str, *args, **kwargs) -> None:
        """Log *msg* at ERROR level if logging is enabled."""
        if self._enabled:
            self._logger.error(msg, *args, **kwargs)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
# Module-wide logger: backed by loguru when it could be imported, by the
# standard library otherwise.
logger = _LoguruWrapper() if _HAS_LOGURU else _StdlibWrapper()

# Logging starts out enabled at INFO level.
logger.enable("INFO")
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def enable_logging(level: str = "INFO", sink=sys.stderr) -> None:
    """Turn pylocuszoom log output on.

    Delegates to the module-level ``logger`` wrapper.

    Args:
        level: Log level ("DEBUG", "INFO", "WARNING", "ERROR").
        sink: Output destination (default: stderr).

    Example:
        >>> from pylocuszoom.logging import enable_logging
        >>> enable_logging()  # INFO level
        >>> enable_logging("DEBUG")  # DEBUG level for troubleshooting
    """
    logger.enable(level=level, sink=sink)
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def disable_logging() -> None:
    """Turn pylocuszoom log output off.

    Delegates to the module-level ``logger`` wrapper.
    """
    logger.disable()
|