pylocuszoom 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pylocuszoom/ld.py ADDED
@@ -0,0 +1,209 @@
1
+ """LD (Linkage Disequilibrium) calculation using PLINK.
2
+
3
+ Calculates R² values between a lead SNP and all other SNPs in a region
4
+ using PLINK 1.9's --r2 command.
5
+ """
6
+
7
+ import os
8
+ import shutil
9
+ import subprocess
10
+ import tempfile
11
+ from typing import Optional
12
+
13
+ import pandas as pd
14
+
15
+ from .logging import logger
16
+ from .utils import validate_plink_files
17
+
18
+
19
def find_plink() -> Optional[str]:
    """Locate a PLINK binary on the current PATH.

    Candidate names are probed in priority order: ``plink1.9`` first,
    then ``plink``.

    Returns:
        The path reported by ``shutil.which`` for the first candidate that
        resolves, or None when neither name is found.
    """
    candidates = ("plink1.9", "plink")
    # Lazy generator: later names are only looked up if earlier ones miss.
    hits = (shutil.which(candidate) for candidate in candidates)
    return next((hit for hit in hits if hit), None)
32
+
33
+
34
def build_ld_command(
    plink_path: str,
    bfile_path: str,
    lead_snp: str,
    output_path: str,
    window_kb: int = 500,
    ld_window_r2: float = 0.0,
    species: str = "dog",
    threads: Optional[int] = None,
) -> list:
    """Assemble the PLINK argument vector for an R² query against one SNP.

    Args:
        plink_path: PLINK executable to invoke (first element of the result).
        bfile_path: Prefix of the binary fileset (.bed/.bim/.fam) passed to
            ``--bfile``.
        lead_snp: Variant ID passed to ``--ld-snp``.
        output_path: Prefix passed to ``--out`` (PLINK appends ``.ld``).
        window_kb: Value for ``--ld-window-kb``.
        ld_window_r2: Value for ``--ld-window-r2``; 0.0 reports every pair.
        species: ``'dog'`` adds ``--dog``; ``'cat'`` adds ``--chr-set 18``;
            any other value (e.g. None for human) adds no species flag.
        threads: Value for ``--threads``. When None, ``os.cpu_count()`` is
            used, falling back to 1 if that is unavailable.

    Returns:
        List of command-line tokens suitable for ``subprocess.run``.
    """
    # Chromosome-set selection sits directly after the executable.
    if species == "dog":
        species_args = ["--dog"]
    elif species == "cat":
        # PLINK has no --cat shortcut; declare 18 autosomes (+ X) instead.
        species_args = ["--chr-set", "18"]
    else:
        species_args = []

    n_threads = threads if threads is not None else (os.cpu_count() or 1)

    return [
        plink_path,
        *species_args,
        # Input and output prefixes
        "--bfile", bfile_path,
        "--out", output_path,
        # LD calculation flags
        "--r2",
        "--ld-snp", lead_snp,
        "--ld-window-kb", str(window_kb),
        # Lift the default cap on the number of SNPs per window.
        "--ld-window", "99999",
        "--ld-window-r2", str(ld_window_r2),
        "--threads", str(n_threads),
    ]
85
+
86
+
87
def parse_ld_output(ld_file: str, lead_snp: str) -> pd.DataFrame:
    """Parse a PLINK ``.ld`` output file into a two-column LD table.

    The file is read as a whitespace-separated table that must contain the
    columns ``SNP_B`` and ``R2`` (PLINK's layout is
    ``CHR_A BP_A SNP_A CHR_B BP_B SNP_B R2``).

    Args:
        ld_file: Path to the ``.ld`` output file.
        lead_snp: SNP ID of the lead variant.

    Returns:
        DataFrame with columns ``SNP`` and ``R2``. The lead SNP appears
        exactly once, as the last row, with ``R2 == 1.0``. An empty frame
        with the same columns is returned when the file is missing, is
        zero bytes long, or contains no data rows.
    """
    if not os.path.exists(ld_file):
        return pd.DataFrame(columns=["SNP", "R2"])

    try:
        ld_df = pd.read_csv(ld_file, sep=r"\s+")
    except pd.errors.EmptyDataError:
        # A zero-byte file has no header to parse; treat it as "no results"
        # rather than letting the parser error escape.
        return pd.DataFrame(columns=["SNP", "R2"])

    if ld_df.empty:
        return pd.DataFrame(columns=["SNP", "R2"])

    # SNP_B holds the partner variants; R2 is their LD with the lead (SNP_A).
    partners = ld_df[["SNP_B", "R2"]].rename(columns={"SNP_B": "SNP"})

    # The table may already list the lead SNP paired with itself. Drop any
    # such row so the explicit R2=1.0 row added below is the only one;
    # otherwise a later merge on SNP would duplicate the lead variant.
    is_lead = partners["SNP"].astype(str) == str(lead_snp)
    partners = partners.loc[~is_lead]

    lead_row = pd.DataFrame({"SNP": [lead_snp], "R2": [1.0]})
    if partners.empty:
        return lead_row

    return pd.concat([partners, lead_row], ignore_index=True)
114
+
115
+
116
def calculate_ld(
    bfile_path: str,
    lead_snp: str,
    window_kb: int = 500,
    plink_path: Optional[str] = None,
    working_dir: Optional[str] = None,
    species: str = "dog",
    threads: Optional[int] = None,
) -> pd.DataFrame:
    """Calculate LD (R²) between a lead SNP and all SNPs in a region.

    Runs PLINK --r2 to compute pairwise LD values, then returns a DataFrame
    that can be merged with GWAS results for regional plot coloring.

    Args:
        bfile_path: Path to PLINK binary fileset (.bed/.bim/.fam prefix).
            May be relative to the caller's current directory.
        lead_snp: SNP ID of the lead variant to calculate LD against.
        window_kb: Window size in kilobases around lead SNP.
        plink_path: Path to PLINK executable. Auto-detects if None.
        working_dir: Directory for PLINK output files. Uses a temporary
            directory (removed afterwards) if None; a caller-supplied
            directory is created if needed and left in place.
        species: Species flag ('dog', 'cat', or None for human).
        threads: Number of threads for PLINK.

    Returns:
        DataFrame with columns: SNP (rsid), R2 (LD with lead SNP).
        Returns empty DataFrame if PLINK exits with a non-zero status or
        no LD values are found.

    Raises:
        FileNotFoundError: If PLINK executable not found.
        (``validate_plink_files`` may raise as well; see its definition.)

    Example:
        >>> ld_df = calculate_ld(
        ...     bfile_path="/path/to/genotypes",
        ...     lead_snp="rs12345",
        ...     window_kb=500,
        ... )
        >>> # Merge with GWAS results for plotting
        >>> gwas_with_ld = gwas_df.merge(ld_df, left_on="rs", right_on="SNP")
    """
    # Find PLINK first (tests mock this to return None)
    if plink_path is None:
        plink_path = find_plink()
    if plink_path is None:
        raise FileNotFoundError(
            "PLINK not found. Install PLINK 1.9 or specify plink_path."
        )

    logger.debug(f"Using PLINK at {plink_path}")

    # Validate PLINK files exist
    validate_plink_files(bfile_path)

    # PLINK is launched with cwd=working_dir, so any relative path would be
    # resolved against that directory instead of the caller's. Pin paths to
    # absolute form before the working directory changes.
    bfile_path = os.path.abspath(bfile_path)
    if os.path.dirname(plink_path):
        # Only paths with a directory part; bare names must stay PATH lookups.
        plink_path = os.path.abspath(plink_path)

    # Use temp directory if working_dir not specified
    cleanup_working_dir = False
    if working_dir is None:
        working_dir = tempfile.mkdtemp(prefix="snp_scope_ld_")
        cleanup_working_dir = True
    else:
        working_dir = os.path.abspath(working_dir)

    try:
        os.makedirs(working_dir, exist_ok=True)

        # Variant IDs may contain path separators (e.g. "1:123:A/G"); replace
        # them so the output prefix stays inside working_dir.
        safe_id = str(lead_snp)
        for separator in (os.sep, os.altsep):
            if separator:
                safe_id = safe_id.replace(separator, "_")
        output_prefix = os.path.join(working_dir, f"ld_{safe_id}")

        # Build and run PLINK command
        cmd = build_ld_command(
            plink_path=plink_path,
            bfile_path=bfile_path,
            lead_snp=lead_snp,
            output_path=output_prefix,
            window_kb=window_kb,
            species=species,
            threads=threads,
        )

        logger.debug(f"Running PLINK command: {' '.join(cmd)}")

        result = subprocess.run(
            cmd,
            cwd=working_dir,
            capture_output=True,
            text=True,
        )

        if result.returncode != 0:
            # Best-effort: a failed LD run degrades to "no LD information".
            logger.warning(f"PLINK LD calculation failed: {result.stderr[:200]}")
            return pd.DataFrame(columns=["SNP", "R2"])

        # Parse output
        ld_file = f"{output_prefix}.ld"
        return parse_ld_output(ld_file, lead_snp)

    finally:
        # Clean up temp directory (only when this function created it)
        if cleanup_working_dir and os.path.exists(working_dir):
            shutil.rmtree(working_dir, ignore_errors=True)
pylocuszoom/logging.py ADDED
@@ -0,0 +1,153 @@
1
+ """Logging configuration for pylocuszoom.
2
+
3
+ Provides logging with sensible defaults:
4
+ - Logging is enabled by default at INFO level
5
+ - Uses loguru when it is installed; otherwise falls back to the standard-library logging module
6
+ - Users can adjust level via enable_logging() or disable via disable_logging()
7
+
8
+ Usage:
9
+ >>> from pylocuszoom.logging import enable_logging, disable_logging
10
+ >>> enable_logging("DEBUG") # Enable DEBUG level for troubleshooting
11
+ >>> disable_logging() # Suppress all logging output
12
+ """
13
+
14
+ import sys
15
+
16
+ # Try to use loguru, fall back to stdlib logging
17
+ try:
18
+ from loguru import logger as _loguru_logger
19
+
20
+ _HAS_LOGURU = True
21
+ except ImportError:
22
+ import logging as _stdlib_logging
23
+
24
+ _HAS_LOGURU = False
25
+
26
+
27
class _LoguruWrapper:
    """Wrapper around loguru logger with enable/disable support.

    The wrapper owns at most one loguru handler (tracked by its id) and
    gates every log call on an internal enabled flag.
    """

    def __init__(self):
        self._enabled = False
        self._handler_id = None
        # Remove only loguru's pre-installed default handler (id 0). Calling
        # remove() with no argument would also delete every handler the host
        # application configured before importing this package.
        try:
            _loguru_logger.remove(0)
        except ValueError:
            # Default handler already removed (or never installed).
            pass

    @staticmethod
    def _from_package(record) -> bool:
        """Return True for records emitted from a ``pylocuszoom`` module."""
        # record["name"] can be None when the caller's module name is
        # unavailable; treat that as "not ours" instead of raising.
        return (record["name"] or "").startswith("pylocuszoom")

    def enable(self, level: str = "INFO", sink=sys.stderr) -> None:
        """Enable logging at the specified level.

        Any handler previously installed by this wrapper is replaced.

        Args:
            level: Minimum loguru level name to emit.
            sink: Destination passed to ``loguru.logger.add``.
        """
        if self._handler_id is not None:
            try:
                _loguru_logger.remove(self._handler_id)
            except ValueError:
                # Handler was already removed (e.g., by another module calling logger.remove())
                pass
        self._handler_id = _loguru_logger.add(
            sink,
            level=level,
            format="<level>{level: <8}</level> | <cyan>pylocuszoom</cyan> | {message}",
            filter=self._from_package,
        )
        self._enabled = True

    def disable(self) -> None:
        """Disable logging and detach this wrapper's handler, if any."""
        if self._handler_id is not None:
            try:
                _loguru_logger.remove(self._handler_id)
            except ValueError:
                # Handler was already removed (e.g., by another module calling logger.remove())
                pass
            self._handler_id = None
        self._enabled = False

    # depth=1 makes loguru attribute each record to the caller of the
    # wrapper method rather than to this module.

    def debug(self, msg: str, *args, **kwargs) -> None:
        """Log ``msg`` at DEBUG level when logging is enabled."""
        if self._enabled:
            _loguru_logger.opt(depth=1).debug(msg, *args, **kwargs)

    def info(self, msg: str, *args, **kwargs) -> None:
        """Log ``msg`` at INFO level when logging is enabled."""
        if self._enabled:
            _loguru_logger.opt(depth=1).info(msg, *args, **kwargs)

    def warning(self, msg: str, *args, **kwargs) -> None:
        """Log ``msg`` at WARNING level when logging is enabled."""
        if self._enabled:
            _loguru_logger.opt(depth=1).warning(msg, *args, **kwargs)

    def error(self, msg: str, *args, **kwargs) -> None:
        """Log ``msg`` at ERROR level when logging is enabled."""
        if self._enabled:
            _loguru_logger.opt(depth=1).error(msg, *args, **kwargs)
78
+
79
+
80
class _StdlibWrapper:
    """Fallback logger built on the standard-library ``logging`` module.

    Mirrors the loguru wrapper's surface: ``enable``/``disable`` plus the
    four level methods, all gated on an internal enabled flag.
    """

    def __init__(self):
        self._enabled = False
        self._handler = None
        self._logger = _stdlib_logging.getLogger("pylocuszoom")
        self._logger.setLevel(_stdlib_logging.WARNING)

    def enable(self, level: str = "INFO", sink=sys.stderr) -> None:
        """Attach a fresh stream handler on ``sink`` and set the log level.

        A handler installed by an earlier call is detached first.
        """
        previous = self._handler
        if previous is not None:
            self._logger.removeHandler(previous)

        formatter = _stdlib_logging.Formatter(
            "%(levelname)-8s | pylocuszoom | %(message)s"
        )
        handler = _stdlib_logging.StreamHandler(sink)
        handler.setFormatter(formatter)
        self._handler = handler
        self._logger.addHandler(handler)

        # Level names are resolved as attributes of the logging module.
        numeric_level = getattr(_stdlib_logging, level.upper())
        self._logger.setLevel(numeric_level)
        self._enabled = True

    def disable(self) -> None:
        """Detach the handler (if any) and reset the level to WARNING."""
        current = self._handler
        if current is not None:
            self._logger.removeHandler(current)
            self._handler = None
        self._logger.setLevel(_stdlib_logging.WARNING)
        self._enabled = False

    def debug(self, msg: str, *args, **kwargs) -> None:
        """Forward ``msg`` at DEBUG level; no-op while disabled."""
        if not self._enabled:
            return
        self._logger.debug(msg, *args, **kwargs)

    def info(self, msg: str, *args, **kwargs) -> None:
        """Forward ``msg`` at INFO level; no-op while disabled."""
        if not self._enabled:
            return
        self._logger.info(msg, *args, **kwargs)

    def warning(self, msg: str, *args, **kwargs) -> None:
        """Forward ``msg`` at WARNING level; no-op while disabled."""
        if not self._enabled:
            return
        self._logger.warning(msg, *args, **kwargs)

    def error(self, msg: str, *args, **kwargs) -> None:
        """Forward ``msg`` at ERROR level; no-op while disabled."""
        if not self._enabled:
            return
        self._logger.error(msg, *args, **kwargs)
124
+
125
+
126
# Pick the wrapper that matches whichever logging backend was importable.
logger = _LoguruWrapper() if _HAS_LOGURU else _StdlibWrapper()

# Logging starts switched on, at INFO level.
logger.enable("INFO")
134
+
135
+
136
def enable_logging(level: str = "INFO", sink=sys.stderr) -> None:
    """Turn on pylocuszoom log output.

    Delegates to the module-level ``logger`` wrapper's ``enable`` method.

    Args:
        level: Log level name ("DEBUG", "INFO", "WARNING", "ERROR").
        sink: Where log output is written (default: stderr).

    Example:
        >>> from pylocuszoom.logging import enable_logging
        >>> enable_logging()  # INFO level
        >>> enable_logging("DEBUG")  # DEBUG level for troubleshooting
    """
    logger.enable(level=level, sink=sink)
149
+
150
+
151
def disable_logging() -> None:
    """Turn off pylocuszoom log output.

    Delegates to the module-level ``logger`` wrapper's ``disable`` method.
    """
    logger.disable()