pylocuszoom 1.1.2__py3-none-any.whl → 1.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pylocuszoom/__init__.py +20 -2
- pylocuszoom/backends/base.py +94 -2
- pylocuszoom/backends/bokeh_backend.py +160 -6
- pylocuszoom/backends/matplotlib_backend.py +142 -2
- pylocuszoom/backends/plotly_backend.py +101 -1
- pylocuszoom/coloc.py +82 -0
- pylocuszoom/coloc_plotter.py +390 -0
- pylocuszoom/colors.py +26 -0
- pylocuszoom/config.py +61 -0
- pylocuszoom/finemapping.py +111 -3
- pylocuszoom/labels.py +41 -16
- pylocuszoom/ld.py +239 -0
- pylocuszoom/ld_heatmap_plotter.py +252 -0
- pylocuszoom/miami_plotter.py +490 -0
- pylocuszoom/plotter.py +483 -342
- pylocuszoom/recombination.py +39 -0
- {pylocuszoom-1.1.2.dist-info → pylocuszoom-1.3.1.dist-info}/METADATA +183 -31
- {pylocuszoom-1.1.2.dist-info → pylocuszoom-1.3.1.dist-info}/RECORD +20 -16
- pylocuszoom-1.3.1.dist-info/licenses/LICENSE.md +595 -0
- pylocuszoom-1.1.2.dist-info/licenses/LICENSE.md +0 -17
- {pylocuszoom-1.1.2.dist-info → pylocuszoom-1.3.1.dist-info}/WHEEL +0 -0
pylocuszoom/finemapping.py
CHANGED
|
@@ -4,18 +4,20 @@ Provides utilities for loading, validating, and preparing statistical
|
|
|
4
4
|
fine-mapping results (SuSiE, FINEMAP, etc.) for visualization.
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
|
-
from typing import List, Optional
|
|
7
|
+
from typing import Any, List, Optional
|
|
8
8
|
|
|
9
9
|
import pandas as pd
|
|
10
10
|
|
|
11
|
+
from .backends.base import PlotBackend
|
|
12
|
+
from .backends.hover import HoverConfig, HoverDataBuilder
|
|
13
|
+
from .colors import PIP_LINE_COLOR, get_credible_set_color
|
|
11
14
|
from .exceptions import FinemappingValidationError, ValidationError
|
|
12
15
|
from .logging import logger
|
|
13
16
|
from .utils import filter_by_region
|
|
14
17
|
from .validation import DataFrameValidator
|
|
15
18
|
|
|
16
|
-
# Required columns for fine-mapping data
|
|
19
|
+
# Required columns for fine-mapping data (default column names)
|
|
17
20
|
REQUIRED_FINEMAPPING_COLS = ["pos", "pip"]
|
|
18
|
-
OPTIONAL_FINEMAPPING_COLS = ["rs", "cs", "cs_id", "effect", "se"]
|
|
19
21
|
|
|
20
22
|
|
|
21
23
|
def validate_finemapping_df(
|
|
@@ -207,3 +209,109 @@ def calculate_credible_set_coverage(
|
|
|
207
209
|
coverage[cs_id] = cs_data[pip_col].sum()
|
|
208
210
|
|
|
209
211
|
return coverage
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def plot_finemapping(
    backend: PlotBackend,
    ax: Any,
    df: pd.DataFrame,
    pos_col: str = "pos",
    pip_col: str = "pip",
    cs_col: Optional[str] = "cs",
    show_credible_sets: bool = True,
    pip_threshold: float = 0.0,
) -> None:
    """Plot fine-mapping results (PIP line with credible set coloring).

    Renders posterior inclusion probabilities as a line plot, with optional
    scatter points colored by credible set membership.

    Args:
        backend: Plotting backend implementing PlotBackend protocol.
        ax: Axes or panel to plot on (backend-specific object).
        df: Fine-mapping DataFrame with pos and pip columns.
        pos_col: Column name for position.
        pip_col: Column name for posterior inclusion probability.
        cs_col: Column name for credible set assignment (optional).
        show_credible_sets: Whether to color points by credible set.
        pip_threshold: Minimum PIP to display as scatter point.
    """
    # Build hover data using HoverDataBuilder; credible-set info is only
    # included in hover text when the column actually exists.
    extra_cols = {pip_col: "PIP"}
    if cs_col and cs_col in df.columns:
        extra_cols[cs_col] = "Credible Set"
    hover_config = HoverConfig(
        pos_col=pos_col if pos_col in df.columns else None,
        extra_cols=extra_cols,
    )
    hover_builder = HoverDataBuilder(hover_config)

    # Sort by position for line plotting. sort_values returns a new frame,
    # so only the local binding changes — the caller's DataFrame is untouched.
    df = df.sort_values(pos_col)

    # Plot PIP as line (zorder=1 keeps the line below scatter points).
    backend.line(
        ax,
        df[pos_col],
        df[pip_col],
        color=PIP_LINE_COLOR,
        linewidth=1.5,
        alpha=0.8,
        zorder=1,
    )

    # Check if credible sets are available
    has_cs = cs_col is not None and cs_col in df.columns and show_credible_sets
    credible_sets = get_credible_sets(df, cs_col) if has_cs else []

    if credible_sets:
        # Plot points colored by credible set
        for cs_id in credible_sets:
            cs_data = df[df[cs_col] == cs_id]
            color = get_credible_set_color(cs_id)
            backend.scatter(
                ax,
                cs_data[pos_col],
                cs_data[pip_col],
                colors=color,
                sizes=50,
                marker="o",
                edgecolor="black",
                linewidth=0.5,
                zorder=3,
                hover_data=hover_builder.build_dataframe(cs_data),
            )
        # Plot variants not in any credible set (only if threshold is set).
        # NaN or 0 in the cs column both mean "not in a credible set" here.
        if pip_threshold > 0:
            non_cs_data = df[(df[cs_col].isna()) | (df[cs_col] == 0)]
            non_cs_data = non_cs_data[non_cs_data[pip_col] >= pip_threshold]
            if not non_cs_data.empty:
                backend.scatter(
                    ax,
                    non_cs_data[pos_col],
                    non_cs_data[pip_col],
                    colors="#BEBEBE",
                    sizes=30,
                    marker="o",
                    edgecolor="black",
                    linewidth=0.3,
                    zorder=2,
                    hover_data=hover_builder.build_dataframe(non_cs_data),
                )
    else:
        # No credible sets - show all points above threshold
        if pip_threshold > 0:
            high_pip = df[df[pip_col] >= pip_threshold]
            if not high_pip.empty:
                backend.scatter(
                    ax,
                    high_pip[pos_col],
                    high_pip[pip_col],
                    colors=PIP_LINE_COLOR,
                    sizes=50,
                    marker="o",
                    edgecolor="black",
                    linewidth=0.5,
                    zorder=3,
                    hover_data=hover_builder.build_dataframe(high_pip),
                )
|
pylocuszoom/labels.py
CHANGED
|
@@ -24,6 +24,7 @@ def add_snp_labels(
|
|
|
24
24
|
genes_df: Optional[pd.DataFrame] = None,
|
|
25
25
|
chrom: Optional[Union[int, str]] = None,
|
|
26
26
|
max_label_length: int = 15,
|
|
27
|
+
adjust: bool = True,
|
|
27
28
|
**kwargs: Any,
|
|
28
29
|
) -> List[Annotation]:
|
|
29
30
|
"""Add text labels to top SNPs in the regional plot.
|
|
@@ -41,6 +42,8 @@ def add_snp_labels(
|
|
|
41
42
|
genes_df: Unused, kept for backward compatibility.
|
|
42
43
|
chrom: Unused, kept for backward compatibility.
|
|
43
44
|
max_label_length: Maximum label length before truncation.
|
|
45
|
+
adjust: If True, run adjustText immediately. If False, caller must
|
|
46
|
+
call adjust_snp_labels() after setting axis limits.
|
|
44
47
|
|
|
45
48
|
Returns:
|
|
46
49
|
List of matplotlib text annotation objects.
|
|
@@ -101,21 +104,43 @@ def add_snp_labels(
|
|
|
101
104
|
)
|
|
102
105
|
texts.append(text)
|
|
103
106
|
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
try:
|
|
107
|
-
from adjustText import adjust_text
|
|
108
|
-
|
|
109
|
-
adjust_text(
|
|
110
|
-
texts,
|
|
111
|
-
ax=ax,
|
|
112
|
-
arrowprops=dict(arrowstyle="-", color="gray", lw=0.5),
|
|
113
|
-
expand_points=(1.5, 1.5),
|
|
114
|
-
)
|
|
115
|
-
except ImportError:
|
|
116
|
-
logger.warning(
|
|
117
|
-
"adjustText not installed - SNP labels may overlap. "
|
|
118
|
-
"Install with: pip install adjustText"
|
|
119
|
-
)
|
|
107
|
+
if adjust:
|
|
108
|
+
adjust_snp_labels(ax, texts)
|
|
120
109
|
|
|
121
110
|
return texts
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def adjust_snp_labels(ax: Axes, texts: List[Annotation]) -> None:
    """Reposition SNP labels so they do not overlap.

    Call this AFTER all axis limits have been set: adjustText needs the
    final plot bounds to keep labels positioned inside the visible area.

    Args:
        ax: Matplotlib axes object.
        texts: List of text annotation objects from add_snp_labels().

    Example:
        >>> texts = add_snp_labels(ax, df, adjust=False)
        >>> ax.set_xlim(start, end)
        >>> ax.set_ylim(0, max_y)
        >>> adjust_snp_labels(ax, texts)
    """
    # Zero or one label can never overlap — nothing to do.
    if len(texts) <= 1:
        return

    # adjustText is an optional dependency; degrade gracefully without it.
    try:
        from adjustText import adjust_text
    except ImportError:
        logger.warning(
            "adjustText not installed - SNP labels may overlap. "
            "Install with: pip install adjustText"
        )
        return

    adjust_text(
        texts,
        ax=ax,
        arrowprops={"arrowstyle": "-", "color": "gray", "lw": 0.5},
        expand_points=(1.5, 1.5),
    )
|
pylocuszoom/ld.py
CHANGED
|
@@ -16,6 +16,72 @@ from .logging import logger
|
|
|
16
16
|
from .utils import validate_plink_files
|
|
17
17
|
|
|
18
18
|
|
|
19
|
+
def build_pairwise_ld_command(
    plink_path: str,
    bfile_path: str,
    output_path: str,
    snp_list_file: Optional[str] = None,
    chrom: Optional[int] = None,
    start: Optional[int] = None,
    end: Optional[int] = None,
    species: Optional[str] = "canine",
    metric: str = "r2",
) -> list:
    """Build PLINK command for pairwise LD matrix computation.

    Assembles the argv list for computing an N x N LD matrix with PLINK's
    --r2 square (or --r dprime square) mode.

    Args:
        plink_path: Path to PLINK executable.
        bfile_path: Input binary fileset prefix (.bed/.bim/.fam).
        output_path: Output prefix (creates .ld and .snplist files).
        snp_list_file: Path to file with SNP IDs to extract (one per line).
        chrom: Chromosome number for region-based extraction.
        start: Start position (bp) for region-based extraction.
        end: End position (bp) for region-based extraction.
        species: Species flag ('canine', 'feline', or None for human).
        metric: LD metric ('r2' or 'dprime').

    Returns:
        List of command arguments for subprocess.
    """
    cmd: list = [plink_path]

    # Non-human chromosome sets ('canine' -> --dog, 'feline' -> 18 autosomes);
    # anything else (e.g. None for human) adds no species flag.
    species_flags = {"canine": ["--dog"], "feline": ["--chr-set", "18"]}
    cmd += species_flags.get(species, [])

    # Input fileset and output prefix.
    cmd += ["--bfile", bfile_path, "--out", output_path]

    # LD metric with square-matrix output shape.
    metric_flags = (
        ["--r", "dprime", "square"] if metric == "dprime" else ["--r2", "square"]
    )
    cmd += metric_flags

    # Emit a .snplist file so callers can recover the matrix row order.
    cmd.append("--write-snplist")

    # Restrict to an explicit SNP list when one was supplied.
    if snp_list_file:
        cmd += ["--extract", snp_list_file]

    # Region-based extraction flags, each added independently.
    if chrom is not None:
        cmd += ["--chr", str(chrom)]
    if start is not None:
        cmd += ["--from-bp", str(start)]
    if end is not None:
        cmd += ["--to-bp", str(end)]

    return cmd
|
|
83
|
+
|
|
84
|
+
|
|
19
85
|
def find_plink() -> Optional[str]:
|
|
20
86
|
"""Find PLINK executable on PATH.
|
|
21
87
|
|
|
@@ -84,6 +150,51 @@ def build_ld_command(
|
|
|
84
150
|
return cmd
|
|
85
151
|
|
|
86
152
|
|
|
153
|
+
def parse_pairwise_ld_output(
    ld_file: str, snplist_file: str
) -> tuple[pd.DataFrame, list[str]]:
    """Parse PLINK pairwise LD matrix output files.

    PLINK --r2 square writes two files:
      - .ld: an N x N matrix of R2/D' values (whitespace-separated, no
        header row); entries may be numeric or 'nan'.
      - .snplist: the SNP IDs, one per line, in matrix row/column order.

    Args:
        ld_file: Path to .ld output file (square matrix).
        snplist_file: Path to .snplist output file (SNP IDs).

    Returns:
        Tuple of (DataFrame with R2/D' values, list of SNP IDs).
        DataFrame has SNP IDs as both index and columns.
        Returns (empty DataFrame, empty list) if files not found.
    """
    # Bail out early if either PLINK output file is missing.
    if not (os.path.exists(ld_file) and os.path.exists(snplist_file)):
        return pd.DataFrame(), []

    # One SNP ID per line; blank lines are discarded.
    with open(snplist_file) as handle:
        snp_ids = [entry for entry in (raw.strip() for raw in handle) if entry]

    if not snp_ids:
        return pd.DataFrame(), []

    # Headerless whitespace-separated square matrix; label the columns
    # with the SNP IDs as we read.
    matrix = pd.read_csv(
        ld_file,
        sep=r"\s+",
        header=None,
        names=snp_ids,
        index_col=False,
    )

    # Label rows identically so matrix.loc[snp_a, snp_b] lookups work.
    matrix.index = snp_ids

    return matrix, snp_ids
|
|
196
|
+
|
|
197
|
+
|
|
87
198
|
def parse_ld_output(ld_file: str, lead_snp: str) -> pd.DataFrame:
|
|
88
199
|
"""Parse PLINK .ld output file.
|
|
89
200
|
|
|
@@ -208,3 +319,131 @@ def calculate_ld(
|
|
|
208
319
|
# Clean up temp directory
|
|
209
320
|
if cleanup_working_dir and os.path.exists(working_dir):
|
|
210
321
|
shutil.rmtree(working_dir, ignore_errors=True)
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
def calculate_pairwise_ld(
    bfile_path: str,
    snp_list: list[str] | None = None,
    chrom: int | None = None,
    start: int | None = None,
    end: int | None = None,
    plink_path: str | None = None,
    working_dir: str | None = None,
    species: str = "canine",
    metric: str = "r2",
) -> tuple[pd.DataFrame, list[str]]:
    """Calculate pairwise LD matrix for a set of variants.

    Runs PLINK --r2 square to compute an N x N LD matrix, suitable for
    LD heatmap visualization.

    Args:
        bfile_path: Path to PLINK binary fileset (.bed/.bim/.fam prefix).
        snp_list: List of SNP IDs to compute pairwise LD between.
        chrom: Chromosome number for region-based extraction.
        start: Start position (bp) for region-based extraction.
        end: End position (bp) for region-based extraction.
        plink_path: Path to PLINK executable. Auto-detects if None.
        working_dir: Directory for PLINK output files. Uses temp dir if None.
        species: Species flag ('canine', 'feline', or None for human).
        metric: LD metric ('r2' or 'dprime').

    Returns:
        Tuple of (LD matrix DataFrame, list of SNP IDs).
        DataFrame has SNP IDs as both index and columns.
        Returns (empty DataFrame, empty list) if PLINK fails.

    Raises:
        FileNotFoundError: If PLINK executable not found.
        ValidationError: If PLINK binary files (.bed/.bim/.fam) are missing.
        ValidationError: If requested SNPs are not found in reference panel.

    Example:
        >>> matrix, snp_ids = calculate_pairwise_ld(
        ...     bfile_path="/path/to/genotypes",
        ...     snp_list=["rs1", "rs2", "rs3"],
        ... )
        >>> # matrix is 3x3 DataFrame with LD values
        >>> matrix.loc["rs1", "rs2"]  # LD between rs1 and rs2
    """
    # Local import avoids a circular dependency at module import time
    # (presumably ld <-> utils; TODO confirm the cycle still exists).
    from .utils import ValidationError

    # Find PLINK on PATH unless the caller pinned an explicit binary.
    if plink_path is None:
        plink_path = find_plink()
        if plink_path is None:
            raise FileNotFoundError(
                "PLINK not found. Install PLINK 1.9 or specify plink_path."
            )

    logger.debug(f"Using PLINK at {plink_path}")

    # Validate PLINK files exist (.bed/.bim/.fam) before spawning anything.
    validate_plink_files(bfile_path)

    # Use temp directory if working_dir not specified; only directories we
    # created ourselves are deleted in the finally block below.
    cleanup_working_dir = False
    if working_dir is None:
        working_dir = tempfile.mkdtemp(prefix="snp_scope_pairwise_ld_")
        cleanup_working_dir = True

    try:
        os.makedirs(working_dir, exist_ok=True)
        output_prefix = os.path.join(working_dir, "pairwise_ld")

        # Write SNP list to file if provided (PLINK --extract reads a file,
        # one SNP ID per line).
        snp_list_file = None
        if snp_list:
            snp_list_file = os.path.join(working_dir, "snp_list.txt")
            with open(snp_list_file, "w") as f:
                for snp in snp_list:
                    f.write(f"{snp}\n")

        # Build and run PLINK command
        cmd = build_pairwise_ld_command(
            plink_path=plink_path,
            bfile_path=bfile_path,
            output_path=output_prefix,
            snp_list_file=snp_list_file,
            chrom=chrom,
            start=start,
            end=end,
            species=species,
            metric=metric,
        )

        logger.debug(f"Running PLINK command: {' '.join(cmd)}")

        # argv list (shell=False default) — no shell injection surface.
        result = subprocess.run(
            cmd,
            cwd=working_dir,
            capture_output=True,
            text=True,
        )

        # PLINK failure is non-fatal by contract: warn and return empties.
        if result.returncode != 0:
            logger.warning(
                f"PLINK pairwise LD calculation failed: {result.stderr[:200]}"
            )
            return pd.DataFrame(), []

        # Parse output files written next to the output prefix.
        ld_file = f"{output_prefix}.ld"
        snplist_file = f"{output_prefix}.snplist"

        matrix, found_snps = parse_pairwise_ld_output(ld_file, snplist_file)

        # Validate all requested SNPs were found; missing SNPs are an error
        # (unlike PLINK failure above) because results would be misleading.
        if snp_list:
            missing_snps = set(snp_list) - set(found_snps)
            if missing_snps:
                raise ValidationError(
                    f"SNPs not found in reference panel: {', '.join(sorted(missing_snps))}"
                )

        return matrix, found_snps

    finally:
        # Clean up temp directory (runs even when ValidationError is raised).
        if cleanup_working_dir and os.path.exists(working_dir):
            shutil.rmtree(working_dir, ignore_errors=True)
|