allelix 1.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- allelix/__init__.py +12 -0
- allelix/annotators/__init__.py +90 -0
- allelix/annotators/alphamissense.py +228 -0
- allelix/annotators/base.py +214 -0
- allelix/annotators/cadd.py +283 -0
- allelix/annotators/clinvar.py +404 -0
- allelix/annotators/gnomad.py +212 -0
- allelix/annotators/gwas.py +354 -0
- allelix/annotators/pharmgkb.py +406 -0
- allelix/annotators/snpedia.py +276 -0
- allelix/cli.py +1524 -0
- allelix/compare.py +149 -0
- allelix/config.py +143 -0
- allelix/data/__init__.py +3 -0
- allelix/data/high_value_snps.yaml +64 -0
- allelix/databases/__init__.py +30 -0
- allelix/databases/_versions.py +16 -0
- allelix/databases/alphamissense_loader.py +48 -0
- allelix/databases/cadd_loader.py +49 -0
- allelix/databases/cpic_loader.py +234 -0
- allelix/databases/gnomad_loader.py +49 -0
- allelix/databases/gwas_loader.py +546 -0
- allelix/databases/loader_utils.py +80 -0
- allelix/databases/manager.py +515 -0
- allelix/databases/pharmgkb_loader.py +437 -0
- allelix/databases/schema.py +165 -0
- allelix/databases/snpedia_loader.py +44 -0
- allelix/databases/snpedia_parser.py +342 -0
- allelix/exporters/__init__.py +3 -0
- allelix/exporters/plink.py +144 -0
- allelix/models.py +117 -0
- allelix/parsers/__init__.py +73 -0
- allelix/parsers/_helpers.py +41 -0
- allelix/parsers/ancestrydna.py +130 -0
- allelix/parsers/base.py +97 -0
- allelix/parsers/ftdna.py +129 -0
- allelix/parsers/livingdna.py +121 -0
- allelix/parsers/myhappygenes.py +135 -0
- allelix/parsers/myheritage.py +118 -0
- allelix/parsers/twentythreeandme.py +150 -0
- allelix/py.typed +0 -0
- allelix/reports/__init__.py +40 -0
- allelix/reports/_pipeline.py +497 -0
- allelix/reports/diff.py +169 -0
- allelix/reports/high_value.py +133 -0
- allelix/reports/html.py +1130 -0
- allelix/reports/json_report.py +163 -0
- allelix/reports/methylation.py +50 -0
- allelix/reports/terminal.py +203 -0
- allelix/utils/__init__.py +3 -0
- allelix/utils/allele.py +87 -0
- allelix/utils/build_detect.py +203 -0
- allelix-1.8.1.dist-info/METADATA +276 -0
- allelix-1.8.1.dist-info/RECORD +58 -0
- allelix-1.8.1.dist-info/WHEEL +5 -0
- allelix-1.8.1.dist-info/entry_points.txt +2 -0
- allelix-1.8.1.dist-info/licenses/LICENSE +671 -0
- allelix-1.8.1.dist-info/top_level.txt +1 -0
allelix/cli.py
ADDED
|
@@ -0,0 +1,1524 @@
|
|
|
1
|
+
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
2
|
+
# Copyright (C) 2026 dial481
|
|
3
|
+
"""Allelix command-line interface."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
import sys
|
|
9
|
+
import time
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import TYPE_CHECKING, NamedTuple
|
|
12
|
+
|
|
13
|
+
import click
|
|
14
|
+
from rich.console import Console
|
|
15
|
+
from rich.table import Table
|
|
16
|
+
|
|
17
|
+
from allelix import __version__
|
|
18
|
+
from allelix.annotators import get_annotators
|
|
19
|
+
from allelix.databases import resolve_data_dir
|
|
20
|
+
from allelix.parsers import ParserNotFoundError, detect_parser, get_parser_by_name
|
|
21
|
+
from allelix.reports._pipeline import run_analysis
|
|
22
|
+
from allelix.reports.diff import compute_diff, load_previous_report
|
|
23
|
+
from allelix.reports.high_value import format_warnings, load_high_value_snps, scan_no_calls
|
|
24
|
+
from allelix.reports.html import render_html
|
|
25
|
+
from allelix.reports.json_report import render_json
|
|
26
|
+
from allelix.reports.methylation import METHYLATION_PANEL_GENES
|
|
27
|
+
from allelix.reports.terminal import render_terminal, render_terminal_diff
|
|
28
|
+
|
|
29
|
+
if TYPE_CHECKING:
|
|
30
|
+
from allelix.annotators.base import Annotator
|
|
31
|
+
from allelix.models import Variant
|
|
32
|
+
from allelix.parsers.base import GenotypeParser
|
|
33
|
+
|
|
34
|
+
console = Console()
|
|
35
|
+
|
|
36
|
+
# Sort 1-22 numerically, then X, Y, MT, then anything else alphabetically.
_NAMED_CHROM_ORDER = {"X": 0, "Y": 1, "MT": 2}


def _chrom_sort_key(chrom: str) -> tuple[int, int, str]:
    """Return a sort key that orders chromosome labels for display.

    Args:
        chrom: Chromosome label as it appears on a variant (e.g. ``"7"``,
            ``"X"``, ``"MT"``).

    Returns:
        A ``(group, rank, label)`` tuple: group 0 holds purely numeric labels
        ordered by their integer value, group 1 holds the labels listed in
        ``_NAMED_CHROM_ORDER`` in that mapping's order, and group 2 holds every
        other label ordered alphabetically.
    """
    # isdecimal() (not isdigit()) is used on purpose: isdigit() is also true
    # for characters such as "²" that int() cannot parse, which would raise
    # ValueError here instead of falling through to the "unknown" group.
    if chrom.isdecimal():
        return (0, int(chrom), "")
    named_rank = _NAMED_CHROM_ORDER.get(chrom)
    if named_rank is not None:
        return (1, named_rank, "")
    return (2, 0, chrom)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _percent(part: int, total: int) -> str:
    """Format ``part`` as a percentage of ``total`` with two decimal places.

    A ``total`` of zero yields ``"0.00%"`` rather than dividing by zero.
    """
    if total != 0:
        share = part / total * 100
        return f"{share:.2f}%"
    return "0.00%"
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class _WarningCounter(logging.Handler):
    """Logging handler that tallies every record handed to ``emit``.

    The handler is created at WARNING level; ``count`` starts at zero and
    grows by one per emitted record.
    """

    def __init__(self) -> None:
        super().__init__(logging.WARNING)
        self.count = 0

    def emit(self, record: logging.LogRecord) -> None:
        # The record's content is not inspected; only the tally matters.
        self.count = self.count + 1
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class _LoggerSnapshot(NamedTuple):
    """Captured state of a Python logger for restoration after CLI mutates it.

    Only the two settings the CLI overwrites are recorded; handlers are
    tracked separately by the code that adds and removes them.
    """

    # Value of ``logger.level`` at the time the snapshot was taken.
    level: int
    # Value of ``logger.propagate`` at the time the snapshot was taken.
    propagate: bool
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _wire_parser_logging() -> tuple[_WarningCounter, logging.Handler, _LoggerSnapshot]:
    """Install warning counting and stderr echoing on the ``allelix.parsers`` logger.

    The logger is set to WARNING and its propagation is switched off while the
    two handlers are attached.

    Returns:
        The counting handler, the stderr handler, and a snapshot of the
        logger's previous ``level``/``propagate`` values — the three arguments
        ``_unwire_parser_logging`` needs to undo this call.
    """
    log = logging.getLogger("allelix.parsers")
    # Record the current settings before anything below overwrites them.
    previous = _LoggerSnapshot(level=log.level, propagate=log.propagate)

    tally = _WarningCounter()
    echo = logging.StreamHandler(sys.stderr)
    echo.setLevel(logging.WARNING)
    echo.setFormatter(logging.Formatter("warning: %(message)s"))

    for handler in (tally, echo):
        log.addHandler(handler)
    log.setLevel(logging.WARNING)
    log.propagate = False
    return tally, echo, previous
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _unwire_parser_logging(
    counter: _WarningCounter,
    stderr_handler: logging.Handler,
    snapshot: _LoggerSnapshot,
) -> None:
    """Detach both handlers from ``allelix.parsers`` and restore its saved settings.

    Args:
        counter: Counting handler to remove from the logger.
        stderr_handler: Stderr handler to remove from the logger.
        snapshot: Previously captured ``level`` and ``propagate`` values to
            write back onto the logger.
    """
    log = logging.getLogger("allelix.parsers")
    for handler in (counter, stderr_handler):
        log.removeHandler(handler)
    log.propagate = snapshot.propagate
    log.setLevel(snapshot.level)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _resolve_parser(file_path: Path, fmt: str | None) -> GenotypeParser:
    """Return the parser named by ``fmt``, or auto-detect one for ``file_path``.

    Raises:
        click.ClickException: When the lookup raises ``ParserNotFoundError``;
            the original message is reused.
    """
    try:
        if fmt:
            return get_parser_by_name(fmt)
        return detect_parser(file_path)
    except ParserNotFoundError as err:
        raise click.ClickException(str(err)) from err
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _ready_annotators(
    data_dir: Path | None,
    *,
    include_benign: bool = False,
    gwas_filter_traits: bool = True,
    cadd_full: bool = False,
) -> tuple[Path, list[Annotator], list[Annotator]]:
    """Build the annotators for ``data_dir`` and split them by readiness.

    Args:
        data_dir: Data directory override, handed to ``resolve_data_dir``.
        include_benign: Forwarded unchanged to ``get_annotators``.
        gwas_filter_traits: Forwarded unchanged to ``get_annotators``.
        cadd_full: Forwarded unchanged to ``get_annotators``.

    Returns:
        ``(resolved_dir, ready, not_ready)`` where the two lists partition the
        annotators by the result of ``is_ready()``, preserving their order.

    Raises:
        click.ClickException: When no annotator reports ready.
    """
    resolved = resolve_data_dir(data_dir)
    registered = get_annotators(
        resolved,
        include_benign=include_benign,
        gwas_filter_traits=gwas_filter_traits,
        cadd_full=cadd_full,
    )
    ready: list[Annotator] = []
    not_ready: list[Annotator] = []
    for candidate in registered:
        bucket = ready if candidate.is_ready() else not_ready
        bucket.append(candidate)
    if ready:
        return resolved, ready, not_ready
    names = ", ".join(candidate.name for candidate in registered)
    raise click.ClickException(
        f"No annotators are ready. Run `allelix db update` first. Registered: {names}"
    )
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
_STALENESS_SECONDS = 7 * 24 * 60 * 60  # 7 days


def _maybe_refresh_databases(data_dir: Path) -> None:
    """Refresh cached databases that are stale and whose remote signal changed.

    Each annotator is entered as a context manager and checked only when it
    requires a download, reports ready, and uses server-driven freshness. Its
    cache age is taken from the newest mtime among files in ``data_dir``
    matching ``<annotator.name>*sqlite*``. Caches no older than
    ``_STALENESS_SECONDS`` are left alone.

    When ``fetch_remote_signal()`` returns ``None`` a notice is printed and the
    stale cache is kept; when the remote signal equals the cached one nothing
    happens; otherwise ``_run_setup`` is invoked for the annotator.
    """
    started = time.time()
    for annotator in get_annotators(data_dir):
        with annotator:
            eligible = annotator.requires_download and annotator.is_ready()
            if not eligible:
                continue
            # Code-driven sources (commit-pinned HF caches) never change
            # at a fixed URL — skip the HEAD request. See ADR-0030.
            if not annotator.server_driven_freshness:
                continue

            candidates = list(data_dir.glob(f"{annotator.name}*sqlite*"))
            if not candidates:
                continue
            age = started - max(path.stat().st_mtime for path in candidates)
            if age <= _STALENESS_SECONDS:
                continue

            remote = annotator.fetch_remote_signal()
            if remote is None:
                # No remote signal available: report the age and keep the cache.
                age_days = int(age / 86400)
                console.print(
                    f"[yellow]{annotator.display_name} database is {age_days} days old. "
                    "Run `allelix db update` when online.[/yellow]"
                )
                continue

            if annotator.cached_remote_signal() == remote:
                continue

            console.print(f"[bold]Updating {annotator.display_name}…[/bold]")
            if not _run_setup(annotator):
                continue
            console.print(
                f"[green]✓ {annotator.display_name} updated[/green] "
                f"(version {annotator.version() or '(unknown)'})"
            )
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def _format_from_path(output: Path, override: str | None) -> str:
    """Choose the report format for ``output``.

    Args:
        output: Destination path; its suffix is compared case-insensitively.
        override: Explicit format; when truthy it is returned lower-cased and
            the suffix is not consulted.

    Returns:
        ``"html"`` or ``"json"`` when inferred from the suffix, otherwise the
        lower-cased override.

    Raises:
        click.ClickException: When there is no override and the suffix is
            neither ``.html`` nor ``.json``.
    """
    if override:
        return override.lower()
    by_suffix = {".html": "html", ".json": "json"}
    inferred = by_suffix.get(output.suffix.lower())
    if inferred is None:
        raise click.ClickException(
            f"Cannot infer report format from {output.name!r}. "
            "Pass --report-format html|json explicitly."
        )
    return inferred
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def _run_analysis_command(
    file_path: Path,
    fmt: str | None,
    data_dir: Path | None,
    output: Path | None,
    report_format: str | None,
    min_magnitude: float,
    category: str | None,
    genes: frozenset[str] | None,
    build: str | None = None,
    include_benign: bool = False,
    gwas_min_magnitude: float | None = None,
    snpedia_min_magnitude: float | None = None,
    exclude_sources: frozenset[str] | None = None,
    gwas_all: bool = False,
    diff_path: Path | None = None,
    no_update: bool = False,
    no_gnomad: bool = False,
    no_alphamissense: bool = False,
) -> None:
    """Run the annotate-and-report flow shared by the analysis commands.

    In order: resolve the data directory, optionally refresh stale databases,
    pick a parser, load the config, select annotators, run the analysis, print
    build diagnostics and high-value no-call warnings, optionally diff against
    a previous report, then render to the terminal or to a report file.

    Args:
        file_path: Genotype file to analyze.
        fmt: Parser name to force; falsy means auto-detect.
        data_dir: Data directory override, passed to ``resolve_data_dir``.
        output: Report file to write; ``None`` renders to the terminal.
        report_format: Explicit report format, passed to ``_format_from_path``.
        min_magnitude: Forwarded to the filter and renderers.
        category: Forwarded to the filter and renderers.
        genes: Forwarded to the filter and renderers.
        build: Passed to ``run_analysis`` as ``build_override``.
        include_benign: Forwarded to ``_ready_annotators``.
        gwas_min_magnitude: Per-source floor stored under the key ``"gwas"``.
        snpedia_min_magnitude: Per-source floor stored under ``"snpedia"``.
        exclude_sources: Annotator names removed from the ready list.
        gwas_all: Inverted and forwarded as ``gwas_filter_traits``.
        diff_path: Previous report to diff the filtered result against.
        no_update: Skip the pre-analysis database refresh.
        no_gnomad: Do not hand a gnomAD annotator to ``run_analysis``.
        no_alphamissense: Do not hand an AlphaMissense annotator to
            ``run_analysis``.

    Raises:
        click.ClickException: When ``load_previous_report`` raises
            ``ValueError``; helpers called here raise it as well.
    """
    resolved = resolve_data_dir(data_dir)
    if not no_update:
        _maybe_refresh_databases(resolved)
    parser = _resolve_parser(file_path, fmt)

    from allelix.config import load_config

    cfg = load_config(resolved)
    # NOTE(review): the raw ``data_dir`` is passed here, so the helper resolves
    # it a second time instead of reusing ``resolved``.
    _, ready, not_ready = _ready_annotators(
        data_dir,
        include_benign=include_benign,
        gwas_filter_traits=not gwas_all,
        cadd_full=cfg.cadd_full,
    )
    # Map each ready annotator's class-level ``name`` to its class for the
    # config's enabled check.
    annotator_classes = {type(a).name: type(a) for a in ready}
    ready = [a for a in ready if cfg.is_enabled(a.name, annotator_classes)]

    if exclude_sources:
        ready = [a for a in ready if a.name not in exclude_sources]

    # gnomAD, AlphaMissense and CADD are pulled out of ``ready`` and handed to
    # run_analysis through dedicated keyword arguments instead.
    gnomad_annotator = None
    if not no_gnomad:
        from allelix.annotators.gnomad import GnomadAnnotator

        for a in ready:
            if isinstance(a, GnomadAnnotator):
                gnomad_annotator = a
                break
    # Removed from ``ready`` whether or not --no-gnomad was given.
    ready = [a for a in ready if a.name != "gnomad"]

    am_annotator = None
    if not no_alphamissense:
        from allelix.annotators.alphamissense import AlphaMissenseAnnotator

        for a in ready:
            if isinstance(a, AlphaMissenseAnnotator):
                am_annotator = a
                break
    # Removed from ``ready`` whether or not --no-alphamissense was given.
    ready = [a for a in ready if a.name != "alphamissense"]

    # Unlike the two above, CADD has no opt-out flag in this function.
    cadd_annotator = None
    from allelix.annotators.cadd import CaddAnnotator

    for a in ready:
        if isinstance(a, CaddAnnotator):
            cadd_annotator = a
            break
    ready = [a for a in ready if a.name != "cadd"]

    if not_ready:
        names = [a.name for a in not_ready]
        console.print(
            f"[yellow]Skipping unready annotators: {', '.join(names)}[/yellow] "
            "(run `allelix db update` to populate)"
        )

    # ``all_active`` exists only to build the "Analyzing against" banner.
    all_active: list[Annotator] = list(ready)
    if gnomad_annotator is not None and gnomad_annotator.is_ready():
        all_active.append(gnomad_annotator)
    if am_annotator is not None and am_annotator.is_ready():
        all_active.append(am_annotator)
    if cadd_annotator is not None and cadd_annotator.is_ready():
        all_active.append(cadd_annotator)
    versions = ", ".join(f"{a.display_name} ({a.version() or 'unknown'})" for a in all_active)
    console.print(f"[dim]Analyzing against: {versions}[/dim]")

    # Parser warnings are counted and echoed to stderr only for the duration
    # of the analysis; the finally block restores the logger afterwards.
    counter, stderr_handler, snapshot = _wire_parser_logging()
    try:
        result = run_analysis(
            file_path,
            parser,
            ready,
            skipped_count_provider=lambda: counter.count,
            build_override=build,
            gnomad=gnomad_annotator,
            alphamissense=am_annotator,
            cadd=cadd_annotator,
        )
    finally:
        _unwire_parser_logging(counter, stderr_handler, snapshot)

    _emit_build_diagnostics(result)

    high_value = load_high_value_snps()
    hv_rsids = set(high_value)
    # NOTE(review): this parses the input file again, after logging has been
    # unwired, so warnings from this pass are not added to ``counter``.
    hv_variants: list[Variant] = [v for v in parser.parse(file_path) if v.rsid in hv_rsids]
    hv_warnings = scan_no_calls(hv_variants, high_value)
    if hv_warnings:
        console.print(
            f"[bold red]Warning:[/bold red] {len(hv_warnings)} high-value SNP(s) returned no-call:"
        )
        for line in format_warnings(hv_warnings):
            console.print(f"  [red]⚠[/red] {line}")

    if counter.count:
        console.print(
            f"[yellow]Note:[/yellow] {counter.count:,} malformed line(s) skipped "
            "(see warnings on stderr)."
        )

    # Per-source magnitude floors; stays None when neither floor was supplied.
    source_floors: dict[str, float] | None = None
    if gwas_min_magnitude is not None or snpedia_min_magnitude is not None:
        source_floors = {}
        if gwas_min_magnitude is not None:
            source_floors["gwas"] = gwas_min_magnitude
        if snpedia_min_magnitude is not None:
            source_floors["snpedia"] = snpedia_min_magnitude

    diff_result = None
    if diff_path is not None:
        try:
            prev = load_previous_report(diff_path)
        except ValueError as exc:
            raise click.ClickException(str(exc)) from exc
        # The diff is computed on the filtered result, using the same filter
        # arguments the renderers below receive.
        filtered_for_diff = result.filter(
            min_magnitude=min_magnitude,
            category=category,
            genes=genes,
            source_min_magnitudes=source_floors,
        )
        from allelix.reports._pipeline import rollup_gwas_duplicates

        filtered_for_diff = rollup_gwas_duplicates(filtered_for_diff)
        diff_result = compute_diff(
            filtered_for_diff,
            prev["annotations"],
            prev.get("generated_at", ""),
        )

    if output is None:
        # Terminal output: ``rendered`` is assigned but not read on this path.
        if diff_result is not None:
            rendered = render_terminal_diff(diff_result, console)
        else:
            rendered = render_terminal(
                result,
                console=console,
                min_magnitude=min_magnitude,
                category=category,
                genes=genes,
                source_min_magnitudes=source_floors,
            )
    else:
        chosen = _format_from_path(output, report_format)
        hv_warning_lines = format_warnings(hv_warnings) if hv_warnings else None
        if chosen == "json":
            # JSON gets structured no-call entries; HTML gets formatted lines.
            hv_dicts = (
                [{"rsid": w.snp.rsid, "gene": w.snp.gene, "note": w.snp.note} for w in hv_warnings]
                if hv_warnings
                else None
            )
            rendered = render_json(
                result,
                output_path=output,
                min_magnitude=min_magnitude,
                category=category,
                genes=genes,
                source_min_magnitudes=source_floors,
                diff=diff_result,
                high_value_no_calls=hv_dicts,
            )
        else:
            rendered = render_html(
                result,
                output_path=output,
                min_magnitude=min_magnitude,
                category=category,
                genes=genes,
                source_min_magnitudes=source_floors,
                diff=diff_result,
                high_value_no_calls=hv_warning_lines,
            )
        console.print(f"[green]Wrote {rendered:,} annotation(s) to {output}[/green]")

    # NOTE(review): ``len(ready)`` excludes gnomAD/AlphaMissense/CADD, which
    # were removed from the list above — confirm that is the intended count.
    console.print(
        f"[dim]{len(result.annotations):,} total annotation(s) from {len(ready)} "
        f"database(s) across {result.total_variants:,} variant(s).[/dim]"
    )
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
# Root command group; subcommands register themselves with @main.command().
# The docstring below doubles as the text Click shows for --help.
@click.group()
@click.version_option(version=__version__, prog_name="allelix")
def main() -> None:
    """Allelix: open-source genotype analysis toolkit."""
|
|
401
|
+
|
|
402
|
+
|
|
403
|
+
@main.command()
@click.argument(
    "file_path",
    type=click.Path(exists=True, dir_okay=False, path_type=Path),
)
@click.option(
    "--format",
    "fmt",
    default=None,
    help="Force a specific parser (e.g., myhappygenes). Default: auto-detect.",
)
def stats(file_path: Path, fmt: str | None) -> None:
    """Show summary statistics for a genotype file."""
    # The docstring above is the command's help text; keep it user-facing.
    #
    # Streams the file once, tallying total / no-call / heterozygous /
    # homozygous variants and per-chromosome counts, then prints a summary
    # table, any high-value no-call warnings, and a per-chromosome table.
    parser = _resolve_parser(file_path, fmt)

    high_value = load_high_value_snps()
    hv_rsids = set(high_value)
    hv_variants: list[Variant] = []

    total = 0
    no_calls = 0
    het = 0
    hom = 0
    chrom_counts: dict[str, int] = {}

    # Wire logging immediately before the try/finally that unwires it, so an
    # exception raised by the setup above can never leave the parsers logger
    # with extra handlers and propagation switched off.
    counter, stderr_handler, snapshot = _wire_parser_logging()
    try:
        metadata = parser.get_metadata(file_path)
        for variant in parser.parse(file_path):
            total += 1
            if variant.rsid in hv_rsids:
                hv_variants.append(variant)
            # Anything that is neither a no-call nor heterozygous is counted
            # as homozygous.
            if variant.is_no_call:
                no_calls += 1
            elif variant.is_heterozygous:
                het += 1
            else:
                hom += 1
            chrom_counts[variant.chromosome] = chrom_counts.get(variant.chromosome, 0) + 1
    finally:
        _unwire_parser_logging(counter, stderr_handler, snapshot)

    summary = Table(title=f"Genotype File Stats: {file_path.name}")
    summary.add_column("Metric", style="cyan", no_wrap=True)
    summary.add_column("Value", style="green")
    summary.add_row("Format", parser.display_name)
    summary.add_row("Sample ID", metadata["sample_id"] or "(unknown)")
    summary.add_row("Build", metadata["build"])
    summary.add_row("Total SNPs", f"{total:,}")
    summary.add_row("No-calls", f"{no_calls:,} ({_percent(no_calls, total)})")
    summary.add_row("Heterozygous", f"{het:,} ({_percent(het, total)})")
    summary.add_row("Homozygous", f"{hom:,} ({_percent(hom, total)})")
    if counter.count:
        summary.add_row(
            "Skipped (malformed)",
            f"[yellow]{counter.count:,}[/yellow] (see warnings on stderr)",
        )

    hv_warnings = scan_no_calls(hv_variants, high_value)
    if hv_warnings:
        summary.add_row(
            "High-value no-calls",
            f"[red]{len(hv_warnings)}[/red]",
        )
    console.print(summary)

    if hv_warnings:
        for line in format_warnings(hv_warnings):
            console.print(f"  [red]⚠[/red] {line}")

    chrom_table = Table(title="Variants per Chromosome")
    chrom_table.add_column("Chromosome", style="cyan", no_wrap=True)
    chrom_table.add_column("Count", style="green", justify="right")
    for chrom in sorted(chrom_counts, key=_chrom_sort_key):
        chrom_table.add_row(chrom, f"{chrom_counts[chrom]:,}")
    console.print(chrom_table)
|
|
478
|
+
|
|
479
|
+
|
|
480
|
+
# Shared Click decorators. Each constant is a reusable argument/option
# decorator so that several commands can declare the same parameter
# identically (they are applied below as ``@_FILE_ARG``, ``@_FORMAT_OPT``, …).

# Positional input file; must exist and must not be a directory.
_FILE_ARG = click.argument(
    "file_path", type=click.Path(exists=True, dir_okay=False, path_type=Path)
)
# Parser override, bound to the ``fmt`` parameter.
_FORMAT_OPT = click.option(
    "--format", "fmt", default=None, help="Force a specific parser. Default: auto-detect."
)
_DATA_DIR_OPT = click.option(
    "--data-dir",
    type=click.Path(file_okay=False, path_type=Path),
    default=None,
    help="Override database cache location.",
)
_MIN_MAG_OPT = click.option(
    "--min-magnitude",
    type=float,
    default=5.0,
    show_default=True,
    help="Filter annotations below this magnitude. Use 0 for the full unfiltered set.",
)
_OUTPUT_OPT = click.option(
    "--output",
    type=click.Path(dir_okay=False, path_type=Path),
    default=None,
    help="Write a report file (.html or .json). Omit for terminal output.",
)
_REPORT_FORMAT_OPT = click.option(
    "--report-format",
    type=click.Choice(["html", "json"], case_sensitive=False),
    default=None,
    help="Override report format detection (otherwise inferred from --output extension).",
)
_INCLUDE_BENIGN_OPT = click.option(
    "--include-benign",
    is_flag=True,
    default=False,
    help="Include ClinVar Benign/Likely_benign annotations (suppressed by default).",
)
# Per-source magnitude floors; both have non-None defaults, so a command using
# them always receives a float.
_GWAS_MIN_MAG_OPT = click.option(
    "--gwas-min-magnitude",
    type=float,
    default=9.0,
    show_default=True,
    help="Magnitude floor for GWAS Catalog annotations (overrides --min-magnitude for GWAS).",
)
_SNPEDIA_MIN_MAG_OPT = click.option(
    "--snpedia-min-magnitude",
    type=float,
    default=2.0,
    show_default=True,
    help="Magnitude floor for SNPedia annotations (overrides --min-magnitude for SNPedia).",
)
_INCLUDE_GWAS_OPT = click.option(
    "--include-gwas",
    is_flag=True,
    default=False,
    help="Include GWAS Catalog annotations (excluded by default in focused reports).",
)
_EXCLUDE_SNPEDIA_OPT = click.option(
    "--exclude-snpedia",
    is_flag=True,
    default=False,
    help="Exclude SNPedia annotations. Required for commercial use (CC BY-NC-SA 3.0).",
)
_GWAS_ALL_OPT = click.option(
    "--gwas-all",
    is_flag=True,
    default=False,
    help="Include all GWAS trait categories (disables default noise filtering).",
)
# Previous report to compare against, bound to the ``diff_path`` parameter.
_DIFF_OPT = click.option(
    "--diff",
    "diff_path",
    type=click.Path(exists=True, dir_okay=False, path_type=Path),
    default=None,
    help=(
        "Dev/QA tool: compare current output against a previous JSON report "
        "to detect regressions from code changes, database refreshes, or "
        "filter adjustments. Shows new, changed, and removed annotations. "
        "Not a monitoring tool — use for version-to-version validation."
    ),
)
_NO_UPDATE_OPT = click.option(
    "--no-update",
    is_flag=True,
    default=False,
    help="Skip the pre-analysis database freshness check.",
)
_NO_GNOMAD_OPT = click.option(
    "--no-gnomad",
    is_flag=True,
    default=False,
    help="Skip gnomAD population frequency enrichment.",
)
_NO_ALPHAMISSENSE_OPT = click.option(
    "--no-alphamissense",
    is_flag=True,
    default=False,
    help="Skip AlphaMissense variant pathogenicity enrichment.",
)
# Input-file genome build; the choice is matched case-insensitively.
_BUILD_OPT = click.option(
    "--build",
    type=click.Choice(["grch37", "grch38", "auto"], case_sensitive=False),
    default="auto",
    help=(
        "Genome build of the input file. 'auto' detects from position data "
        "(ADR-0021) and ignores the file header. 'grch37' / 'grch38' force a "
        "specific build, skipping detection."
    ),
)
|
|
589
|
+
|
|
590
|
+
|
|
591
|
+
def _resolve_clinvar_builds(value: str) -> tuple[str, ...]:
    """Translate a `db update --build` value into the builds it selects.

    The value is stripped and lower-cased before lookup; a falsy value is
    treated as ``"both"``.

    Returns:
        ``("GRCh37", "GRCh38")`` for ``both``, or a one-element tuple for a
        single build.

    Raises:
        click.ClickException: For any other value.
    """
    key = (value or "both").strip().lower()
    selections = {
        "both": ("GRCh37", "GRCh38"),
        "grch37": ("GRCh37",),
        "grch38": ("GRCh38",),
    }
    builds = selections.get(key)
    if builds is None:
        raise click.ClickException(f"Unknown --build value {value!r}")
    return builds
|
|
601
|
+
|
|
602
|
+
|
|
603
|
+
def _normalize_cli_build(value: str | None) -> str | None:
    """Convert a ``--build`` CLI value into a canonical build name.

    Returns:
        ``None`` when the value is ``None``, blank, or ``auto`` (compared after
        stripping and lower-casing); otherwise ``"GRCh37"`` or ``"GRCh38"``.

    Raises:
        click.ClickException: For any other value.
    """
    if value is None:
        return None
    key = value.strip().lower()
    canonical = {"": None, "auto": None, "grch37": "GRCh37", "grch38": "GRCh38"}
    if key not in canonical:
        raise click.ClickException(f"Unknown --build value {value!r}")
    return canonical[key]
|
|
615
|
+
|
|
616
|
+
|
|
617
|
+
def _emit_build_diagnostics(result: object) -> None:
    """Print the genome-build banner plus any build-related warnings.

    Does nothing when ``result`` has no ``build_diagnostics`` attribute or it
    is ``None``. Otherwise prints one banner line naming the effective build
    and where it came from, a warning when the diagnostics flag a mismatch,
    and a further warning when the effective build is ``GRCh36``.
    """
    diag = getattr(result, "build_diagnostics", None)
    if diag is None:
        return

    if diag.inspected_count:
        matched = f"{diag.matched_count}/{diag.inspected_count}"
    else:
        matched = "0/0"

    # First truthy attribute wins, checked lazily in this order.
    provenance = (
        ("override", "override"),
        ("detected_build", "detected"),
        ("header_build", "header (no position confirmation)"),
    )
    source = next(
        (label for attr, label in provenance if getattr(diag, attr)),
        "fallback (no known SNPs matched)",
    )

    banner = (
        f"[dim]Build: {diag.effective_build} ({source}; "
        f"{matched} known-SNP positions matched)[/dim]"
    )
    console.print(banner)

    if diag.mismatch:
        mismatch_note = (
            f"[yellow]Build mismatch: file header claims {diag.header_build} but "
            f"position data is {diag.detected_build}. Using {diag.detected_build}. "
            "This is a real-world data-quality issue — your provider may have "
            "mislabeled the build (see ADR-0021).[/yellow]"
        )
        console.print(mismatch_note)

    if diag.effective_build == "GRCh36":
        legacy_note = (
            "[yellow]Warning: GRCh36 (hg18) detected. rsID-based annotations "
            "(PharmGKB, GWAS Catalog, SNPedia, gnomAD) are complete. ClinVar "
            "position-matching is skipped (no GRCh36 cache — see ADR-0025). "
            "For full ClinVar coverage, liftOver to GRCh38 first: "
            "docs/grch36-liftover.md[/yellow]"
        )
        console.print(legacy_note)
|
|
650
|
+
|
|
651
|
+
|
|
652
|
+
@main.command()
@_FILE_ARG
@_FORMAT_OPT
@_DATA_DIR_OPT
@_MIN_MAG_OPT
@click.option(
    "--category",
    type=str,
    default=None,
    help="Filter to a single bucket (clinical, pharma).",
)
@_OUTPUT_OPT
@_REPORT_FORMAT_OPT
@_BUILD_OPT
@_INCLUDE_BENIGN_OPT
@_GWAS_MIN_MAG_OPT
@_SNPEDIA_MIN_MAG_OPT
@_GWAS_ALL_OPT
@_EXCLUDE_SNPEDIA_OPT
@_DIFF_OPT
@_NO_UPDATE_OPT
@_NO_GNOMAD_OPT
@_NO_ALPHAMISSENSE_OPT
def analyze(
    file_path: Path,
    fmt: str | None,
    data_dir: Path | None,
    min_magnitude: float,
    category: str | None,
    output: Path | None,
    report_format: str | None,
    build: str,
    include_benign: bool,
    gwas_min_magnitude: float,
    snpedia_min_magnitude: float,
    gwas_all: bool,
    exclude_snpedia: bool,
    diff_path: Path | None,
    no_update: bool,
    no_gnomad: bool,
    no_alphamissense: bool,
) -> None:
    """Annotate a genotype file against all ready reference databases."""
    # Thin CLI wrapper: every option is forwarded to _run_analysis_command,
    # with three adaptations noted inline.
    _run_analysis_command(
        file_path=file_path,
        fmt=fmt,
        data_dir=data_dir,
        output=output,
        report_format=report_format,
        min_magnitude=min_magnitude,
        category=category,
        # This command exposes no gene filter.
        genes=None,
        # "auto" becomes None; explicit builds become "GRCh37"/"GRCh38".
        build=_normalize_cli_build(build),
        include_benign=include_benign,
        gwas_min_magnitude=gwas_min_magnitude,
        snpedia_min_magnitude=snpedia_min_magnitude,
        # The boolean flag is translated into a set of source names to drop.
        exclude_sources=frozenset({"snpedia"}) if exclude_snpedia else None,
        gwas_all=gwas_all,
        diff_path=diff_path,
        no_update=no_update,
        no_gnomad=no_gnomad,
        no_alphamissense=no_alphamissense,
    )
|
|
715
|
+
|
|
716
|
+
|
|
717
|
+
@main.command()
@_FILE_ARG
@_FORMAT_OPT
@click.option(
    "--snps",
    required=True,
    help="Comma-separated rsIDs to extract (e.g., rs1801133,rs4680).",
)
def extract(file_path: Path, fmt: str | None, snps: str) -> None:
    """Print diploid genotypes for specific rsIDs — spot-check carrier status.

    Useful for verifying ClinVar / PharmGKB hits against the actual file
    before trusting them. The "Genotype" column shows the diploid call as
    the array (or VCF) reported it; "Het?" and "No-call?" answer the
    questions the carrier rule (ADR-0007) actually checks.
    """
    parser = _resolve_parser(file_path, fmt)

    # Split the comma-separated list and drop blank entries ("a,,b", " ").
    tokens = (piece.strip() for piece in snps.split(","))
    wanted = {token for token in tokens if token}
    if not wanted:
        raise click.ClickException("--snps cannot be empty.")

    counter, stderr_handler, snapshot = _wire_parser_logging()
    found: dict[str, object] = {}
    try:
        for record in parser.parse(file_path):
            if record.rsid not in wanted:
                continue
            found[record.rsid] = record
            # Stop streaming as soon as every requested rsID has been seen.
            if len(found) == len(wanted):
                break
    finally:
        # Always undo the logging wiring, even when parsing raises.
        _unwire_parser_logging(counter, stderr_handler, snapshot)

    table = Table(title=f"Genotypes from {file_path.name}")
    column_specs = (
        ("rsID", {"style": "cyan", "no_wrap": True}),
        ("Chr", {"no_wrap": True}),
        ("Position", {"justify": "right"}),
        ("Genotype", {"style": "yellow", "no_wrap": True}),
        ("Het?", {"justify": "center"}),
        ("No-call?", {"justify": "center"}),
    )
    for header, options in column_specs:
        table.add_column(header, **options)

    # One row per requested rsID, sorted, including those absent from the file.
    for rsid in sorted(wanted):
        hit = found.get(rsid)
        if hit is not None:
            table.add_row(
                hit.rsid,
                hit.chromosome,
                f"{hit.position:,}",
                hit.genotype,
                "yes" if hit.is_heterozygous else "no",
                "[red]yes[/red]" if hit.is_no_call else "no",
            )
        else:
            table.add_row(rsid, "—", "—", "[red]not in file[/red]", "—", "—")
    console.print(table)
|
|
770
|
+
|
|
771
|
+
|
|
772
|
+
@main.command()
@click.argument("file1", type=click.Path(exists=True, dir_okay=False, path_type=Path))
@click.argument("file2", type=click.Path(exists=True, dir_okay=False, path_type=Path))
@click.option("--format1", "fmt1", default=None, help="Force parser for file 1.")
@click.option("--format2", "fmt2", default=None, help="Force parser for file 2.")
def compare(file1: Path, file2: Path, fmt1: str | None, fmt2: str | None) -> None:
    """Compare two genotype files — coverage overlap and concordance.

    Reports shared rsIDs, file-specific rsIDs, genotype agreement,
    strand-flip matches (complementary alleles on opposite strands),
    discordant calls, and strand-ambiguous positions.
    """
    from allelix.compare import compare_variants
    from allelix.utils.build_detect import detect_build

    first_parser = _resolve_parser(file1, fmt1)
    second_parser = _resolve_parser(file2, fmt2)
    # Both files are fully materialised: build detection and the comparison
    # each need to walk the variants.
    first_variants = [*first_parser.parse(file1)]
    second_variants = [*second_parser.parse(file2)]

    first_detection = detect_build(first_variants)
    second_detection = detect_build(second_variants)
    # Prefer the detected build; otherwise fall back to parser metadata.
    first_build = first_detection.build or first_parser.get_metadata(file1).get(
        "build", "unknown"
    )
    second_build = second_detection.build or second_parser.get_metadata(file2).get(
        "build", "unknown"
    )

    result = compare_variants(
        first_variants, second_variants, build1=first_build, build2=second_build
    )

    if result.build1 != result.build2:
        console.print(
            f"[yellow]Warning: builds differ ({result.build1} vs {result.build2}). "
            "Position-based comparisons may be unreliable.[/yellow]"
        )

    coverage = Table(title="Coverage Summary")
    coverage.add_column("Metric", style="bold")
    coverage.add_column("Value", justify="right")
    coverage_rows = (
        ("File 1", f"{file1.name} ({result.file1_total:,} variants)"),
        ("File 2", f"{file2.name} ({result.file2_total:,} variants)"),
        ("Build (file 1)", result.build1),
        ("Build (file 2)", result.build2),
        ("Shared rsIDs", f"{result.shared:,}"),
        ("File 1 only", f"{result.file1_only:,}"),
        ("File 2 only", f"{result.file2_only:,}"),
    )
    for metric, value in coverage_rows:
        coverage.add_row(metric, value)
    console.print(coverage)

    concordance = Table(title="Genotype Concordance")
    concordance.add_column("Category", style="bold")
    concordance.add_column("Count", justify="right")
    concordance.add_column("%", justify="right")
    tallies = (
        ("Concordant", result.concordant),
        ("Strand-flip match", result.strand_flip_match),
        ("Discordant", result.discordant),
        ("Strand-ambiguous", result.strand_ambiguous),
        ("No-call (either file)", result.no_call),
    )
    for label, count in tallies:
        # With no shared rsIDs there is no denominator, so show a dash.
        share = _percent(count, result.shared) if result.shared else "—"
        concordance.add_row(label, f"{count:,}", share)
    console.print(concordance)

    per_chrom = result.chromosome_counts
    if not per_chrom:
        return

    breakdown = Table(title="Per-Chromosome Breakdown")
    breakdown.add_column("Chr", style="cyan", no_wrap=True)
    for heading in ("Concordant", "Flip", "Discordant", "Ambiguous", "No-call"):
        breakdown.add_column(heading, justify="right")
    # Keys of the per-chromosome mapping, in the same order as the columns.
    categories = (
        "concordant",
        "strand_flip_match",
        "discordant",
        "strand_ambiguous",
        "no_call",
    )
    for chrom in sorted(per_chrom, key=_chrom_sort_key):
        counts = per_chrom[chrom]
        breakdown.add_row(chrom, *(str(counts.get(name, 0)) for name in categories))
    console.print(breakdown)
|
|
851
|
+
|
|
852
|
+
|
|
853
|
+
@main.command()
@_FILE_ARG
@_FORMAT_OPT
@_DATA_DIR_OPT
@_MIN_MAG_OPT
@_OUTPUT_OPT
@_REPORT_FORMAT_OPT
@_BUILD_OPT
@_INCLUDE_BENIGN_OPT
@_GWAS_MIN_MAG_OPT
@_SNPEDIA_MIN_MAG_OPT
@_INCLUDE_GWAS_OPT
@_GWAS_ALL_OPT
@_EXCLUDE_SNPEDIA_OPT
@_DIFF_OPT
@_NO_UPDATE_OPT
@_NO_GNOMAD_OPT
@_NO_ALPHAMISSENSE_OPT
def methylation(
    file_path: Path,
    fmt: str | None,
    data_dir: Path | None,
    min_magnitude: float,
    output: Path | None,
    report_format: str | None,
    build: str,
    include_benign: bool,
    gwas_min_magnitude: float,
    snpedia_min_magnitude: float,
    include_gwas: bool,
    gwas_all: bool,
    exclude_snpedia: bool,
    diff_path: Path | None,
    no_update: bool,
    no_gnomad: bool,
    no_alphamissense: bool,
) -> None:
    """Methylation-pathway-focused report (MTHFR, MTR, MTRR, COMT, CBS, …)."""
    # GWAS is excluded unless --include-gwas was given; SNPedia only when the
    # exclude flag is set.
    switches = (("gwas", not include_gwas), ("snpedia", exclude_snpedia))
    skipped_sources = frozenset(source for source, skip in switches if skip)
    normalized_build = _normalize_cli_build(build)

    # Same pipeline as the other analysis commands, restricted to the
    # methylation gene panel; no category filter is applied.
    _run_analysis_command(
        file_path=file_path,
        fmt=fmt,
        data_dir=data_dir,
        output=output,
        report_format=report_format,
        min_magnitude=min_magnitude,
        category=None,
        genes=METHYLATION_PANEL_GENES,
        build=normalized_build,
        include_benign=include_benign,
        gwas_min_magnitude=gwas_min_magnitude,
        snpedia_min_magnitude=snpedia_min_magnitude,
        # An empty exclusion set is passed as None, not as an empty frozenset.
        exclude_sources=skipped_sources or None,
        gwas_all=gwas_all,
        diff_path=diff_path,
        no_update=no_update,
        no_gnomad=no_gnomad,
        no_alphamissense=no_alphamissense,
    )
|
|
916
|
+
|
|
917
|
+
|
|
918
|
+
@main.command()
@_FILE_ARG
@_FORMAT_OPT
@_DATA_DIR_OPT
@_MIN_MAG_OPT
@_OUTPUT_OPT
@_REPORT_FORMAT_OPT
@_BUILD_OPT
@_INCLUDE_BENIGN_OPT
@_GWAS_MIN_MAG_OPT
@_SNPEDIA_MIN_MAG_OPT
@_INCLUDE_GWAS_OPT
@_GWAS_ALL_OPT
@_EXCLUDE_SNPEDIA_OPT
@_DIFF_OPT
@_NO_UPDATE_OPT
@_NO_GNOMAD_OPT
@_NO_ALPHAMISSENSE_OPT
def pharmacogenomics(
    file_path: Path,
    fmt: str | None,
    data_dir: Path | None,
    min_magnitude: float,
    output: Path | None,
    report_format: str | None,
    build: str,
    include_benign: bool,
    gwas_min_magnitude: float,
    snpedia_min_magnitude: float,
    include_gwas: bool,
    gwas_all: bool,
    exclude_snpedia: bool,
    diff_path: Path | None,
    no_update: bool,
    no_gnomad: bool,
    no_alphamissense: bool,
) -> None:
    """Pharmacogenomics-focused report (annotations from PharmGKB-style sources)."""
    # GWAS is excluded unless --include-gwas was given; SNPedia only when the
    # exclude flag is set.
    switches = (("gwas", not include_gwas), ("snpedia", exclude_snpedia))
    skipped_sources = frozenset(source for source, skip in switches if skip)
    normalized_build = _normalize_cli_build(build)

    # Same pipeline as the other analysis commands, pinned to the "pharma"
    # category with no gene panel restriction.
    _run_analysis_command(
        file_path=file_path,
        fmt=fmt,
        data_dir=data_dir,
        output=output,
        report_format=report_format,
        min_magnitude=min_magnitude,
        category="pharma",
        genes=None,
        build=normalized_build,
        include_benign=include_benign,
        gwas_min_magnitude=gwas_min_magnitude,
        snpedia_min_magnitude=snpedia_min_magnitude,
        # An empty exclusion set is passed as None, not as an empty frozenset.
        exclude_sources=skipped_sources or None,
        gwas_all=gwas_all,
        diff_path=diff_path,
        no_update=no_update,
        no_gnomad=no_gnomad,
        no_alphamissense=no_alphamissense,
    )
|
|
981
|
+
|
|
982
|
+
|
|
983
|
+
@main.group()
def db() -> None:
    """Manage local reference database cache."""
    # Intentionally empty: this function only anchors the `db` command group.
    # Its subcommands are attached through the `@db.command(...)` decorators
    # (e.g. "update" and "status").
|
|
986
|
+
|
|
987
|
+
|
|
988
|
+
def _stamp_remote_signal(annotator: Annotator, signal: str) -> None:
    """Stamp *signal* onto the annotator's existing cache without a download.

    The cache location is read from the annotator's ``_db_path`` attribute.
    When that attribute is missing or ``None`` nothing is written.
    """
    import sqlite3
    from contextlib import closing

    from allelix.databases.manager import stamp_remote_signal

    cache_file = getattr(annotator, "_db_path", None)
    if cache_file is not None:
        # closing() guarantees the connection is released even if the stamp
        # or the commit raises.
        with closing(sqlite3.connect(cache_file)) as connection:
            stamp_remote_signal(connection, annotator.name, signal)
            connection.commit()
|
|
1001
|
+
|
|
1002
|
+
|
|
1003
|
+
def _confirm_cadd_license(*, license_held: bool = False) -> bool:
    """Print the CADD license notice and report whether the download may go on.

    Args:
        license_held: When true, a commercial license is treated as already
            asserted: a short notice is printed and no prompt is shown.

    Returns:
        ``True`` when ``license_held`` is set; otherwise the user's answer to
        the confirmation prompt, which defaults to "no".
    """
    heading = "\n[bold yellow]CADD License Notice[/bold yellow]\n"

    if not license_held:
        console.print(
            heading
            + "CADD scores are provided by the University of Washington.\n"
            + "Commercial use requires a license from UW CoMotion\n"
            + "([link=https://els2.comotion.uw.edu/product/cadd-scores]"
            + "https://els2.comotion.uw.edu/product/cadd-scores[/link]).\n"
            + "By continuing, you confirm that your use is non-commercial\n"
            + "or that you hold a valid commercial license.\n"
        )
        return click.confirm("Continue with CADD download?", default=False)

    console.print(
        heading + "Commercial license asserted. Proceeding with CADD download.\n"
    )
    return True
|
|
1021
|
+
|
|
1022
|
+
|
|
1023
|
+
def _run_setup(annotator: Annotator) -> bool:
    """Run ``annotator.setup()`` and report the outcome on the console.

    Returns:
        ``True`` when setup completed, ``False`` when it raised. A failure is
        printed in red and is not re-raised.
    """
    try:
        annotator.setup()
    except Exception as failure:
        # Some exceptions expose close(); call it before reporting.
        # NOTE(review): presumably these wrap an open HTTP response — confirm.
        if hasattr(failure, "close"):
            failure.close()
        console.print(f" [red]{annotator.name}: {failure}[/red]")
        return False
    else:
        # Annotators without cached_remote_signal() are treated as having no
        # stored signal.
        read_signal = getattr(annotator, "cached_remote_signal", lambda: None)
        stored_signal = read_signal()
        if stored_signal and "cpic:unavailable" in stored_signal:
            console.print(
                f" [yellow]{annotator.name}: updated (CPIC unavailable — "
                "non-finding filter degraded, retry later)[/yellow]"
            )
        return True
|
|
1039
|
+
|
|
1040
|
+
|
|
1041
|
+
@db.command("update")
@_DATA_DIR_OPT
@click.option(
    "--force",
    is_flag=True,
    default=False,
    help="Re-download even if the local cache appears current.",
)
@click.option(
    "--no-gnomad",
    is_flag=True,
    default=False,
    help="Skip gnomAD population frequency database.",
)
@click.option(
    "--no-alphamissense",
    is_flag=True,
    default=False,
    help="Skip AlphaMissense pathogenicity database.",
)
@click.option(
    "--cadd",
    "include_cadd",
    is_flag=True,
    default=False,
    help="Download CADD deleteriousness scores (non-commercial use only; disabled by default).",
)
@click.option(
    "--build",
    type=click.Choice(["grch37", "grch38", "both"], case_sensitive=False),
    default="both",
    help=(
        "Which ClinVar genome build(s) to download. 'both' (default) keeps "
        "GRCh37 and GRCh38 caches in sync so `analyze` can dispatch by "
        "detected build (ADR-0021). 'grch37' / 'grch38' restrict to one to "
        "save bandwidth."
    ),
)
def db_update(
    data_dir: Path | None,
    force: bool,
    no_gnomad: bool,
    no_alphamissense: bool,
    include_cadd: bool,
    build: str,
) -> None:
    r"""Download or refresh reference databases.

    \b
    For each annotator:
    - no cache → download
    - --force → download
    - cache + remote signal matches cache → skip
    - cache + remote signal differs → download
    - legacy v0.4.1 cache with no stored signal → stamp the current
      remote signal (no download)
    - cache + remote signal can't be fetched → skip with notice (use
      --force to override)

    `--build` selects which ClinVar build(s) to manage. Default 'both'
    downloads GRCh37 and GRCh38 caches.
    """
    # The docstring above is the `--help` text. The `\b` marker stops Click
    # from reflowing the bullet list into a single paragraph.
    resolved = resolve_data_dir(data_dir)
    console.print(f"Data directory: [cyan]{resolved}[/cyan]")

    from allelix.config import load_config

    cfg = load_config(resolved)

    clinvar_builds = _resolve_clinvar_builds(build)
    for annotator in get_annotators(
        resolved, clinvar_builds=clinvar_builds, cadd_full=cfg.cadd_full
    ):
        with annotator:
            # Explicit opt-outs from the command line.
            if no_gnomad and annotator.name == "gnomad":
                console.print(f" [dim]{annotator.name}: skipped (--no-gnomad)[/dim]")
                continue
            if no_alphamissense and annotator.name == "alphamissense":
                console.print(f" [dim]{annotator.name}: skipped (--no-alphamissense)[/dim]")
                continue

            if annotator.name == "cadd":
                # CADD is opt-in: it needs --cadd or an enabled config source.
                if not include_cadd and not cfg.is_enabled("cadd"):
                    console.print(
                        f" [dim]{annotator.name}: disabled "
                        "(enable with `allelix config set sources.cadd true` "
                        "or pass `--cadd`)[/dim]"
                    )
                    continue
                # The license notice is shown only when a download is about
                # to happen (no cache yet, or --force).
                if (not annotator.is_ready() or force) and not _confirm_cadd_license(
                    license_held=cfg.license_held("cadd"),
                ):
                    console.print(f" [dim]{annotator.name}: skipped (declined)[/dim]")
                    continue

            # Annotators that need no download are only reported, never set up.
            if not annotator.requires_download:
                if annotator.is_ready():
                    console.print(
                        f" [dim]{annotator.name}: ready "
                        f"({annotator.version() or 'unknown'})[/dim]"
                    )
                continue

            # No cache yet → download.
            if not annotator.is_ready():
                console.print(f" [bold]{annotator.name}[/bold]: downloading…")
                if _run_setup(annotator):
                    console.print(
                        f" [green]✓ {annotator.name} ready[/green] "
                        f"(version {annotator.version() or '(unknown)'})"
                    )
                continue

            # Cache present but the user asked for an unconditional refresh.
            if force:
                console.print(f" [bold]{annotator.name}[/bold]: --force; refreshing…")
                if _run_setup(annotator):
                    console.print(
                        f" [green]✓ {annotator.name} refreshed[/green] "
                        f"(version {annotator.version() or '(unknown)'})"
                    )
                continue

            # Code-driven sources (commit-pinned HF caches) are updated
            # only via code changes — no runtime freshness probe needed.
            if not annotator.server_driven_freshness:
                console.print(
                    f" [dim]{annotator.name}: already current "
                    f"(version {annotator.version() or '(unknown)'})[/dim]"
                )
                continue

            remote = annotator.fetch_remote_signal()
            if remote is None:
                console.print(
                    f" [yellow]{annotator.name}: cache present, but remote "
                    "freshness can't be verified (network error or no signal). "
                    "Pass --force to refresh anyway.[/yellow]"
                )
                continue

            cached = annotator.cached_remote_signal()
            if cached == remote:
                console.print(
                    f" [dim]{annotator.name}: already current "
                    f"(version {annotator.version() or '(unknown)'})[/dim]"
                )
                continue

            # A cache with no stored signal is adopted as-is: record the
            # current remote signal instead of downloading again.
            if cached is None:
                _stamp_remote_signal(annotator, remote)
                console.print(
                    f" [dim]{annotator.name}: stamped remote signal "
                    f"(version {annotator.version() or '(unknown)'})[/dim]"
                )
                continue

            # Stored signal differs from the remote one → refresh.
            console.print(f" [bold]{annotator.name}[/bold]: remote signal changed; refreshing…")
            if _run_setup(annotator):
                console.print(
                    f" [green]✓ {annotator.name} refreshed[/green] "
                    f"(version {annotator.version() or '(unknown)'})"
                )
|
|
1200
|
+
|
|
1201
|
+
|
|
1202
|
+
@db.command("status")
@_DATA_DIR_OPT
def db_status(data_dir: Path | None) -> None:
    """Show installed reference database versions and freshness."""
    from allelix.config import load_config

    resolved = resolve_data_dir(data_dir)
    cfg = load_config(resolved)

    table = Table(title=f"Reference Databases ({resolved})")
    table.add_column("Annotator", style="cyan", no_wrap=True)
    table.add_column("Ready", justify="center")
    table.add_column("Version")
    table.add_column("Records", justify="right")

    for annotator in get_annotators(resolved, cadd_full=cfg.cadd_full):
        with annotator:
            ready_marker = "[green]yes[/green]" if annotator.is_ready() else "[red]no[/red]"

            version = annotator.version() or "—"
            # Annotators without cached_remote_signal() count as "no signal".
            read_signal = getattr(annotator, "cached_remote_signal", lambda: None)
            stored_signal = read_signal()
            if stored_signal and "cpic:unavailable" in stored_signal:
                version += " (no CPIC)"

            # record_count() is optional; a missing method or a None result
            # both render as a dash.
            count_fn = getattr(annotator, "record_count", None)
            count = count_fn() if callable(count_fn) else None
            records = "—" if count is None else f"{count:,}"

            table.add_row(annotator.display_name, ready_marker, version, records)
    console.print(table)
|
|
1231
|
+
|
|
1232
|
+
|
|
1233
|
+
@main.group()
def config() -> None:
    """Manage persistent configuration (source toggles, license mode)."""
    # Intentionally empty: this function only anchors the `config` command
    # group. Its subcommands are attached through the `@config.command(...)`
    # decorators ("show", "get" and "set").
|
|
1236
|
+
|
|
1237
|
+
|
|
1238
|
+
@config.command("show")
@_DATA_DIR_OPT
def config_show(data_dir: Path | None) -> None:
    """Display current configuration."""
    from allelix.annotators import _ANNOTATOR_CLASSES
    from allelix.annotators.base import Permission
    from allelix.annotators.base import permission as check_permission
    from allelix.config import load_config

    resolved = resolve_data_dir(data_dir)
    cfg = load_config(resolved)

    yes_marker = "[green]yes[/green]"
    no_marker = "[red]no[/red]"

    table = Table(title=f"Configuration ({resolved / 'config.toml'})")
    table.add_column("Source", style="cyan", no_wrap=True)
    table.add_column("Enabled", justify="center")
    table.add_column("Note", style="dim")

    for name, enabled in sorted(cfg.sources.items()):
        # Start from the plain toggle; a blocking license verdict overrides it.
        marker = yes_marker if enabled else no_marker
        note = ""
        annotator_cls = _ANNOTATOR_CLASSES.get(name)
        if annotator_cls is not None:
            verdict = check_permission(
                annotator_cls.license,
                commercial=cfg.commercial,
                license_held=cfg.license_held(name),
            )
            if verdict is Permission.BLOCK_PURCHASABLE:
                marker = no_marker
                note = (
                    "requires commercial license — purchase: "
                    f"{annotator_cls.license.purchase_url}"
                )
            elif verdict is Permission.BLOCK_FINAL:
                marker = no_marker
                note = "no commercial license is available"
        table.add_row(name, marker, note)
    console.print(table)

    if cfg.commercial:
        mode = "[yellow]commercial[/yellow]"
    else:
        mode = "[green]personal[/green]"
    console.print(f"License mode: {mode}")
|
|
1281
|
+
|
|
1282
|
+
|
|
1283
|
+
@config.command("get")
@_DATA_DIR_OPT
@click.argument("key", required=False, default=None)
def config_get(data_dir: Path | None, key: str | None) -> None:
    r"""Get a configuration value (or dump entire config).

    \b
    Keys:
      sources.<name> Show if a source is enabled
      license.commercial Show commercial mode
      license.<source> Show if a license is asserted for <source>
      options.cadd_full Show full CADD tabix mode

    \b
    Examples:
      allelix config get # dump entire config
      allelix config get sources.cadd # true
      allelix config get license.cadd # false
      allelix config get options.cadd_full # false
    """
    from allelix.config import _serialize, load_config

    resolved = resolve_data_dir(data_dir)
    cfg = load_config(resolved)

    # No key: print where the config lives, then the serialized config.
    if key is None:
        console.print(f"[dim]Config: {resolved / 'config.toml'}[/dim]")
        click.echo(_serialize(cfg))
        return

    # Split at the first dot: "<section>.<remainder>". `dot` is empty when the
    # key has no dot, so a bare "sources" or "license" matches no section.
    section, dot, remainder = key.partition(".")

    if dot and section == "sources":
        enabled = cfg.sources.get(remainder)
        if enabled is None:
            raise click.ClickException(
                f"Unknown source {remainder!r}. Known sources: {', '.join(sorted(cfg.sources))}"
            )
        click.echo(str(enabled).lower())
    elif key == "license.commercial":
        # Must be tested before the generic "license.<source>" branch.
        click.echo(str(cfg.commercial).lower())
    elif dot and section == "license":
        click.echo(str(cfg.license_held(remainder)).lower())
    elif key == "options.cadd_full":
        click.echo(str(cfg.cadd_full).lower())
    else:
        raise click.ClickException(
            f"Unknown key {key!r}. Use 'sources.<name>', 'license.commercial', "
            "'license.<source>', or 'options.cadd_full'."
        )
|
|
1333
|
+
|
|
1334
|
+
|
|
1335
|
+
@config.command("set")
@_DATA_DIR_OPT
@click.argument("key")
@click.argument("value")
def config_set(data_dir: Path | None, key: str, value: str) -> None:
    r"""Set a configuration value.

    \b
    Keys:
      sources.<name> Enable/disable a source (true/false)
      license.commercial Set commercial mode (true/false)
      license.<source> Assert you hold a commercial license for <source>
      options.cadd_full Use full CADD tabix file instead of cache (true/false)

    \b
    Examples:
      allelix config set sources.snpedia false
      allelix config set license.commercial true
      allelix config set license.cadd true
      allelix config set options.cadd_full true
    """
    from allelix.config import load_config, save_config

    resolved = resolve_data_dir(data_dir)
    cfg = load_config(resolved)

    # Only the literal strings "true" / "false" (any case, surrounding
    # whitespace ignored) are accepted.
    val_lower = value.strip().lower()
    if val_lower not in ("true", "false"):
        raise click.ClickException(f"Value must be 'true' or 'false', got {value!r}")
    bool_val = val_lower == "true"

    if key.startswith("sources."):
        source_name = key[len("sources.") :]
        # "sources." alone would otherwise persist an empty-string key.
        if not source_name:
            raise click.ClickException(
                f"Missing source name in key {key!r}. Use 'sources.<name>'."
            )
        # `config get` treats names absent from cfg.sources as unknown, so
        # warn about a probable typo here. The value is still written, as
        # before.
        if source_name not in cfg.sources:
            console.print(f"[yellow]Warning: unknown source {source_name!r}[/yellow]")
        cfg.sources[source_name] = bool_val
    elif key == "license.commercial":
        # Must be tested before the generic "license.<source>" branch.
        cfg.commercial = bool_val
    elif key.startswith("license."):
        from allelix.annotators import get_annotator_class

        source_name = key[len("license.") :]
        # "license." alone would otherwise persist an empty-string override.
        if not source_name:
            raise click.ClickException(
                f"Missing source name in key {key!r}. Use 'license.<source>'."
            )
        cls = get_annotator_class(source_name)
        if bool_val:
            # Refuse assertions for sources that cannot be licensed at all.
            if cls is not None and not cls.license.licensable:
                raise click.ClickException(
                    f"{source_name} is not commercially licensable. "
                    f"This assertion has no effect and cannot be set."
                )
            cfg.license_overrides[source_name] = True
        else:
            # Clearing an override: warn when the name is neither a known
            # annotator nor an existing override.
            if cls is None and source_name not in cfg.license_overrides:
                console.print(f"[yellow]Warning: unknown source {source_name!r}[/yellow]")
            cfg.license_overrides.pop(source_name, None)
    elif key == "options.cadd_full":
        cfg.cadd_full = bool_val
    else:
        raise click.ClickException(
            f"Unknown key {key!r}. Use 'sources.<name>', 'license.commercial', "
            "'license.<source>', or 'options.cadd_full'."
        )

    save_config(resolved, cfg)
    console.print(f"[dim]Config: {resolved / 'config.toml'}[/dim]")
    console.print(f"[green]Set {key} = {val_lower}[/green]")
|
|
1403
|
+
|
|
1404
|
+
|
|
1405
|
+
@main.group()
def export() -> None:
    """Export parsed genotype data to other formats."""
    # Intentionally empty: this function only anchors the `export` command
    # group. Its subcommands are attached through the `@export.command(...)`
    # decorators (e.g. "plink").
|
|
1408
|
+
|
|
1409
|
+
|
|
1410
|
+
@export.command("plink")
|
|
1411
|
+
@_FILE_ARG
|
|
1412
|
+
@click.option(
|
|
1413
|
+
"--output-prefix",
|
|
1414
|
+
"-o",
|
|
1415
|
+
type=click.Path(path_type=Path),
|
|
1416
|
+
default=None,
|
|
1417
|
+
help="Base path for .bed/.bim/.fam (default: input stem).",
|
|
1418
|
+
)
|
|
1419
|
+
@_FORMAT_OPT
|
|
1420
|
+
@_BUILD_OPT
|
|
1421
|
+
@_DATA_DIR_OPT
|
|
1422
|
+
def export_plink_cmd(
    file_path: Path,
    output_prefix: Path | None,
    fmt: str | None,
    build: str,
    data_dir: Path | None,
) -> None:
    """Convert to PLINK1 binary format (.bed/.bim/.fam).

    Produces a single-sample, SNP-major .bed file suitable for downstream
    tools (plink2 PCA, ADMIXTURE, PRSice). Uses gnomAD ref/alt for allele
    coding when available; falls back to monomorphic (A2=0) for positions
    without gnomAD coverage.
    """
    # NOTE(review): the docstring above presumably doubles as the CLI help
    # text for this command — confirm before rewording it.
    from allelix.exporters.plink import _orient_genotype, export_plink

    parser = _resolve_parser(file_path, fmt)
    # Default output prefix: the input path with its last suffix removed.
    prefix = output_prefix or file_path.with_suffix("")
    build_override = _normalize_cli_build(build)
    metadata = parser.get_metadata(file_path)
    # An explicit CLI build wins; otherwise use the file's metadata, and
    # "GRCh37" when the metadata has no "build" key.
    effective_build = build_override or metadata.get("build", "GRCh37")
    data_root = resolve_data_dir(data_dir)

    variants = list(parser.parse(file_path))

    # Autosomes 1-22 first, then X, Y, XY, MT. Any other chromosome label
    # ranks 99 and is ordered by its name.
    chrom_rank = {
        **{str(number): number for number in range(1, 23)},
        "X": 23,
        "Y": 24,
        "XY": 25,
        "MT": 26,
    }

    def _genomic_key(variant):
        """Sort key: chromosome rank, chromosome label, then position."""
        return (
            chrom_rank.get(variant.chromosome, 99),
            variant.chromosome,
            variant.position,
        )

    # The .bim must list each chromosome as one contiguous block, because
    # PLINK1.9 rejects split chromosomes.
    variants.sort(key=_genomic_key)

    # Index the called genotypes by rsID; no-calls are left out. When an
    # rsID occurs more than once, the last one in sorted order is kept.
    called_by_rsid: dict[str, Variant] = {
        variant.rsid: variant for variant in variants if not variant.is_no_call
    }
    called_rsids = set(called_by_rsid)

    def _pick_ref_alt(call, candidates):
        """Choose a (ref, alt) pair for *call* among several candidates.

        The first pass accepts only a candidate that `_orient_genotype`
        can orient AND whose {ref, alt} contains both observed alleles.
        The second pass accepts the first candidate that orients at all.
        Returns None when no candidate qualifies.
        """
        observed = {call.allele1, call.allele2}
        for strict in (True, False):
            for _, _, ref, alt in candidates:
                if _orient_genotype(call.allele1, call.allele2, ref, alt) is None:
                    continue
                if strict and not (observed <= {ref, alt}):
                    continue
                return (ref, alt)
        return None

    ref_alt_map: dict[str, tuple[str, str]] = {}
    gnomad = None
    try:
        from allelix.annotators.gnomad import GnomadAnnotator

        gnomad = GnomadAnnotator(data_root)
        if gnomad.is_ready():
            resolved_coords = gnomad.bulk_resolve_coordinates(called_rsids)
            for rsid, candidates in resolved_coords.items():
                if len(candidates) != 1:
                    # Several (or zero) candidate records: choose by how
                    # well each one matches the observed genotype.
                    chosen = _pick_ref_alt(called_by_rsid[rsid], candidates)
                    if chosen is not None:
                        ref_alt_map[rsid] = chosen
                    continue
                # Exactly one candidate: take its ref/alt as-is.
                _, _, ref, alt = candidates[0]
                ref_alt_map[rsid] = (ref, alt)
    except Exception:
        # Best effort: any failure above only produces a warning. Entries
        # already stored in ref_alt_map before the failure are still used.
        console.print(
            "[yellow]gnomAD coordinate resolution failed; using fallback allele coding.[/yellow]"
        )
    finally:
        if gnomad is not None:
            gnomad.close()

    # An empty map is passed as None rather than as an empty dict.
    written, skipped, indel_skip, mono = export_plink(
        iter(variants),
        prefix,
        effective_build,
        ref_alt_map or None,
    )

    skip_parts = []
    if skipped:
        skip_parts.append(f"{skipped:,} no-calls")
    if indel_skip:
        skip_parts.append(f"{indel_skip:,} indels")
    skip_msg = ""
    if skip_parts:
        skip_msg = f" ({', '.join(skip_parts)} skipped)"
    console.print(f"Wrote {written:,} variants to {prefix}.bed/.bim/.fam{skip_msg}")

    if mono > 0:
        # Guard against division by zero when nothing was written.
        if written:
            pct = mono / written * 100
        else:
            pct = 0
        console.print(
            f"[dim]{mono:,} markers ({pct:.0f}%) exported as monomorphic "
            f"(A2=0, ref/alt unknown or ambiguous).[/dim]"
        )

    if not ref_alt_map:
        console.print(
            "[yellow]gnomAD not available — all homozygous markers exported "
            "as monomorphic.[/yellow]"
        )
        console.print("[yellow]Run `allelix db update` first for proper allele coding.[/yellow]")

    console.print(
        "[dim]Single-sample export. Merging with other samples requires "
        "allele harmonization (--merge-mode or set-all-var-ids).[/dim]"
    )
|
|
1521
|
+
|
|
1522
|
+
|
|
1523
|
+
# Invoke the CLI entry point only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|