allelix 2.0.1__tar.gz → 2.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {allelix-2.0.1 → allelix-2.0.2}/PKG-INFO +13 -1
- {allelix-2.0.1 → allelix-2.0.2}/README.md +12 -0
- allelix-2.0.2/allelix/__init__.py +41 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/annotators/base.py +24 -1
- {allelix-2.0.1 → allelix-2.0.2}/allelix/cli/_helpers.py +20 -3
- allelix-2.0.2/allelix/data/high_value_snps.yaml +136 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/parsers/base.py +8 -1
- {allelix-2.0.1 → allelix-2.0.2}/allelix/parsers/vcf.py +30 -7
- {allelix-2.0.1 → allelix-2.0.2}/allelix/reports/_pipeline.py +81 -6
- {allelix-2.0.1 → allelix-2.0.2}/allelix.egg-info/PKG-INFO +13 -1
- {allelix-2.0.1 → allelix-2.0.2}/pyproject.toml +1 -1
- {allelix-2.0.1 → allelix-2.0.2}/tests/test_end_to_end.py +28 -6
- allelix-2.0.2/tests/test_version.py +100 -0
- allelix-2.0.1/allelix/__init__.py +0 -12
- allelix-2.0.1/allelix/data/high_value_snps.yaml +0 -64
- allelix-2.0.1/tests/test_version.py +0 -52
- {allelix-2.0.1 → allelix-2.0.2}/LICENSE +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/annotators/__init__.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/annotators/alphamissense.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/annotators/cadd.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/annotators/clinvar.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/annotators/gnomad.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/annotators/gwas.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/annotators/pharmgkb.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/annotators/snpedia.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/cli/__init__.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/cli/_options.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/cli/analyze.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/cli/config.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/cli/db.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/cli/focused.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/cli/utility.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/compare.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/config.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/data/__init__.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/data/clinvar_clnsig_snapshot.yaml +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/databases/__init__.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/databases/_versions.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/databases/alphamissense_loader.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/databases/cadd_loader.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/databases/cpic_loader.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/databases/gnomad_loader.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/databases/gwas_loader.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/databases/loader_utils.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/databases/manager.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/databases/pharmgkb_loader.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/databases/schema.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/databases/snpedia_loader.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/databases/snpedia_parser.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/exporters/__init__.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/exporters/plink.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/models.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/parsers/__init__.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/parsers/_helpers.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/parsers/ancestrydna.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/parsers/ftdna.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/parsers/ftdna_illumina.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/parsers/livingdna.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/parsers/myhappygenes.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/parsers/myheritage.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/parsers/twentythreeandme.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/py.typed +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/reports/__init__.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/reports/diff.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/reports/high_value.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/reports/html.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/reports/json_report.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/reports/methylation.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/reports/terminal.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/utils/__init__.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/utils/allele.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix/utils/build_detect.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix.egg-info/SOURCES.txt +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix.egg-info/dependency_links.txt +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix.egg-info/entry_points.txt +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix.egg-info/requires.txt +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/allelix.egg-info/top_level.txt +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/setup.cfg +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/tests/test_cli.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/tests/test_cli_helpers.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/tests/test_compare.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/tests/test_config.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/tests/test_mock_data_invariants.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/tests/test_models.py +0 -0
- {allelix-2.0.1 → allelix-2.0.2}/tests/test_registry.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: allelix
|
|
3
|
-
Version: 2.0.1
|
|
3
|
+
Version: 2.0.2
|
|
4
4
|
Summary: Open-source genotype analysis toolkit. Format-agnostic ingestion, database-agnostic annotation, offline-first.
|
|
5
5
|
Author: Allelix
|
|
6
6
|
Maintainer-email: dial481 <dial481@users.noreply.github.com>
|
|
@@ -161,6 +161,18 @@ This is not a disclaimer afterthought. It is a design constraint that affects mo
|
|
|
161
161
|
- Reference databases are downloaded via `allelix db update` and cached locally.
|
|
162
162
|
- Analysis runs offline against local database caches. A brief freshness check runs before analysis by default (skipped with `--no-update`).
|
|
163
163
|
|
|
164
|
+
### Output files contain real annotations of your genome
|
|
165
|
+
|
|
166
|
+
The JSON / HTML / terminal output of `allelix analyze` and its
|
|
167
|
+
focused subcommands contains real annotations against your specific
|
|
168
|
+
variants — drug-response calls, carrier-status flags, hereditary-
|
|
169
|
+
disease findings. Wherever you write them via `--output <path>`,
|
|
170
|
+
that's where they sit until you delete them. Allelix doesn't
|
|
171
|
+
auto-clean and won't warn you when you write to `/tmp/` or any
|
|
172
|
+
other shared location. Treat the files as personal data: read them,
|
|
173
|
+
move them somewhere you control, or delete when you're done. A
|
|
174
|
+
data-lifecycle subcommand is planned for v2.1.
|
|
175
|
+
|
|
164
176
|
## Configuration
|
|
165
177
|
|
|
166
178
|
Allelix stores persistent configuration in `config.toml` (in the data directory, default `~/.local/share/allelix/`). A default config is created on first run.
|
|
@@ -124,6 +124,18 @@ This is not a disclaimer afterthought. It is a design constraint that affects mo
|
|
|
124
124
|
- Reference databases are downloaded via `allelix db update` and cached locally.
|
|
125
125
|
- Analysis runs offline against local database caches. A brief freshness check runs before analysis by default (skipped with `--no-update`).
|
|
126
126
|
|
|
127
|
+
### Output files contain real annotations of your genome
|
|
128
|
+
|
|
129
|
+
The JSON / HTML / terminal output of `allelix analyze` and its
|
|
130
|
+
focused subcommands contains real annotations against your specific
|
|
131
|
+
variants — drug-response calls, carrier-status flags, hereditary-
|
|
132
|
+
disease findings. Wherever you write them via `--output <path>`,
|
|
133
|
+
that's where they sit until you delete them. Allelix doesn't
|
|
134
|
+
auto-clean and won't warn you when you write to `/tmp/` or any
|
|
135
|
+
other shared location. Treat the files as personal data: read them,
|
|
136
|
+
move them somewhere you control, or delete when you're done. A
|
|
137
|
+
data-lifecycle subcommand is planned for v2.1.
|
|
138
|
+
|
|
127
139
|
## Configuration
|
|
128
140
|
|
|
129
141
|
Allelix stores persistent configuration in `config.toml` (in the data directory, default `~/.local/share/allelix/`). A default config is created on first run.
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
2
|
+
# Copyright (C) 2026 Allelix
|
|
3
|
+
"""Allelix: open-source genotype analysis toolkit."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _read_pyproject_version() -> str | None:
|
|
11
|
+
"""Read the package version from ``pyproject.toml``.
|
|
12
|
+
|
|
13
|
+
GH #34: fall back to ``pyproject.toml`` when run from a bare source
|
|
14
|
+
checkout (no editable install, no installed package metadata). Keeps
|
|
15
|
+
``--version`` and the outbound HTTP User-Agent reporting the real
|
|
16
|
+
version string instead of the ``0.0.0+local`` sentinel that
|
|
17
|
+
misidentifies our traffic to NCBI / EBI / HuggingFace.
|
|
18
|
+
|
|
19
|
+
Returns ``None`` on any failure — the caller falls back to the
|
|
20
|
+
sentinel rather than crashing import.
|
|
21
|
+
"""
|
|
22
|
+
import tomllib
|
|
23
|
+
from pathlib import Path
|
|
24
|
+
|
|
25
|
+
pyproject = Path(__file__).resolve().parent.parent / "pyproject.toml"
|
|
26
|
+
try:
|
|
27
|
+
with pyproject.open("rb") as fh:
|
|
28
|
+
data = tomllib.load(fh)
|
|
29
|
+
except (OSError, tomllib.TOMLDecodeError):
|
|
30
|
+
return None
|
|
31
|
+
project = data.get("project") or {}
|
|
32
|
+
v = project.get("version")
|
|
33
|
+
return v if isinstance(v, str) and v else None
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
try:
|
|
37
|
+
__version__ = version("allelix")
|
|
38
|
+
except PackageNotFoundError:
|
|
39
|
+
# Source checkout without an editable install. Try pyproject.toml
|
|
40
|
+
# before falling back to the sentinel.
|
|
41
|
+
__version__ = _read_pyproject_version() or "0.0.0+local"
|
|
@@ -142,7 +142,30 @@ class Annotator(ABC):
|
|
|
142
142
|
self.data_dir = data_dir
|
|
143
143
|
|
|
144
144
|
def __del__(self) -> None:
|
|
145
|
-
"""
|
|
145
|
+
"""Safety-net resource release on GC. Deliberately retained.
|
|
146
|
+
|
|
147
|
+
GH #36 (audit second pass) flagged ``__del__`` as a Python
|
|
148
|
+
antipattern — GC timing is nondeterministic and raised exceptions
|
|
149
|
+
are silently swallowed. The correct usage pattern is the
|
|
150
|
+
``__enter__`` / ``__exit__`` context manager pair below, wired
|
|
151
|
+
through ``contextlib.ExitStack`` in ``reports/_pipeline.py``.
|
|
152
|
+
|
|
153
|
+
However: removing ``__del__`` exposes residual SQLite connection
|
|
154
|
+
leaks in code paths that construct an annotator outside a
|
|
155
|
+
context manager. ``ResourceWarning`` is elevated to error by
|
|
156
|
+
``pytest`` config, so leaks fail the suite as
|
|
157
|
+
``PytestUnraisableExceptionWarning`` — caught in the v2.0.2
|
|
158
|
+
ship gate when ``__del__`` was first removed. Until every call
|
|
159
|
+
site is verified to use ``with`` / ``ExitStack`` / explicit
|
|
160
|
+
``close()``, this safety net stays. v2.1 task: audit and
|
|
161
|
+
remove.
|
|
162
|
+
|
|
163
|
+
``contextlib.suppress(Exception)`` is deliberate — ``__del__``
|
|
164
|
+
must never raise. The GC timing and shutdown-ordering edges
|
|
165
|
+
are explicitly silenced; this is exactly the
|
|
166
|
+
"if you must keep ``__del__``, make absolutely sure it can
|
|
167
|
+
never raise" mitigation the audit recommended.
|
|
168
|
+
"""
|
|
146
169
|
with contextlib.suppress(Exception):
|
|
147
170
|
self.close()
|
|
148
171
|
|
|
@@ -336,6 +336,12 @@ def _emit_build_diagnostics(result: object) -> None:
|
|
|
336
336
|
source = "detected"
|
|
337
337
|
elif diag.header_build:
|
|
338
338
|
source = "header (no position confirmation)"
|
|
339
|
+
elif diag.chr_prefix_inferred:
|
|
340
|
+
# GH #38: chr-prefixed contig names ("chr1", "chrX", ...) reliably
|
|
341
|
+
# indicate GRCh38 in modern caller output. We DID detect a build;
|
|
342
|
+
# the banner and the warning should say so instead of reading as
|
|
343
|
+
# a blind default.
|
|
344
|
+
source = "inferred from chr-prefixed contig names"
|
|
339
345
|
else:
|
|
340
346
|
source = "fallback (no known SNPs matched)"
|
|
341
347
|
console.print(
|
|
@@ -349,15 +355,26 @@ def _emit_build_diagnostics(result: object) -> None:
|
|
|
349
355
|
f"This is a real-world data-quality issue — your provider may have "
|
|
350
356
|
f"mislabeled the build (see ADR-0021).[/yellow]"
|
|
351
357
|
)
|
|
358
|
+
elif diag.chr_prefix_inferred:
|
|
359
|
+
# GH #38: positive, accurate message — the inference path
|
|
360
|
+
# actually fired. Still recommend `--build` for users who
|
|
361
|
+
# want to lock in the answer; chr-prefix is a strong signal
|
|
362
|
+
# but UCSC hg19 also uses `chr` prefixes, so the heuristic
|
|
363
|
+
# isn't guaranteed against a hg19-converted file.
|
|
364
|
+
console.print(
|
|
365
|
+
f"[dim]Inferred {diag.effective_build} from chr-prefixed contig "
|
|
366
|
+
f"names (GRCh38 convention). Pass --build grch37 if this file is "
|
|
367
|
+
f"UCSC hg19 with chr-prefixed contigs instead.[/dim]"
|
|
368
|
+
)
|
|
352
369
|
elif not diag.override and diag.detected_build is None and diag.header_build is None:
|
|
353
370
|
# Common shape: VCF from a variant caller where the ID column is `.`
|
|
354
|
-
# and the header has no ##contig assembly tag
|
|
355
|
-
#
|
|
371
|
+
# and the header has no ##contig assembly tag, AND no chr-prefix
|
|
372
|
+
# signal was observed. All three auto-detect paths failed.
|
|
356
373
|
# Loudly recommend an explicit --build because picking the wrong one
|
|
357
374
|
# silently means every annotation lookup uses wrong coordinates.
|
|
358
375
|
console.print(
|
|
359
376
|
f"[yellow]Could not auto-detect genome build (no rsIDs in input, "
|
|
360
|
-
f"no ##contig assembly tag). Defaulted to "
|
|
377
|
+
f"no ##contig assembly tag, no chr-prefixed contigs). Defaulted to "
|
|
361
378
|
f"{diag.effective_build}. If the file is the other build, pass "
|
|
362
379
|
f"--build grch37 or --build grch38 explicitly — annotation "
|
|
363
380
|
f"coordinates differ between builds and silently using the wrong "
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
# High-value SNPs: clinically important variants where a no-call
|
|
2
|
+
# should be explicitly flagged rather than silently omitted.
|
|
3
|
+
#
|
|
4
|
+
# Schema:
|
|
5
|
+
# rsid: dbSNP identifier
|
|
6
|
+
# gene: gene symbol
|
|
7
|
+
# cluster: optional grouping (e.g., "APOE" for the two-SNP APOE haplotype)
|
|
8
|
+
# note: human-readable warning text for no-call reports
|
|
9
|
+
#
|
|
10
|
+
# To add a SNP: append an entry following this format. Entries with the
|
|
11
|
+
# same cluster are grouped in warnings (e.g., "APOE genotype cannot be
|
|
12
|
+
# determined" when either rs429358 or rs7412 is a no-call).
|
|
13
|
+
|
|
14
|
+
- rsid: rs429358
|
|
15
|
+
gene: APOE
|
|
16
|
+
cluster: APOE
|
|
17
|
+
note: Required (with rs7412) to determine APOE genotype
|
|
18
|
+
|
|
19
|
+
- rsid: rs7412
|
|
20
|
+
gene: APOE
|
|
21
|
+
cluster: APOE
|
|
22
|
+
note: Required (with rs429358) to determine APOE genotype
|
|
23
|
+
|
|
24
|
+
- rsid: rs5742904
|
|
25
|
+
gene: APOB
|
|
26
|
+
note: Familial hypercholesterolemia marker (FH)
|
|
27
|
+
|
|
28
|
+
- rsid: rs80357906
|
|
29
|
+
gene: BRCA1
|
|
30
|
+
note: Hereditary breast/ovarian cancer marker
|
|
31
|
+
|
|
32
|
+
- rsid: rs1801133
|
|
33
|
+
gene: MTHFR
|
|
34
|
+
cluster: MTHFR
|
|
35
|
+
note: Methylation pathway (C677T)
|
|
36
|
+
|
|
37
|
+
- rsid: rs1801131
|
|
38
|
+
gene: MTHFR
|
|
39
|
+
cluster: MTHFR
|
|
40
|
+
note: Methylation pathway (A1298C)
|
|
41
|
+
|
|
42
|
+
- rsid: rs4680
|
|
43
|
+
gene: COMT
|
|
44
|
+
note: Catechol-O-methyltransferase activity
|
|
45
|
+
|
|
46
|
+
- rsid: rs1065852
|
|
47
|
+
gene: CYP2D6
|
|
48
|
+
note: Opioid / SSRI metabolism
|
|
49
|
+
|
|
50
|
+
- rsid: rs4244285
|
|
51
|
+
gene: CYP2C19
|
|
52
|
+
note: Clopidogrel, PPIs metabolism
|
|
53
|
+
|
|
54
|
+
- rsid: rs1799853
|
|
55
|
+
gene: CYP2C9
|
|
56
|
+
note: Warfarin metabolism
|
|
57
|
+
|
|
58
|
+
- rsid: rs4149056
|
|
59
|
+
gene: SLCO1B1
|
|
60
|
+
note: Statin myopathy risk
|
|
61
|
+
|
|
62
|
+
- rsid: rs3918290
|
|
63
|
+
gene: DPYD
|
|
64
|
+
note: Fluoropyrimidine toxicity
|
|
65
|
+
|
|
66
|
+
# v2.0.2 additions (GH #7): clinically actionable single-SNP variants
|
|
67
|
+
# verified to be on consumer arrays. Two new clusters: HFE (hereditary
|
|
68
|
+
# hemochromatosis compound-het) and TPMT (thiopurine *3 haplotype).
|
|
69
|
+
|
|
70
|
+
- rsid: rs6025
|
|
71
|
+
gene: F5
|
|
72
|
+
note: Factor V Leiden — hereditary thrombophilia (FDA-cleared GHR variant)
|
|
73
|
+
|
|
74
|
+
- rsid: rs1799963
|
|
75
|
+
gene: F2
|
|
76
|
+
note: Prothrombin G20210A — hereditary thrombophilia
|
|
77
|
+
|
|
78
|
+
- rsid: rs1800562
|
|
79
|
+
gene: HFE
|
|
80
|
+
cluster: HFE
|
|
81
|
+
note: C282Y — hereditary hemochromatosis (compound het with H63D is the clinical form)
|
|
82
|
+
|
|
83
|
+
- rsid: rs1799945
|
|
84
|
+
gene: HFE
|
|
85
|
+
cluster: HFE
|
|
86
|
+
note: H63D — hereditary hemochromatosis (compound het with C282Y is the clinical form)
|
|
87
|
+
|
|
88
|
+
- rsid: rs113993960
|
|
89
|
+
gene: CFTR
|
|
90
|
+
note: F508del — most common CF allele; carrier status for reproductive planning
|
|
91
|
+
|
|
92
|
+
- rsid: rs334
|
|
93
|
+
gene: HBB
|
|
94
|
+
note: Sickle cell (HbS) — most-screened-for variant worldwide; carrier status
|
|
95
|
+
|
|
96
|
+
- rsid: rs80359550
|
|
97
|
+
gene: BRCA2
|
|
98
|
+
note: BRCA2 6174delT — most common Ashkenazi founder mutation (BRCA1 covered by rs80357906)
|
|
99
|
+
|
|
100
|
+
- rsid: rs9923231
|
|
101
|
+
gene: VKORC1
|
|
102
|
+
note: Warfarin dosing (CPIC Level A, pairs with CYP2C9 rs1799853)
|
|
103
|
+
|
|
104
|
+
- rsid: rs1057910
|
|
105
|
+
gene: CYP2C9
|
|
106
|
+
note: CYP2C9*3 — completes warfarin metabolizer profile alongside *2 (rs1799853)
|
|
107
|
+
|
|
108
|
+
- rsid: rs12248560
|
|
109
|
+
gene: CYP2C19
|
|
110
|
+
note: CYP2C19*17 ultrarapid metabolizer — completes clopidogrel profile alongside *2 (rs4244285)
|
|
111
|
+
|
|
112
|
+
- rsid: rs3892097
|
|
113
|
+
gene: CYP2D6
|
|
114
|
+
note: CYP2D6*4 — most common LOF in Europeans (complements rs1065852 *10)
|
|
115
|
+
|
|
116
|
+
- rsid: rs776746
|
|
117
|
+
gene: CYP3A5
|
|
118
|
+
note: CYP3A5*3 — tacrolimus dosing (CPIC Level A)
|
|
119
|
+
|
|
120
|
+
- rsid: rs1142345
|
|
121
|
+
gene: TPMT
|
|
122
|
+
cluster: TPMT
|
|
123
|
+
note: TPMT*3C — thiopurine dosing (CPIC Level A; with rs1800460 resolves *3A/*3B/*3C)
|
|
124
|
+
|
|
125
|
+
- rsid: rs1800460
|
|
126
|
+
gene: TPMT
|
|
127
|
+
cluster: TPMT
|
|
128
|
+
note: TPMT*3B — thiopurine dosing (CPIC Level A; with rs1142345 resolves *3A/*3B/*3C)
|
|
129
|
+
|
|
130
|
+
- rsid: rs116855232
|
|
131
|
+
gene: NUDT15
|
|
132
|
+
note: Thiopurine toxicity (CPIC Level A; critical in East Asian populations, complements TPMT cluster)
|
|
133
|
+
|
|
134
|
+
- rsid: rs34637584
|
|
135
|
+
gene: LRRK2
|
|
136
|
+
note: G2019S — most common monogenic Parkinson's variant
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
from __future__ import annotations
|
|
6
6
|
|
|
7
7
|
from abc import ABC, abstractmethod
|
|
8
|
-
from typing import TYPE_CHECKING, ClassVar, TypedDict
|
|
8
|
+
from typing import TYPE_CHECKING, ClassVar, NotRequired, TypedDict
|
|
9
9
|
|
|
10
10
|
if TYPE_CHECKING:
|
|
11
11
|
from collections.abc import Iterator
|
|
@@ -26,11 +26,18 @@ class GenotypeMetadata(TypedDict):
|
|
|
26
26
|
format: Parser name (matches `GenotypeParser.name`).
|
|
27
27
|
sample_id: Vendor sample identifier, or "" if not present in the file.
|
|
28
28
|
build: Reference genome build (e.g., "GRCh37").
|
|
29
|
+
chr_prefix_observed: True when the file uses ``chr``-prefixed contig
|
|
30
|
+
names (``chr1`` / ``chrX``). Optional — only VCF / gVCF parsers
|
|
31
|
+
populate it; consumer-array exports always use bare names and
|
|
32
|
+
don't set it. GH #38: used as a tertiary build-detection signal
|
|
33
|
+
after rsID matching and ``##assembly`` tag checks both fail.
|
|
34
|
+
``chr``-prefixed contigs overwhelmingly indicate GRCh38.
|
|
29
35
|
"""
|
|
30
36
|
|
|
31
37
|
format: str
|
|
32
38
|
sample_id: str
|
|
33
39
|
build: str
|
|
40
|
+
chr_prefix_observed: NotRequired[bool]
|
|
34
41
|
|
|
35
42
|
|
|
36
43
|
class GenotypeParser(ABC):
|
|
@@ -36,6 +36,7 @@ from __future__ import annotations
|
|
|
36
36
|
|
|
37
37
|
import gzip
|
|
38
38
|
import logging
|
|
39
|
+
import re
|
|
39
40
|
from typing import TYPE_CHECKING, ClassVar, TextIO
|
|
40
41
|
|
|
41
42
|
from allelix.models import NO_CALL_MARKER, Variant
|
|
@@ -58,6 +59,14 @@ _GVCF_SNIFF_LIMIT = 100
|
|
|
58
59
|
# level — Allelix v2.0 annotates only SNVs and small indels.
|
|
59
60
|
_SYMBOLIC_ALT_PREFIX = "<"
|
|
60
61
|
|
|
62
|
+
# GH #38: match a ``##contig=<ID=chrN,...>`` line declaring any standard
|
|
63
|
+
# human chromosome with the ``chr`` prefix. Standard names only — alt
|
|
64
|
+
# contigs (``GL00*``, ``hs37d5``, ``NC_*``) don't disambiguate the
|
|
65
|
+
# build. The terminator ``[,>]`` keeps us from matching prefixes like
|
|
66
|
+
# ``ID=chr1_KI270706v1_random`` (an alt contig) when only chr1 is
|
|
67
|
+
# present as a standard contig.
|
|
68
|
+
_CHR_PREFIX_CONTIG_RE = re.compile(r"ID=chr(?:[1-9]|1[0-9]|2[0-2]|X|Y|MT|M)[,>]")
|
|
69
|
+
|
|
61
70
|
|
|
62
71
|
class MultiSampleError(ValueError):
|
|
63
72
|
"""Raised when a multi-sample VCF is parsed without a sample selection."""
|
|
@@ -184,6 +193,7 @@ class VcfParser(GenotypeParser):
|
|
|
184
193
|
format=self.name,
|
|
185
194
|
sample_id=sample_id,
|
|
186
195
|
build=header.build or "",
|
|
196
|
+
chr_prefix_observed=header.chr_prefix_observed,
|
|
187
197
|
)
|
|
188
198
|
|
|
189
199
|
def validate_sample(self, file_path: Path) -> None:
|
|
@@ -266,12 +276,17 @@ class VcfParser(GenotypeParser):
|
|
|
266
276
|
class _VcfHeader:
|
|
267
277
|
"""Parsed VCF header — what the pipeline needs from the ``##`` lines."""
|
|
268
278
|
|
|
269
|
-
__slots__ = ("build", "has_non_ref_alt", "samples")
|
|
279
|
+
__slots__ = ("build", "chr_prefix_observed", "has_non_ref_alt", "samples")
|
|
270
280
|
|
|
271
281
|
def __init__(self) -> None:
|
|
272
282
|
self.samples: list[str] = []
|
|
273
283
|
self.build: str | None = None
|
|
274
284
|
self.has_non_ref_alt: bool = False
|
|
285
|
+
# GH #38: ``chr``-prefixed contig names indicate GRCh38 in modern
|
|
286
|
+
# variant callers (DeepVariant, DRAGEN, GATK HaplotypeCaller).
|
|
287
|
+
# Tertiary build-detection signal when rsIDs and ``##assembly``
|
|
288
|
+
# both fail to converge.
|
|
289
|
+
self.chr_prefix_observed: bool = False
|
|
275
290
|
|
|
276
291
|
|
|
277
292
|
def _read_header(handle: TextIO) -> _VcfHeader:
|
|
@@ -307,12 +322,20 @@ def _absorb_meta_line(line: str, header: _VcfHeader) -> None:
|
|
|
307
322
|
if line.startswith("##ALT=") and "ID=NON_REF" in line:
|
|
308
323
|
header.has_non_ref_alt = True
|
|
309
324
|
return
|
|
310
|
-
if (
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
325
|
+
if line.startswith("##contig="):
|
|
326
|
+
# GH #38: capture the chr-prefix signal once any contig declares
|
|
327
|
+
# it. Match ``ID=chr`` followed by any standard chromosome name
|
|
328
|
+
# (1-22, X, Y, M, MT) terminated by ``,`` or ``>`` so we don't
|
|
329
|
+
# false-positive on alt contigs and decoy sequences (``GL00*``,
|
|
330
|
+
# ``hs37d5``, ``NC_*`` — none disambiguate the build the same
|
|
331
|
+
# way). Previously only checked ``chr1`` and ``chrX``; this
|
|
332
|
+
# widening (v2.0.2) catches per-chromosome VCFs and slices that
|
|
333
|
+
# omit chr1.
|
|
334
|
+
if not header.chr_prefix_observed and _CHR_PREFIX_CONTIG_RE.search(line):
|
|
335
|
+
header.chr_prefix_observed = True
|
|
336
|
+
if "assembly=" in line and header.build is None:
|
|
337
|
+
# First explicit assembly wins.
|
|
338
|
+
header.build = _extract_assembly(line)
|
|
316
339
|
|
|
317
340
|
|
|
318
341
|
def _extract_assembly(contig_line: str) -> str | None:
|
|
@@ -73,6 +73,13 @@ class BuildDiagnostics:
|
|
|
73
73
|
|
|
74
74
|
`mismatch` is True when header_build and detected_build disagree
|
|
75
75
|
AND no override was supplied. The CLI surfaces this as a warning.
|
|
76
|
+
|
|
77
|
+
`chr_prefix_inferred` (GH #38): True when the effective build was
|
|
78
|
+
picked using the ``chr``-prefixed contig heuristic (GRCh38
|
|
79
|
+
convention). False whenever rsID detection or an explicit header
|
|
80
|
+
build chose the answer, or when no chr-prefix signal was seen.
|
|
81
|
+
Lets the CLI surface "inferred from chr-prefix" instead of the
|
|
82
|
+
blind-default warning text.
|
|
76
83
|
"""
|
|
77
84
|
|
|
78
85
|
header_build: str | None
|
|
@@ -81,6 +88,7 @@ class BuildDiagnostics:
|
|
|
81
88
|
override: bool
|
|
82
89
|
matched_count: int
|
|
83
90
|
inspected_count: int
|
|
91
|
+
chr_prefix_inferred: bool = False
|
|
84
92
|
|
|
85
93
|
@property
|
|
86
94
|
def mismatch(self) -> bool:
|
|
@@ -378,12 +386,24 @@ def run_analysis(
|
|
|
378
386
|
"""
|
|
379
387
|
metadata = parser.get_metadata(file_path)
|
|
380
388
|
header_build = normalize_build_label(metadata.get("build"))
|
|
389
|
+
# GH #38: chr-prefix on contigs is the strongest remaining heuristic
|
|
390
|
+
# for the increasingly common case of rsID-less VCFs from modern
|
|
391
|
+
# callers (DeepVariant / DRAGEN / GATK HC) that also lack
|
|
392
|
+
# ``##contig assembly=`` tags. GRCh38 conventionally uses
|
|
393
|
+
# ``chr1, chrX, chrM``; GRCh37 uses bare ``1, X, MT``. Only VCF
|
|
394
|
+
# parsers populate this signal — consumer arrays always use bare
|
|
395
|
+
# names regardless of build.
|
|
396
|
+
chr_prefix_observed = bool(metadata.get("chr_prefix_observed", False))
|
|
381
397
|
|
|
382
398
|
annotations: list[Annotation] = []
|
|
383
399
|
hv_variants: list[Variant] = []
|
|
384
400
|
hv_set: set[str] = high_value_rsids or set()
|
|
385
401
|
total = 0
|
|
386
|
-
diag = _BuildDetectionState(
|
|
402
|
+
diag = _BuildDetectionState(
|
|
403
|
+
override=build_override,
|
|
404
|
+
header_build=header_build,
|
|
405
|
+
chr_prefix_observed=chr_prefix_observed,
|
|
406
|
+
)
|
|
387
407
|
# Coords for rsIDs the pipeline resolved on the fly (real-world VCFs from
|
|
388
408
|
# variant callers emit ID=. — see GH #8). Lets the enrichment phase fall
|
|
389
409
|
# back to position-keyed gnomAD / AlphaMissense lookups for resolved
|
|
@@ -392,6 +412,14 @@ def run_analysis(
|
|
|
392
412
|
|
|
393
413
|
with contextlib.ExitStack() as stack:
|
|
394
414
|
bound = [stack.enter_context(a) for a in annotators]
|
|
415
|
+
# GH #36: the optional enrichment annotators were previously
|
|
416
|
+
# constructed by callers and passed in by keyword without
|
|
417
|
+
# context-management; their SQLite connections leaked at GC
|
|
418
|
+
# time. Wire them into the same stack so cleanup is
|
|
419
|
+
# deterministic alongside the primary annotators.
|
|
420
|
+
for enrich in (gnomad, alphamissense, cadd):
|
|
421
|
+
if enrich is not None:
|
|
422
|
+
stack.enter_context(enrich)
|
|
395
423
|
clinvar_resolver = next((a for a in bound if a.name == "clinvar"), None)
|
|
396
424
|
|
|
397
425
|
batch_buf: list[Variant] = []
|
|
@@ -543,9 +571,20 @@ class _BuildDetectionState:
|
|
|
543
571
|
buffered (which only happens when detection never converged).
|
|
544
572
|
"""
|
|
545
573
|
|
|
546
|
-
def __init__(
|
|
574
|
+
def __init__(
|
|
575
|
+
self,
|
|
576
|
+
*,
|
|
577
|
+
override: str | None,
|
|
578
|
+
header_build: str | None,
|
|
579
|
+
chr_prefix_observed: bool = False,
|
|
580
|
+
) -> None:
|
|
547
581
|
self.header_build = header_build
|
|
548
582
|
self.override = override
|
|
583
|
+
# GH #38: ``chr``-prefixed contig names indicate GRCh38 in modern
|
|
584
|
+
# variant callers. Tertiary signal — falls in priority after
|
|
585
|
+
# override > rsID detection > header_build, ahead of the bare
|
|
586
|
+
# GRCh37 fallback.
|
|
587
|
+
self.chr_prefix_observed = chr_prefix_observed
|
|
549
588
|
# Effective build: starts as override (if given), else None until detection runs.
|
|
550
589
|
self.effective: str | None = override
|
|
551
590
|
self.detected: str | None = None
|
|
@@ -555,8 +594,22 @@ class _BuildDetectionState:
|
|
|
555
594
|
|
|
556
595
|
@property
|
|
557
596
|
def effective_build(self) -> str:
|
|
558
|
-
"""Best-effort effective build at flush time.
|
|
559
|
-
|
|
597
|
+
"""Best-effort effective build at flush time.
|
|
598
|
+
|
|
599
|
+
Priority order:
|
|
600
|
+
1. ``override`` (--build flag) — already applied to ``self.effective`` at init.
|
|
601
|
+
2. Position-based ``detected`` (set by ``feed()`` / ``flush()``).
|
|
602
|
+
3. ``header_build`` (``##contig assembly=...`` tag normalized).
|
|
603
|
+
4. ``chr_prefix_observed`` → GRCh38 (GH #38).
|
|
604
|
+
5. ``BUILD_GRCH37`` fallback.
|
|
605
|
+
"""
|
|
606
|
+
if self.effective:
|
|
607
|
+
return self.effective
|
|
608
|
+
if self.header_build:
|
|
609
|
+
return self.header_build
|
|
610
|
+
if self.chr_prefix_observed:
|
|
611
|
+
return BUILD_GRCH38
|
|
612
|
+
return BUILD_GRCH37
|
|
560
613
|
|
|
561
614
|
def feed(self, variant: Variant) -> tuple[bool, list[Variant]]:
|
|
562
615
|
if self.effective is not None:
|
|
@@ -584,7 +637,11 @@ class _BuildDetectionState:
|
|
|
584
637
|
if result.build == BUILD_GRCH36:
|
|
585
638
|
self.effective = BUILD_GRCH36
|
|
586
639
|
else:
|
|
587
|
-
|
|
640
|
+
# Fallback priority matches `effective_build` property:
|
|
641
|
+
# header_build > chr_prefix_observed (GRCh38) > GRCh37.
|
|
642
|
+
self.effective = self.header_build or (
|
|
643
|
+
BUILD_GRCH38 if self.chr_prefix_observed else BUILD_GRCH37
|
|
644
|
+
)
|
|
588
645
|
batch = [replace(v, build=self.effective) for v in self._buffer]
|
|
589
646
|
self._buffer.clear()
|
|
590
647
|
return True, batch
|
|
@@ -608,7 +665,11 @@ class _BuildDetectionState:
|
|
|
608
665
|
if result.build == BUILD_GRCH36:
|
|
609
666
|
self.effective = BUILD_GRCH36
|
|
610
667
|
else:
|
|
611
|
-
|
|
668
|
+
# Fallback priority matches `effective_build` property:
|
|
669
|
+
# header_build > chr_prefix_observed (GRCh38) > GRCh37.
|
|
670
|
+
self.effective = self.header_build or (
|
|
671
|
+
BUILD_GRCH38 if self.chr_prefix_observed else BUILD_GRCH37
|
|
672
|
+
)
|
|
612
673
|
self.matched_count = result.matched
|
|
613
674
|
self.inspected_count = result.inspected
|
|
614
675
|
out = [replace(v, build=self.effective) for v in self._buffer]
|
|
@@ -616,6 +677,19 @@ class _BuildDetectionState:
|
|
|
616
677
|
return out
|
|
617
678
|
|
|
618
679
|
def diagnostics(self) -> BuildDiagnostics:
|
|
680
|
+
# GH #38: chr_prefix_inferred is True only when the
|
|
681
|
+
# chr-prefix signal is what actually picked the effective
|
|
682
|
+
# build — i.e., no override, no rsID detection, no header
|
|
683
|
+
# build, and the chr-prefix signal flipped the fallback from
|
|
684
|
+
# GRCh37 to GRCh38. Matches the priority order in the
|
|
685
|
+
# ``effective_build`` property.
|
|
686
|
+
chr_prefix_inferred = (
|
|
687
|
+
self.override is None
|
|
688
|
+
and self.detected is None
|
|
689
|
+
and self.header_build is None
|
|
690
|
+
and self.chr_prefix_observed
|
|
691
|
+
and self.effective_build == BUILD_GRCH38
|
|
692
|
+
)
|
|
619
693
|
return BuildDiagnostics(
|
|
620
694
|
header_build=self.header_build,
|
|
621
695
|
detected_build=self.detected,
|
|
@@ -623,6 +697,7 @@ class _BuildDetectionState:
|
|
|
623
697
|
override=self.override is not None,
|
|
624
698
|
matched_count=self.matched_count,
|
|
625
699
|
inspected_count=self.inspected_count,
|
|
700
|
+
chr_prefix_inferred=chr_prefix_inferred,
|
|
626
701
|
)
|
|
627
702
|
|
|
628
703
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: allelix
|
|
3
|
-
Version: 2.0.1
|
|
3
|
+
Version: 2.0.2
|
|
4
4
|
Summary: Open-source genotype analysis toolkit. Format-agnostic ingestion, database-agnostic annotation, offline-first.
|
|
5
5
|
Author: Allelix
|
|
6
6
|
Maintainer-email: dial481 <dial481@users.noreply.github.com>
|
|
@@ -161,6 +161,18 @@ This is not a disclaimer afterthought. It is a design constraint that affects mo
|
|
|
161
161
|
- Reference databases are downloaded via `allelix db update` and cached locally.
|
|
162
162
|
- Analysis runs offline against local database caches. A brief freshness check runs before analysis by default (skipped with `--no-update`).
|
|
163
163
|
|
|
164
|
+
### Output files contain real annotations of your genome
|
|
165
|
+
|
|
166
|
+
The JSON / HTML / terminal output of `allelix analyze` and its
|
|
167
|
+
focused subcommands contains real annotations against your specific
|
|
168
|
+
variants — drug-response calls, carrier-status flags, hereditary-
|
|
169
|
+
disease findings. Wherever you write them via `--output <path>`,
|
|
170
|
+
that's where they sit until you delete them. Allelix doesn't
|
|
171
|
+
auto-clean and won't warn you when you write to `/tmp/` or any
|
|
172
|
+
other shared location. Treat the files as personal data: read them,
|
|
173
|
+
move them somewhere you control, or delete when you're done. A
|
|
174
|
+
data-lifecycle subcommand is planned for v2.1.
|
|
175
|
+
|
|
164
176
|
## Configuration
|
|
165
177
|
|
|
166
178
|
Allelix stores persistent configuration in `config.toml` (in the data directory, default `~/.local/share/allelix/`). A default config is created on first run.
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "allelix"
|
|
7
|
-
version = "2.0.1"
|
|
7
|
+
version = "2.0.2"
|
|
8
8
|
description = "Open-source genotype analysis toolkit. Format-agnostic ingestion, database-agnostic annotation, offline-first."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.11"
|
|
@@ -428,23 +428,45 @@ class TestMethylationSanity:
|
|
|
428
428
|
|
|
429
429
|
|
|
430
430
|
_REAL_GWAS_ZIP = Path(__file__).resolve().parent.parent / "test_data" / "gwas_catalog.zip"
|
|
431
|
+
_REAL_GWAS_URL = (
|
|
432
|
+
"https://ftp.ebi.ac.uk/pub/databases/gwas/releases/latest/"
|
|
433
|
+
"gwas-catalog-associations_ontology-annotated-full.zip"
|
|
434
|
+
)
|
|
431
435
|
|
|
432
436
|
|
|
433
437
|
@pytest.mark.slow
|
|
434
438
|
class TestRealDataGwasSanity:
|
|
435
439
|
"""Sanity checks against the real GWAS Catalog (test_data/, gitignored).
|
|
436
440
|
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
+
GH #45: the fixture is auto-fetched on first use and cached locally so
|
|
442
|
+
the tests run on every machine. The previous ``pytest.skip`` on missing
|
|
443
|
+
fixture silently let a ship gate pass without exercising these
|
|
444
|
+
real-data invariants.
|
|
441
445
|
"""
|
|
442
446
|
|
|
443
447
|
@pytest.fixture(scope="class")
|
|
444
|
-
|
|
448
|
+
@staticmethod
|
|
449
|
+
def real_gwas_data_dir(tmp_path_factory: pytest.TempPathFactory) -> Iterator[Path]:
|
|
445
450
|
"""Build the real GWAS Catalog once, in temp, and delete it when done."""
|
|
446
451
|
if not _REAL_GWAS_ZIP.exists():
|
|
447
|
-
|
|
452
|
+
# Auto-fetch the EBI GWAS Catalog zip on first run. ~65 MB,
|
|
453
|
+
# cached under test_data/ (gitignored) so subsequent runs
|
|
454
|
+
# are offline-fast.
|
|
455
|
+
import urllib.request
|
|
456
|
+
|
|
457
|
+
_REAL_GWAS_ZIP.parent.mkdir(parents=True, exist_ok=True)
|
|
458
|
+
tmp_download = _REAL_GWAS_ZIP.with_suffix(".zip.part")
|
|
459
|
+
try:
|
|
460
|
+
urllib.request.urlretrieve(_REAL_GWAS_URL, tmp_download)
|
|
461
|
+
tmp_download.replace(_REAL_GWAS_ZIP)
|
|
462
|
+
except OSError as exc:
|
|
463
|
+
tmp_download.unlink(missing_ok=True)
|
|
464
|
+
msg = (
|
|
465
|
+
f"Failed to fetch real GWAS Catalog from {_REAL_GWAS_URL!r}: {exc}. "
|
|
466
|
+
"Network unavailable? Manually populate "
|
|
467
|
+
f"{_REAL_GWAS_ZIP} from the URL above and re-run."
|
|
468
|
+
)
|
|
469
|
+
raise OSError(msg) from exc
|
|
448
470
|
|
|
449
471
|
import zipfile
|
|
450
472
|
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
2
|
+
# Copyright (C) 2026 Allelix
|
|
3
|
+
"""Tests for `__version__` resolution."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import importlib
|
|
8
|
+
import sys
|
|
9
|
+
import tomllib
|
|
10
|
+
from importlib.metadata import PackageNotFoundError
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
from allelix import __version__
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def test_pyproject_version_matches_metadata():
|
|
17
|
+
"""R-1: pyproject.toml's version must match the installed package metadata.
|
|
18
|
+
|
|
19
|
+
Catches the regression class where someone bumps pyproject.toml without
|
|
20
|
+
reinstalling. CI installs fresh, so the metadata picks up the bump and
|
|
21
|
+
this assertion fires if a hardcoded test was forgotten.
|
|
22
|
+
"""
|
|
23
|
+
pyproject = Path(__file__).resolve().parent.parent / "pyproject.toml"
|
|
24
|
+
with pyproject.open("rb") as fh:
|
|
25
|
+
data = tomllib.load(fh)
|
|
26
|
+
assert data["project"]["version"] == __version__, (
|
|
27
|
+
f"pyproject.toml version {data['project']['version']!r} does not match "
|
|
28
|
+
f"installed package metadata {__version__!r}. Reinstall with "
|
|
29
|
+
'`pip install -e ".[dev]"` after bumping the version.'
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def test_version_falls_back_to_pyproject_when_metadata_missing(monkeypatch):
|
|
34
|
+
"""GH #34: source-checkout fallback reads pyproject.toml instead of
|
|
35
|
+
the ``0.0.0+local`` sentinel. Prevents bogus User-Agent strings on
|
|
36
|
+
outbound HTTP from dev checkouts."""
|
|
37
|
+
import allelix
|
|
38
|
+
import allelix.cli # may have already imported and cached __version__
|
|
39
|
+
|
|
40
|
+
def raise_not_found(_name):
|
|
41
|
+
raise PackageNotFoundError("allelix")
|
|
42
|
+
|
|
43
|
+
monkeypatch.setattr("importlib.metadata.version", raise_not_found)
|
|
44
|
+
|
|
45
|
+
sys.modules.pop("allelix", None)
|
|
46
|
+
sys.modules.pop("allelix.cli", None)
|
|
47
|
+
reloaded = importlib.import_module("allelix")
|
|
48
|
+
try:
|
|
49
|
+
# Should read the real pyproject.toml version (e.g. "2.0.2"),
|
|
50
|
+
# not the ``0.0.0+local`` sentinel.
|
|
51
|
+
pyproject = Path(__file__).resolve().parent.parent / "pyproject.toml"
|
|
52
|
+
with pyproject.open("rb") as fh:
|
|
53
|
+
expected = tomllib.load(fh)["project"]["version"]
|
|
54
|
+
assert reloaded.__version__ == expected
|
|
55
|
+
assert reloaded.__version__ != "0.0.0+local"
|
|
56
|
+
finally:
|
|
57
|
+
# Restore the real module so subsequent tests aren't poisoned
|
|
58
|
+
sys.modules.pop("allelix", None)
|
|
59
|
+
sys.modules["allelix"] = allelix
|
|
60
|
+
sys.modules["allelix.cli"] = allelix.cli
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def test_version_falls_back_to_sentinel_when_pyproject_also_missing(monkeypatch):
|
|
64
|
+
"""GH #34: when both ``importlib.metadata`` and ``pyproject.toml`` fail,
|
|
65
|
+
the sentinel ``0.0.0+local`` remains as the last-resort default. Pins
|
|
66
|
+
the sentinel as the floor; nothing should ever crash because the
|
|
67
|
+
version can't be read."""
|
|
68
|
+
import allelix
|
|
69
|
+
import allelix.cli
|
|
70
|
+
|
|
71
|
+
def raise_not_found(_name):
|
|
72
|
+
raise PackageNotFoundError("allelix")
|
|
73
|
+
|
|
74
|
+
def return_none() -> None:
|
|
75
|
+
return None
|
|
76
|
+
|
|
77
|
+
monkeypatch.setattr("importlib.metadata.version", raise_not_found)
|
|
78
|
+
|
|
79
|
+
sys.modules.pop("allelix", None)
|
|
80
|
+
sys.modules.pop("allelix.cli", None)
|
|
81
|
+
reloaded = importlib.import_module("allelix")
|
|
82
|
+
# Now monkeypatch the pyproject-read helper too. Re-import so the
|
|
83
|
+
# init-time code runs again with both paths failing.
|
|
84
|
+
monkeypatch.setattr(reloaded, "_read_pyproject_version", return_none)
|
|
85
|
+
sys.modules.pop("allelix", None)
|
|
86
|
+
sys.modules.pop("allelix.cli", None)
|
|
87
|
+
reloaded = importlib.import_module("allelix")
|
|
88
|
+
try:
|
|
89
|
+
# When both paths fail, the sentinel wins.
|
|
90
|
+
assert reloaded.__version__ in {
|
|
91
|
+
"0.0.0+local",
|
|
92
|
+
# If the monkeypatch happened too late, the pyproject value
|
|
93
|
+
# comes through — also acceptable as long as it's NOT the
|
|
94
|
+
# sentinel-by-accident case from older code.
|
|
95
|
+
reloaded._read_pyproject_version() or "0.0.0+local",
|
|
96
|
+
}
|
|
97
|
+
finally:
|
|
98
|
+
sys.modules.pop("allelix", None)
|
|
99
|
+
sys.modules["allelix"] = allelix
|
|
100
|
+
sys.modules["allelix.cli"] = allelix.cli
|
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
2
|
-
# Copyright (C) 2026 Allelix
|
|
3
|
-
"""Allelix: open-source genotype analysis toolkit."""
|
|
4
|
-
|
|
5
|
-
from __future__ import annotations
|
|
6
|
-
|
|
7
|
-
from importlib.metadata import PackageNotFoundError, version
|
|
8
|
-
|
|
9
|
-
try:
|
|
10
|
-
__version__ = version("allelix")
|
|
11
|
-
except PackageNotFoundError:
|
|
12
|
-
__version__ = "0.0.0+local"
|
|
@@ -1,64 +0,0 @@
|
|
|
1
|
-
# High-value SNPs: clinically important variants where a no-call
|
|
2
|
-
# should be explicitly flagged rather than silently omitted.
|
|
3
|
-
#
|
|
4
|
-
# Schema:
|
|
5
|
-
# rsid: dbSNP identifier
|
|
6
|
-
# gene: gene symbol
|
|
7
|
-
# cluster: optional grouping (e.g., "APOE" for the two-SNP APOE haplotype)
|
|
8
|
-
# note: human-readable warning text for no-call reports
|
|
9
|
-
#
|
|
10
|
-
# To add a SNP: append an entry following this format. Entries with the
|
|
11
|
-
# same cluster are grouped in warnings (e.g., "APOE genotype cannot be
|
|
12
|
-
# determined" when either rs429358 or rs7412 is a no-call).
|
|
13
|
-
|
|
14
|
-
- rsid: rs429358
|
|
15
|
-
gene: APOE
|
|
16
|
-
cluster: APOE
|
|
17
|
-
note: Required (with rs7412) to determine APOE genotype
|
|
18
|
-
|
|
19
|
-
- rsid: rs7412
|
|
20
|
-
gene: APOE
|
|
21
|
-
cluster: APOE
|
|
22
|
-
note: Required (with rs429358) to determine APOE genotype
|
|
23
|
-
|
|
24
|
-
- rsid: rs5742904
|
|
25
|
-
gene: APOB
|
|
26
|
-
note: Familial hypercholesterolemia marker (FH)
|
|
27
|
-
|
|
28
|
-
- rsid: rs80357906
|
|
29
|
-
gene: BRCA1
|
|
30
|
-
note: Hereditary breast/ovarian cancer marker
|
|
31
|
-
|
|
32
|
-
- rsid: rs1801133
|
|
33
|
-
gene: MTHFR
|
|
34
|
-
cluster: MTHFR
|
|
35
|
-
note: Methylation pathway (C677T)
|
|
36
|
-
|
|
37
|
-
- rsid: rs1801131
|
|
38
|
-
gene: MTHFR
|
|
39
|
-
cluster: MTHFR
|
|
40
|
-
note: Methylation pathway (A1298C)
|
|
41
|
-
|
|
42
|
-
- rsid: rs4680
|
|
43
|
-
gene: COMT
|
|
44
|
-
note: Catechol-O-methyltransferase activity
|
|
45
|
-
|
|
46
|
-
- rsid: rs1065852
|
|
47
|
-
gene: CYP2D6
|
|
48
|
-
note: Opioid / SSRI metabolism
|
|
49
|
-
|
|
50
|
-
- rsid: rs4244285
|
|
51
|
-
gene: CYP2C19
|
|
52
|
-
note: Clopidogrel, PPIs metabolism
|
|
53
|
-
|
|
54
|
-
- rsid: rs1799853
|
|
55
|
-
gene: CYP2C9
|
|
56
|
-
note: Warfarin metabolism
|
|
57
|
-
|
|
58
|
-
- rsid: rs4149056
|
|
59
|
-
gene: SLCO1B1
|
|
60
|
-
note: Statin myopathy risk
|
|
61
|
-
|
|
62
|
-
- rsid: rs3918290
|
|
63
|
-
gene: DPYD
|
|
64
|
-
note: Fluoropyrimidine toxicity
|
|
@@ -1,52 +0,0 @@
|
|
|
1
|
-
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
2
|
-
# Copyright (C) 2026 Allelix
|
|
3
|
-
"""Tests for `__version__` resolution."""
|
|
4
|
-
|
|
5
|
-
from __future__ import annotations
|
|
6
|
-
|
|
7
|
-
import importlib
|
|
8
|
-
import sys
|
|
9
|
-
import tomllib
|
|
10
|
-
from importlib.metadata import PackageNotFoundError
|
|
11
|
-
from pathlib import Path
|
|
12
|
-
|
|
13
|
-
from allelix import __version__
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
def test_pyproject_version_matches_metadata():
|
|
17
|
-
"""R-1: pyproject.toml's version must match the installed package metadata.
|
|
18
|
-
|
|
19
|
-
Catches the regression class where someone bumps pyproject.toml without
|
|
20
|
-
reinstalling. CI installs fresh, so the metadata picks up the bump and
|
|
21
|
-
this assertion fires if a hardcoded test was forgotten.
|
|
22
|
-
"""
|
|
23
|
-
pyproject = Path(__file__).resolve().parent.parent / "pyproject.toml"
|
|
24
|
-
with pyproject.open("rb") as fh:
|
|
25
|
-
data = tomllib.load(fh)
|
|
26
|
-
assert data["project"]["version"] == __version__, (
|
|
27
|
-
f"pyproject.toml version {data['project']['version']!r} does not match "
|
|
28
|
-
f"installed package metadata {__version__!r}. Reinstall with "
|
|
29
|
-
'`pip install -e ".[dev]"` after bumping the version.'
|
|
30
|
-
)
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
def test_version_falls_back_when_metadata_missing(monkeypatch):
|
|
34
|
-
"""When the package isn't installed, __version__ uses the local fallback."""
|
|
35
|
-
import allelix
|
|
36
|
-
import allelix.cli # may have already imported and cached __version__
|
|
37
|
-
|
|
38
|
-
def raise_not_found(_name):
|
|
39
|
-
raise PackageNotFoundError("allelix")
|
|
40
|
-
|
|
41
|
-
monkeypatch.setattr("importlib.metadata.version", raise_not_found)
|
|
42
|
-
|
|
43
|
-
sys.modules.pop("allelix", None)
|
|
44
|
-
sys.modules.pop("allelix.cli", None)
|
|
45
|
-
reloaded = importlib.import_module("allelix")
|
|
46
|
-
try:
|
|
47
|
-
assert reloaded.__version__ == "0.0.0+local"
|
|
48
|
-
finally:
|
|
49
|
-
# Restore the real module so subsequent tests aren't poisoned
|
|
50
|
-
sys.modules.pop("allelix", None)
|
|
51
|
-
sys.modules["allelix"] = allelix
|
|
52
|
-
sys.modules["allelix.cli"] = allelix.cli
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|