allelix 1.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. allelix/__init__.py +12 -0
  2. allelix/annotators/__init__.py +90 -0
  3. allelix/annotators/alphamissense.py +228 -0
  4. allelix/annotators/base.py +214 -0
  5. allelix/annotators/cadd.py +283 -0
  6. allelix/annotators/clinvar.py +404 -0
  7. allelix/annotators/gnomad.py +212 -0
  8. allelix/annotators/gwas.py +354 -0
  9. allelix/annotators/pharmgkb.py +406 -0
  10. allelix/annotators/snpedia.py +276 -0
  11. allelix/cli.py +1524 -0
  12. allelix/compare.py +149 -0
  13. allelix/config.py +143 -0
  14. allelix/data/__init__.py +3 -0
  15. allelix/data/high_value_snps.yaml +64 -0
  16. allelix/databases/__init__.py +30 -0
  17. allelix/databases/_versions.py +16 -0
  18. allelix/databases/alphamissense_loader.py +48 -0
  19. allelix/databases/cadd_loader.py +49 -0
  20. allelix/databases/cpic_loader.py +234 -0
  21. allelix/databases/gnomad_loader.py +49 -0
  22. allelix/databases/gwas_loader.py +546 -0
  23. allelix/databases/loader_utils.py +80 -0
  24. allelix/databases/manager.py +515 -0
  25. allelix/databases/pharmgkb_loader.py +437 -0
  26. allelix/databases/schema.py +165 -0
  27. allelix/databases/snpedia_loader.py +44 -0
  28. allelix/databases/snpedia_parser.py +342 -0
  29. allelix/exporters/__init__.py +3 -0
  30. allelix/exporters/plink.py +144 -0
  31. allelix/models.py +117 -0
  32. allelix/parsers/__init__.py +73 -0
  33. allelix/parsers/_helpers.py +41 -0
  34. allelix/parsers/ancestrydna.py +130 -0
  35. allelix/parsers/base.py +97 -0
  36. allelix/parsers/ftdna.py +129 -0
  37. allelix/parsers/livingdna.py +121 -0
  38. allelix/parsers/myhappygenes.py +135 -0
  39. allelix/parsers/myheritage.py +118 -0
  40. allelix/parsers/twentythreeandme.py +150 -0
  41. allelix/py.typed +0 -0
  42. allelix/reports/__init__.py +40 -0
  43. allelix/reports/_pipeline.py +497 -0
  44. allelix/reports/diff.py +169 -0
  45. allelix/reports/high_value.py +133 -0
  46. allelix/reports/html.py +1130 -0
  47. allelix/reports/json_report.py +163 -0
  48. allelix/reports/methylation.py +50 -0
  49. allelix/reports/terminal.py +203 -0
  50. allelix/utils/__init__.py +3 -0
  51. allelix/utils/allele.py +87 -0
  52. allelix/utils/build_detect.py +203 -0
  53. allelix-1.8.1.dist-info/METADATA +276 -0
  54. allelix-1.8.1.dist-info/RECORD +58 -0
  55. allelix-1.8.1.dist-info/WHEEL +5 -0
  56. allelix-1.8.1.dist-info/entry_points.txt +2 -0
  57. allelix-1.8.1.dist-info/licenses/LICENSE +671 -0
  58. allelix-1.8.1.dist-info/top_level.txt +1 -0
allelix/__init__.py ADDED
@@ -0,0 +1,12 @@
1
+ # SPDX-License-Identifier: AGPL-3.0-or-later
2
+ # Copyright (C) 2026 dial481
3
+ """Allelix: open-source genotype analysis toolkit."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from importlib.metadata import PackageNotFoundError, version
8
+
9
def _resolve_version() -> str:
    """Return the installed ``allelix`` distribution version.

    Falls back to the placeholder ``"0.0.0+local"`` when no installed
    distribution metadata can be found for ``allelix``.
    """
    try:
        return version("allelix")
    except PackageNotFoundError:
        return "0.0.0+local"


__version__ = _resolve_version()
allelix/annotators/__init__.py ADDED
@@ -0,0 +1,90 @@
1
+ # SPDX-License-Identifier: AGPL-3.0-or-later
2
+ # Copyright (C) 2026 dial481
3
+ """Annotator registry. Unlike parsers, ALL annotators run on every variant."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import TYPE_CHECKING
8
+
9
+ from allelix.annotators.alphamissense import AlphaMissenseAnnotator
10
+ from allelix.annotators.base import Annotator
11
+ from allelix.annotators.cadd import CaddAnnotator
12
+ from allelix.annotators.clinvar import CLINVAR_SUPPORTED_BUILDS, ClinVarAnnotator
13
+ from allelix.annotators.gnomad import GnomadAnnotator
14
+ from allelix.annotators.gwas import GWASCatalogAnnotator
15
+ from allelix.annotators.pharmgkb import PharmGKBAnnotator
16
+ from allelix.annotators.snpedia import SNPediaAnnotator
17
+
18
+ if TYPE_CHECKING:
19
+ from pathlib import Path
20
+
21
+
22
def get_annotators(
    data_dir: Path,
    clinvar_builds: tuple[str, ...] = CLINVAR_SUPPORTED_BUILDS,
    *,
    include_benign: bool = False,
    gwas_filter_traits: bool = True,
    cadd_full: bool = False,
) -> list[Annotator]:
    """Build one instance of every registered annotator for ``data_dir``.

    Args:
        data_dir: Directory every annotator is bound to.
        clinvar_builds: ClinVar builds managed by this process. Defaults to
            both GRCh37 and GRCh38 (per ADR-0021); the CLI narrows it via
            ``db update --build grch37|grch38``.
        include_benign: Forwarded to ClinVarAnnotator. The default ``False``
            suppresses Benign/Likely_benign annotations (ADR-0008 amendment).
        gwas_filter_traits: Forwarded to GWASCatalogAnnotator. The default
            ``True`` excludes common-trait noise categories (ADR-0024
            amendment).
        cadd_full: Enables CADD full mode (tabix queries against the complete
            81 GB CADD file). Requires ``pysam`` and a local copy.

    Returns:
        The annotators in a fixed order: ClinVar, PharmGKB, GWAS Catalog,
        SNPedia, gnomAD, AlphaMissense, CADD.

    ADR-0023: ClinVar is constructed first because its
    ``reference_for(rsid, build)`` lookup is handed to PharmGKB and SNPedia
    as their primary hom-ref suppression filter — the REF allele decides
    whether the user is homozygous reference (a non-finding for the variant).
    """
    clinvar = ClinVarAnnotator(data_dir, builds=clinvar_builds, include_benign=include_benign)
    # Shared REF-allele lookup used by the sources that suppress hom-ref calls.
    ref_lookup = clinvar.reference_for
    return [
        clinvar,
        PharmGKBAnnotator(data_dir, clinvar_ref_provider=ref_lookup),
        GWASCatalogAnnotator(data_dir, filter_traits=gwas_filter_traits),
        SNPediaAnnotator(data_dir, clinvar_ref_provider=ref_lookup),
        GnomadAnnotator(data_dir),
        AlphaMissenseAnnotator(data_dir),
        CaddAnnotator(data_dir, full_mode=cadd_full),
    ]
58
+
59
+
60
# Registry keyed by each annotator's ``name`` ClassVar, used to resolve a
# source name back to its class.
_ANNOTATOR_CLASSES: dict[str, type[Annotator]] = {
    annotator_cls.name: annotator_cls
    for annotator_cls in (
        ClinVarAnnotator,
        PharmGKBAnnotator,
        GWASCatalogAnnotator,
        SNPediaAnnotator,
        GnomadAnnotator,
        AlphaMissenseAnnotator,
        CaddAnnotator,
    )
}
72
+
73
+
74
def get_annotator_class(name: str) -> type[Annotator] | None:
    """Look up a registered annotator class by its source name.

    Returns the class stored under *name* in the registry, or ``None`` when
    no annotator is registered under that name.
    """
    try:
        return _ANNOTATOR_CLASSES[name]
    except KeyError:
        return None
77
+
78
+
79
# Public API of the annotators package: the Annotator base class, every
# concrete annotator class, and the two registry helpers defined above.
__all__ = [
    "AlphaMissenseAnnotator",
    "Annotator",
    "CaddAnnotator",
    "ClinVarAnnotator",
    "GWASCatalogAnnotator",
    "GnomadAnnotator",
    "PharmGKBAnnotator",
    "SNPediaAnnotator",
    "get_annotator_class",
    "get_annotators",
]
allelix/annotators/alphamissense.py ADDED
@@ -0,0 +1,228 @@
1
+ # SPDX-License-Identifier: AGPL-3.0-or-later
2
+ # Copyright (C) 2026 dial481
3
+ """AlphaMissense variant pathogenicity enrichment.
4
+
5
+ AlphaMissense is not a clinical annotator — it does not produce
6
+ Annotation objects. It enriches existing annotations with missense
7
+ variant pathogenicity predictions. The pipeline calls
8
+ ``bulk_lookup()`` after all annotators have run, and stamps each
9
+ annotation's ``am_pathogenicity`` and ``am_class`` fields.
10
+
11
+ License: CC BY 4.0. Attribution: Cheng et al., Science 2023
12
+ (doi:10.1126/science.adg7492).
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import logging
18
+ import sqlite3
19
+ from typing import TYPE_CHECKING, ClassVar
20
+
21
+ from allelix.annotators.base import Annotator, LicenseDescriptor
22
+ from allelix.databases._versions import ALPHAMISSENSE_SCHEMA_VERSION
23
+ from allelix.databases.alphamissense_loader import (
24
+ ALPHAMISSENSE_CACHE_URL,
25
+ ALPHAMISSENSE_DB_FILENAME,
26
+ ALPHAMISSENSE_EXPECTED_SHA256,
27
+ install_prebuilt_cache,
28
+ )
29
+ from allelix.databases.gnomad_loader import GNOMAD_DB_FILENAME
30
+ from allelix.databases.manager import (
31
+ download,
32
+ get_database_info,
33
+ verify_file_hash,
34
+ )
35
+
36
+ if TYPE_CHECKING:
37
+ from pathlib import Path
38
+
39
+ from allelix.models import Annotation, Variant
40
+
41
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Upper bound on bound parameters per bulk query; lookups are chunked to this
# size to stay within SQLite's variable limit.
_BULK_BATCH_SIZE = 900
44
+
45
+
46
class AlphaMissenseAnnotator(Annotator):
    """Enrichment source for AlphaMissense missense-pathogenicity scores.

    Derives from Annotator so that ``db update`` / ``db status`` /
    ``is_ready()`` handle it like any other source. ``annotate()`` is a no-op
    that always returns ``[]``; scores are retrieved through ``lookup``,
    ``bulk_lookup`` and ``bulk_lookup_by_alt`` instead of the per-variant
    annotation loop.
    """

    name: ClassVar[str] = "alphamissense"
    display_name: ClassVar[str] = "AlphaMissense"
    attribution: ClassVar[str] = "AlphaMissense"
    requires_download: ClassVar[bool] = True
    server_driven_freshness: ClassVar[bool] = False
    license: ClassVar[LicenseDescriptor] = LicenseDescriptor(
        spdx="CC-BY-4.0",
        license_url="https://creativecommons.org/licenses/by/4.0/",
        attribution_text=(
            "AlphaMissense predictions from Cheng et al., Science 2023"
            " (doi:10.1126/science.adg7492). Licensed under CC BY 4.0."
        ),
        source_url="https://zenodo.org/records/10813168",
        citation="Cheng et al., Science 2023 (doi:10.1126/science.adg7492)",
        commercial_ok=True,
    )

    def __init__(self, data_dir: Path) -> None:
        """Remember the data directory and the cache path; the DB is opened lazily."""
        super().__init__(data_dir)
        self._db_path = data_dir / ALPHAMISSENSE_DB_FILENAME
        self._conn: sqlite3.Connection | None = None

    def _connection(self) -> sqlite3.Connection:
        """Return the cached SQLite connection, opening it on first use.

        The gnomAD version check runs once, right after the connection is
        opened.

        Raises:
            FileNotFoundError: If the cache file does not exist.
        """
        if self._conn is not None:
            return self._conn
        if not self._db_path.exists():
            raise FileNotFoundError(
                f"AlphaMissense cache not found at {self._db_path}. "
                "Run `allelix db update` first."
            )
        self._conn = sqlite3.connect(self._db_path)
        self._check_gnomad_version()
        return self._conn

    def _check_gnomad_version(self) -> None:
        """Log a warning when the cache's gnomAD stamp disagrees with the installed gnomAD.

        Silently does nothing when the cache carries no stamp, when no gnomAD
        database info is available, or when the installed version is empty.
        """
        assert self._conn is not None
        stamp_row = self._conn.execute(
            "SELECT version FROM database_versions WHERE name = 'alphamissense_gnomad_source'"
        ).fetchone()
        if stamp_row is None:
            return
        built_against = stamp_row[0]
        if built_against == "no_gnomad":
            logger.warning(
                "AlphaMissense cache was built without gnomAD (--no-gnomad). "
                "rsID lookups will return no results."
            )
            return
        gnomad_info = get_database_info(self.data_dir / GNOMAD_DB_FILENAME, "gnomad")
        if gnomad_info is None:
            return
        installed_version = gnomad_info["version"]
        # Nothing to report when the installed version is unknown or matches.
        if not installed_version or built_against == installed_version:
            return
        logger.warning(
            "AlphaMissense cache was built against gnomAD %s but installed "
            "gnomAD is %s. rsID mappings may be stale. Rebuild with: "
            "python scripts/build_alphamissense_cache.py",
            built_against,
            installed_version,
        )

    def setup(self) -> None:
        """Download, hash-verify and install the pre-built AlphaMissense cache.

        The archive is staged next to the cache in ``data_dir`` and removed
        after a successful install; failure to remove it is only logged.
        """
        staged_archive = self.data_dir / "alphamissense.sqlite.gz"
        download(ALPHAMISSENSE_CACHE_URL, staged_archive)
        verify_file_hash(staged_archive, "sha256", ALPHAMISSENSE_EXPECTED_SHA256)
        install_prebuilt_cache(
            staged_archive,
            self._db_path,
            source_url=ALPHAMISSENSE_CACHE_URL,
        )
        try:
            staged_archive.unlink()
        except OSError:
            logger.warning("Could not remove staged file at %s", staged_archive)

    def is_ready(self) -> bool:
        """Report whether the cache exists and carries an acceptable schema tag.

        An empty or missing ``local_version_tag`` is accepted; otherwise the
        tag must equal ``sv:<ALPHAMISSENSE_SCHEMA_VERSION>``.
        """
        info = get_database_info(self._db_path, "alphamissense")
        if info is None:
            return False
        schema_tag = info.get("local_version_tag") or ""
        return not schema_tag or schema_tag == f"sv:{ALPHAMISSENSE_SCHEMA_VERSION}"

    def version(self) -> str | None:
        """Return the recorded cache version, or None when no info is available."""
        info = get_database_info(self._db_path, "alphamissense")
        if not info:
            return None
        return info["version"]

    def record_count(self) -> int | None:
        """Return the recorded variant count, or None when no info is available."""
        info = get_database_info(self._db_path, "alphamissense")
        if not info:
            return None
        return info["record_count"]

    def close(self) -> None:
        """Close the SQLite connection if one is open; safe to call repeatedly."""
        if self._conn is None:
            return
        self._conn.close()
        self._conn = None

    def fetch_remote_signal(self) -> str | None:
        """Always None: code-driven source with no runtime freshness probe (ADR-0030)."""
        return None

    def cached_remote_signal(self) -> str | None:
        """Always None: code-driven source with no cached signal to compare (ADR-0030)."""
        return None

    def annotate(self, variant: Variant) -> list[Annotation]:
        """Return ``[]`` unconditionally — AlphaMissense enriches, it does not annotate."""
        return []

    def lookup(self, rsid: str) -> tuple[float, str] | None:
        """Return ``(am_pathogenicity, am_class)`` for one rsID, or None if absent.

        The score is the MAX over all rows stored for the rsID.
        """
        hit = (
            self._connection()
            .execute(
                "SELECT MAX(am_pathogenicity), am_class FROM alphamissense_scores WHERE rsid = ?",
                (rsid,),
            )
            .fetchone()
        )
        if hit is None or hit[0] is None:
            return None
        return (hit[0], hit[1])

    def bulk_lookup(self, rsids: set[str]) -> dict[str, tuple[float, str]]:
        """Map each found rsID to ``(am_pathogenicity, am_class)``.

        Fallback for annotations without a known alt allele: MAX resolves
        multi-allelic sites. Prefer ``bulk_lookup_by_alt`` when the alt
        allele is available.

        Queries run in chunks of ``_BULK_BATCH_SIZE`` rsIDs to stay within
        SQLite's variable limit. rsIDs with no score are left out.
        """
        if not rsids:
            return {}
        conn = self._connection()
        pending = list(rsids)
        found: dict[str, tuple[float, str]] = {}
        for start in range(0, len(pending), _BULK_BATCH_SIZE):
            chunk = pending[start : start + _BULK_BATCH_SIZE]
            marks = ",".join(["?"] * len(chunk))
            query = (
                "SELECT rsid, MAX(am_pathogenicity), am_class"
                " FROM alphamissense_scores"
                f" WHERE rsid IN ({marks}) GROUP BY rsid"
            )
            found.update(
                {
                    rsid: (score, am_class)
                    for rsid, score, am_class in conn.execute(query, chunk).fetchall()
                    if score is not None
                }
            )
        return found

    def bulk_lookup_by_alt(
        self, keys: set[tuple[str, str]]
    ) -> dict[tuple[str, str], tuple[float, str]]:
        """Map each exactly matching ``(rsid, alt)`` key to ``(am_pathogenicity, am_class)``.

        Every key binds two parameters, so each query carries at most
        ``_BULK_BATCH_SIZE // 2`` keys. Rows with a NULL score are skipped.
        """
        if not keys:
            return {}
        conn = self._connection()
        pending = list(keys)
        pairs_per_query = _BULK_BATCH_SIZE // 2
        found: dict[tuple[str, str], tuple[float, str]] = {}
        for start in range(0, len(pending), pairs_per_query):
            chunk = pending[start : start + pairs_per_query]
            where = " OR ".join("(rsid = ? AND alt = ?)" for _ in chunk)
            # Flatten the pairs into rsid, alt, rsid, alt, ... to match the clauses.
            bound: list[str] = []
            for rsid, alt in chunk:
                bound.extend((rsid, alt))
            cursor = conn.execute(
                f"SELECT rsid, alt, am_pathogenicity, am_class"
                f" FROM alphamissense_scores WHERE {where}",
                bound,
            )
            for rsid, alt, score, am_class in cursor.fetchall():
                if score is None:
                    continue
                found[(rsid, alt)] = (score, am_class)
        return found
allelix/annotators/base.py ADDED
@@ -0,0 +1,214 @@
1
+ # SPDX-License-Identifier: AGPL-3.0-or-later
2
+ # Copyright (C) 2026 dial481
3
+ """Abstract base class for reference-database annotators."""
4
+
5
+ from __future__ import annotations
6
+
7
+ import contextlib
8
+ from abc import ABC, abstractmethod
9
+ from dataclasses import dataclass
10
+ from enum import Enum, auto
11
+ from typing import TYPE_CHECKING, ClassVar
12
+
13
+ if TYPE_CHECKING:
14
+ from collections.abc import Callable
15
+ from pathlib import Path
16
+ from types import TracebackType
17
+
18
+ from allelix.models import Annotation, Variant
19
+
20
+
21
@dataclass(frozen=True)
class LicenseDescriptor:
    """Single source of truth for a data source's license terms.

    Instances are immutable (``frozen=True``). Only ``spdx``, ``license_url``
    and ``attribution_text`` are required; the remaining fields default to
    "not specified".
    """

    # SPDX identifier. Annotator subclasses whose identifier starts with
    # "LicenseRef-" or "custom-" must also set commercial_ok explicitly.
    spdx: str
    license_url: str
    attribution_text: str
    source_url: str | None = None
    citation: str | None = None
    # Explicit commercial-use flag. When not None, is_non_commercial() uses it
    # instead of looking the SPDX identifier up in _NON_COMMERCIAL_SPDX.
    commercial_ok: bool | None = None
    # Whether a non-commercial source can be unlocked by holding a license;
    # permission() reports BLOCK_FINAL for blocked sources when this is False.
    licensable: bool = False
    # Must be set whenever licensable is True (enforced by
    # Annotator.__init_subclass__).
    purchase_url: str | None = None
33
+
34
+
35
# SPDX identifiers that is_non_commercial() treats as prohibiting commercial
# use when a descriptor leaves commercial_ok unset (None).
_NON_COMMERCIAL_SPDX: frozenset[str] = frozenset(
    {
        "CC-BY-NC-SA-3.0-US",
        "CC-BY-NC-SA-4.0",
        "CC-BY-NC-4.0",
    }
)
42
+
43
+
44
def is_non_commercial(descriptor: LicenseDescriptor) -> bool:
    """Tell whether the described license prohibits commercial use.

    An explicit ``commercial_ok`` on the descriptor always wins; only when it
    is ``None`` is the SPDX identifier checked against the known
    non-commercial identifiers.
    """
    explicit_flag = descriptor.commercial_ok
    if explicit_flag is None:
        return descriptor.spdx in _NON_COMMERCIAL_SPDX
    return not explicit_flag
49
+
50
+
51
class Permission(Enum):
    """Three-state permission result for a source in the current license context."""

    # The source may be used.
    ALLOW = auto()
    # Commercial use of a non-commercial source that is not licensable.
    BLOCK_FINAL = auto()
    # Commercial use of a non-commercial, licensable source without a held license.
    BLOCK_PURCHASABLE = auto()
57
+
58
+
59
def permission(
    descriptor: LicenseDescriptor,
    *,
    commercial: bool,
    license_held: bool,
) -> Permission:
    """Decide whether a source may be used in the current license context.

    Args:
        descriptor: License terms of the source.
        commercial: Whether the current use is commercial.
        license_held: Whether a license for this source is held.

    Returns:
        ``ALLOW`` for non-commercial use, for sources whose license does not
        prohibit commercial use, and for licensable sources with a held
        license. ``BLOCK_FINAL`` when the source is non-commercial and not
        licensable. ``BLOCK_PURCHASABLE`` when it is licensable but no
        license is held.
    """
    # Only commercial use of a non-commercial source can ever be blocked.
    if not commercial or not is_non_commercial(descriptor):
        return Permission.ALLOW
    if not descriptor.licensable:
        return Permission.BLOCK_FINAL
    return Permission.ALLOW if license_held else Permission.BLOCK_PURCHASABLE
75
+
76
+
77
def is_clinvar_homref(
    variant: Variant,
    clinvar_ref_provider: Callable[[str, str], str | None] | None,
) -> bool:
    """Tell whether *variant* is homozygous reference per ClinVar (ADR-0023).

    Args:
        variant: Variant whose ``rsid``, ``build``, ``allele1`` and
            ``allele2`` are inspected.
        clinvar_ref_provider: Callable mapping ``(rsid, build)`` to the REF
            allele, or ``None`` when no provider is wired in.

    Returns:
        True only when a single-character REF allele is known and both of
        the variant's alleles equal it; False in every other case, including
        a missing provider or an unknown REF allele.
    """
    if clinvar_ref_provider is None:
        return False
    ref_allele = clinvar_ref_provider(variant.rsid, variant.build)
    # Only single-character REF alleles are compared against the genotype.
    if ref_allele is None or len(ref_allele) != 1:
        return False
    return variant.allele1 == ref_allele and variant.allele2 == ref_allele
86
+
87
+
88
class Annotator(ABC):
    """Common contract for every reference-database annotator.

    An annotator is constructed against a `data_dir`. `setup()` performs the
    one-time download/parse into the cache, `is_ready()` reports whether that
    cache exists and can be queried, and `annotate(variant)` yields zero or
    more `Annotation` objects. Annotation must check both rsid AND genotype,
    per the regulatory posture (ADR-0003) and the genotype-matching rule
    (ADR-0007).

    Instances hold resources (SQLite connections, file handles). Release them
    with `close()` or a `with` block; the CLI uses `contextlib.ExitStack` to
    guarantee deterministic cleanup.

    Attributes:
        name: Lowercase identifier (e.g., "clinvar").
        display_name: Human-readable name ("ClinVar").
        attribution: Display label used in user-facing reports ("ClinVar").
            Equal to `display_name` for first-party single-source annotators.
        requires_download: Whether `setup()` needs network/disk space.
        server_driven_freshness: Class-level flag, True unless a subclass
            overrides it.
        license: License terms of the source. Required on every subclass
            that does not itself declare abstract methods.
    """

    name: ClassVar[str]
    display_name: ClassVar[str]
    attribution: ClassVar[str]
    requires_download: ClassVar[bool] = True
    server_driven_freshness: ClassVar[bool] = True
    license: ClassVar[LicenseDescriptor]

    def __init_subclass__(cls, **kwargs: object) -> None:
        """Validate the subclass's `license` declaration when the class is defined.

        Raises:
            TypeError: If a subclass that declares no abstract methods of its
                own has no `license`; if a "LicenseRef-"/"custom-" SPDX
                identifier comes without an explicit `commercial_ok`; or if
                `licensable` is True while `purchase_url` is None.
        """
        super().__init_subclass__(**kwargs)
        # Only members defined directly on this class are inspected.
        declares_abstract = any(
            getattr(member, "__isabstractmethod__", False) for member in vars(cls).values()
        )
        if not hasattr(cls, "license"):
            if declares_abstract:
                return
            msg = f"{cls.__name__} must declare a 'license' ClassVar of type LicenseDescriptor"
            raise TypeError(msg)

        descriptor = cls.license
        uses_custom_spdx = descriptor.spdx.startswith(("LicenseRef-", "custom-"))
        if uses_custom_spdx and descriptor.commercial_ok is None:
            msg = (
                f"{cls.__name__} uses custom SPDX '{descriptor.spdx}' but "
                f"does not declare commercial_ok (True or False)"
            )
            raise TypeError(msg)
        if descriptor.licensable and descriptor.purchase_url is None:
            msg = (
                f"{cls.__name__} declares licensable=True but "
                f"purchase_url is None — set it explicitly"
            )
            raise TypeError(msg)

    def __init__(self, data_dir: Path) -> None:
        """Store the data directory; the directory itself is created elsewhere."""
        self.data_dir = data_dir

    def __del__(self) -> None:
        """Best-effort `close()` on garbage collection to prevent ResourceWarning."""
        # Any exception from close() is swallowed here.
        with contextlib.suppress(Exception):
            self.close()

    def __enter__(self) -> Annotator:
        """Enter the `with` block, yielding the annotator itself."""
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None:
        """Call `close()` on leaving the `with` block; exceptions are not suppressed."""
        self.close()

    @abstractmethod
    def setup(self) -> None:
        """Download and prepare the reference database. Idempotent."""

    @abstractmethod
    def annotate(self, variant: Variant) -> list[Annotation]:
        """Return all annotations for this variant.

        Implementations MUST verify both rsid AND genotype — presence in the
        database is not enough. The user must carry the flagged allele.
        """

    @abstractmethod
    def is_ready(self) -> bool:
        """Whether the local cache exists and is queryable."""

    @abstractmethod
    def version(self) -> str | None:
        """Return the cached database version, or None if not set up."""

    @abstractmethod
    def close(self) -> None:
        """Release any open resources (database connections, file handles)."""

    @abstractmethod
    def fetch_remote_signal(self) -> str | None:
        """Fetch a small remote freshness signal (md5 hash, ETag, Last-Modified).

        Implementations MUST return a prefixed, opaque string (e.g.
        `"md5:abcdef…"`, `"lm:Wed, 21 Oct 2025 …"`, `"etag:…"`) so that a
        future server-side switch in signal type triggers a refresh rather
        than a silent miss.

        Returns None on any failure (network error, timeout, missing header,
        source doesn't expose a signal). Never raises — `db update` treats
        None as "can't verify freshness" and falls through to skip with a
        notice. See ADR-0012.
        """

    @abstractmethod
    def cached_remote_signal(self) -> str | None:
        """Return the remote signal stored at last successful download, or None.

        Returns None if the cache is missing entirely OR if the cache was
        written by a pre-v0.4.2 release that didn't capture signals.
        """