allelix 1.8.2__tar.gz → 1.8.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {allelix-1.8.2 → allelix-1.8.4}/PKG-INFO +17 -45
- {allelix-1.8.2 → allelix-1.8.4}/README.md +16 -44
- {allelix-1.8.2 → allelix-1.8.4}/allelix/cli.py +22 -5
- {allelix-1.8.2 → allelix-1.8.4}/allelix.egg-info/PKG-INFO +17 -45
- {allelix-1.8.2 → allelix-1.8.4}/pyproject.toml +1 -1
- {allelix-1.8.2 → allelix-1.8.4}/tests/test_cli.py +52 -0
- {allelix-1.8.2 → allelix-1.8.4}/LICENSE +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/__init__.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/annotators/__init__.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/annotators/alphamissense.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/annotators/base.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/annotators/cadd.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/annotators/clinvar.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/annotators/gnomad.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/annotators/gwas.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/annotators/pharmgkb.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/annotators/snpedia.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/compare.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/config.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/data/__init__.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/data/high_value_snps.yaml +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/databases/__init__.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/databases/_versions.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/databases/alphamissense_loader.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/databases/cadd_loader.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/databases/cpic_loader.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/databases/gnomad_loader.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/databases/gwas_loader.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/databases/loader_utils.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/databases/manager.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/databases/pharmgkb_loader.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/databases/schema.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/databases/snpedia_loader.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/databases/snpedia_parser.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/exporters/__init__.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/exporters/plink.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/models.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/parsers/__init__.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/parsers/_helpers.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/parsers/ancestrydna.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/parsers/base.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/parsers/ftdna.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/parsers/livingdna.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/parsers/myhappygenes.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/parsers/myheritage.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/parsers/twentythreeandme.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/py.typed +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/reports/__init__.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/reports/_pipeline.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/reports/diff.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/reports/high_value.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/reports/html.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/reports/json_report.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/reports/methylation.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/reports/terminal.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/utils/__init__.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/utils/allele.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix/utils/build_detect.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix.egg-info/SOURCES.txt +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix.egg-info/dependency_links.txt +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix.egg-info/entry_points.txt +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix.egg-info/requires.txt +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/allelix.egg-info/top_level.txt +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/setup.cfg +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/tests/test_cli_helpers.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/tests/test_compare.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/tests/test_config.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/tests/test_end_to_end.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/tests/test_mock_data_invariants.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/tests/test_models.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/tests/test_registry.py +0 -0
- {allelix-1.8.2 → allelix-1.8.4}/tests/test_version.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: allelix
|
|
3
|
-
Version: 1.8.2
|
|
3
|
+
Version: 1.8.4
|
|
4
4
|
Summary: Open-source genotype analysis toolkit. Format-agnostic ingestion, database-agnostic annotation, offline-first.
|
|
5
5
|
Author-email: dial481 <dial481@users.noreply.github.com>
|
|
6
6
|
License-Expression: AGPL-3.0-or-later
|
|
@@ -44,55 +44,27 @@ Open-source command-line toolkit for analyzing raw genotype files from consumer
|
|
|
44
44
|
> HTML/JSON/terminal reports, methylation + pharmacogenomics focused
|
|
45
45
|
> commands, report diffing, persistent config with commercial-mode
|
|
46
46
|
> safety switch. Build auto-detection from position data (ADR-0021).
|
|
47
|
-
> No regex on prose anywhere in production. **Latest: v1.8.
|
|
48
|
-
>
|
|
49
|
-
>
|
|
50
|
-
> [`CHANGELOG.md`](CHANGELOG.md).
|
|
47
|
+
> No regex on prose anywhere in production. **Latest: v1.8.4** —
|
|
48
|
+
> `--no-cadd` flag for licensing exclusion parity.
|
|
49
|
+
> Release notes:
|
|
50
|
+
> [`CHANGELOG.md`](https://github.com/dial481/allelix/blob/main/CHANGELOG.md).
|
|
51
51
|
|
|
52
52
|
## Quickstart
|
|
53
53
|
|
|
54
|
-
Requires Python 3.11+.
|
|
55
|
-
|
|
56
54
|
```bash
|
|
57
|
-
|
|
58
|
-
cd allelix
|
|
59
|
-
python -m venv .venv
|
|
60
|
-
source .venv/bin/activate
|
|
61
|
-
pip install -e ".[dev]"
|
|
62
|
-
|
|
63
|
-
# Generate a synthetic test fixture
|
|
64
|
-
python tests/generate_mock_data.py
|
|
65
|
-
|
|
66
|
-
# Show summary statistics for a genotype file
|
|
67
|
-
allelix stats tests/fixtures/mock_myhappygenes.txt
|
|
55
|
+
pip install allelix
|
|
68
56
|
|
|
69
|
-
# Download reference databases
|
|
70
|
-
#
|
|
71
|
-
# to skip the large enrichment databases. Re-runs skip unchanged sources.
|
|
57
|
+
# Download reference databases (~15GB with all sources).
|
|
58
|
+
# Use --no-gnomad / --no-alphamissense to skip the large ones.
|
|
72
59
|
# CADD is opt-in: allelix db update --cadd
|
|
73
60
|
allelix db update
|
|
74
|
-
allelix db status # see what's cached
|
|
75
61
|
|
|
76
|
-
# Analyze a genotype file
|
|
77
|
-
allelix analyze tests/fixtures/mock_myhappygenes.txt
|
|
78
|
-
|
|
79
|
-
# Same data, focused subsets
|
|
80
|
-
allelix methylation tests/fixtures/mock_myhappygenes.txt
|
|
81
|
-
allelix pharmacogenomics tests/fixtures/mock_myhappygenes.txt
|
|
82
|
-
|
|
83
|
-
# Compare two genotype files (coverage, concordance, strand-flip detection)
|
|
84
|
-
allelix compare file1.txt file2.txt
|
|
85
|
-
|
|
86
|
-
# Export to PLINK1 binary format (.bed/.bim/.fam) for plink2, ADMIXTURE, PRSice
|
|
87
|
-
# Expect ~60% monomorphic markers (A2=0) — genotyping chips probe many
|
|
88
|
-
# intronic/intergenic sites outside gnomAD's exome coverage.
|
|
89
|
-
allelix export plink genotype_file.txt -o output_prefix --build grch37
|
|
90
|
-
|
|
91
|
-
# Output to a self-contained HTML or JSON report
|
|
92
|
-
allelix analyze tests/fixtures/mock_myhappygenes.txt --output report.html
|
|
93
|
-
allelix analyze tests/fixtures/mock_myhappygenes.txt --output report.json
|
|
62
|
+
# Analyze a genotype file
|
|
63
|
+
allelix analyze your_genotype_file.txt --output report.html
|
|
94
64
|
```
|
|
95
65
|
|
|
66
|
+
Requires Python 3.11+. See [Development](#development) for source installs and running tests.
|
|
67
|
+
|
|
96
68
|
## Supported Formats
|
|
97
69
|
|
|
98
70
|
| Format | Status | Notes |
|
|
@@ -126,7 +98,7 @@ Adding a new format means adding one file to `allelix/parsers/` and registering
|
|
|
126
98
|
| GWAS Catalog | ✓ | Public domain (EBI/NHGRI). Trait–SNP associations with p-values and effect sizes. Carrier rule (ADR-0007) requires the user to carry the risk allele. P-value magnitude scoring (ADR-0024) maps continuous p-values to the 0–10 scale; unknown-risk-allele entries fire on rsID match alone but are capped at 3.0. |
|
|
127
99
|
| gnomAD | ✓ | ODbL v1.0. **Enrichment annotator** — adds population allele frequency context to existing annotations. Shows how common each variant is in the general population (~16M exome variants from 730K individuals). A pathogenic variant that 35% of people carry reads very differently from one seen in 0.001%. Pre-built cache downloaded via `db update` (~6GB on disk). Use `--no-gnomad` to skip. |
|
|
128
100
|
| AlphaMissense | ✓ | CC BY 4.0. **Enrichment annotator** — adds DeepMind's protein-structure-based pathogenicity predictions to existing annotations. Scores 71M missense variants on a 0–1 scale: <0.34 = likely benign, >0.564 = likely pathogenic. Complements ClinVar's expert classifications with computational predictions — especially valuable for variants ClinVar hasn't reviewed yet. Pre-built cache downloaded via `db update` (~8GB on disk). Use `--no-alphamissense` to skip. |
|
|
129
|
-
| CADD | ✓ | LicenseRef-CADD (non-commercial). **Enrichment annotator** — adds PHRED-scaled deleteriousness scores from CADD v1.7. Ranks how deleterious any single-nucleotide variant is using 100+ annotation tracks (coding, non-coding, regulatory). PHRED 10 = top 10% most deleterious, 20 = top 1%, 30 = top 0.1%. **Opt-in** — disabled by default (`sources.cadd = false`). Enable via `allelix db update --cadd` or `allelix config set sources.cadd true`. Pre-built cache (~5 GB on disk, ~120M variant keys). Full mode available via pysam for GRCh38 data (`options.cadd_full = true`). Cache mode covers the large majority of variants present in gnomAD, AlphaMissense, and ClinVar — nearly every position allelix can annotate from its other databases. For genotyping chip data (23andMe, AncestryDNA, MyHappyGenes, etc.), cache and full mode produce effectively identical results because chip probes overwhelmingly target known, cataloged variants. Full mode adds coverage for novel or private variants that appear only in whole-genome or whole-exome sequencing data and are not in any pre-computed database. If your input is a genotyping chip file, cache mode is all you need. |
|
|
101
|
+
| CADD | ✓ | LicenseRef-CADD (non-commercial). **Enrichment annotator** — adds PHRED-scaled deleteriousness scores from CADD v1.7. Ranks how deleterious any single-nucleotide variant is using 100+ annotation tracks (coding, non-coding, regulatory). PHRED 10 = top 10% most deleterious, 20 = top 1%, 30 = top 0.1%. **Opt-in** — disabled by default (`sources.cadd = false`). Enable via `allelix db update --cadd` or `allelix config set sources.cadd true`. Use `--no-cadd` to skip enrichment for a single run. Pre-built cache (~5 GB on disk, ~120M variant keys). Full mode available via pysam for GRCh38 data (`options.cadd_full = true`). Cache mode covers the large majority of variants present in gnomAD, AlphaMissense, and ClinVar — nearly every position allelix can annotate from its other databases. For genotyping chip data (23andMe, AncestryDNA, MyHappyGenes, etc.), cache and full mode produce effectively identical results because chip probes overwhelmingly target known, cataloged variants. Full mode adds coverage for novel or private variants that appear only in whole-genome or whole-exome sequencing data and are not in any pre-computed database. If your input is a genotyping chip file, cache mode is all you need. |
|
|
130
102
|
|
|
131
103
|
### Known PharmGKB limitation: reference-genotype rows where ClinVar and CPIC both lack data
|
|
132
104
|
|
|
@@ -183,7 +155,7 @@ allelix config set license.commercial true
|
|
|
183
155
|
allelix config set license.cadd true
|
|
184
156
|
```
|
|
185
157
|
|
|
186
|
-
CLI flags (`--no-gnomad`, `--no-alphamissense`, `--exclude-snpedia`, `--cadd`) override the config for a single run. The config sets the baseline; flags override per-invocation.
|
|
158
|
+
CLI flags (`--no-gnomad`, `--no-alphamissense`, `--no-cadd`, `--exclude-snpedia`, `--cadd`) override the config for a single run. The config sets the baseline; flags override per-invocation.
|
|
187
159
|
|
|
188
160
|
### Database sizes and download times
|
|
189
161
|
|
|
@@ -215,7 +187,7 @@ Allelix source code is licensed under the **GNU Affero General Public License v3
|
|
|
215
187
|
| SNPedia | snpedia.com | CC BY-NC-SA 3.0 US | Attribution required, **non-commercial only**. Use `--exclude-snpedia` to omit. |
|
|
216
188
|
| gnomAD | gnomad.broadinstitute.org | ODbL v1.0 | Attribution required. Population allele frequencies for context; not a clinical annotator. Use `--no-gnomad` to omit. |
|
|
217
189
|
| AlphaMissense | zenodo.org/records/10813168 | CC BY 4.0 | Attribution required. Cheng et al., Science 2023. Missense variant pathogenicity predictions. Use `--no-alphamissense` to omit. |
|
|
218
|
-
| CADD | cadd.gs.washington.edu | LicenseRef-CADD | Attribution required, **non-commercial by default**. Commercial licenses available from UW CoMotion. Opt-in via `allelix db update --cadd`. |
|
|
190
|
+
| CADD | cadd.gs.washington.edu | LicenseRef-CADD | Attribution required, **non-commercial by default**. Commercial licenses available from UW CoMotion. Opt-in via `allelix db update --cadd`. Use `--no-cadd` to omit. |
|
|
219
191
|
|
|
220
192
|
**Commercial users:** When `license.commercial = true`, non-commercial sources are gated by a three-state permission model. SNPedia is permanently blocked (no commercial license is available). CADD is blocked by default but can be unlocked — the University of Washington offers commercial licenses at `https://els2.comotion.uw.edu/product/cadd-scores`; after purchasing, assert your license with `allelix config set license.cadd true` to re-enable CADD in commercial mode. All other databases (ClinVar, PharmGKB, GWAS Catalog, gnomAD, AlphaMissense) are compatible with commercial use. `allelix config show` displays the permission state for each source.
|
|
221
193
|
|
|
@@ -246,7 +218,7 @@ None of these are scraping errors. They are editorial inconsistencies on the sou
|
|
|
246
218
|
|
|
247
219
|
## Architecture & Design Decisions
|
|
248
220
|
|
|
249
|
-
The "why" behind major design choices lives in [`docs/adr/`](docs/adr/README.md) as Architecture Decision Records. Read these before proposing changes that touch the parser/annotator interfaces, the regulatory posture, or the data-handling model.
|
|
221
|
+
The "why" behind major design choices lives in [`docs/adr/`](https://github.com/dial481/allelix/blob/main/docs/adr/README.md) as Architecture Decision Records. Read these before proposing changes that touch the parser/annotator interfaces, the regulatory posture, or the data-handling model.
|
|
250
222
|
|
|
251
223
|
Notable load-bearing ADRs:
|
|
252
224
|
|
|
@@ -256,7 +228,7 @@ Notable load-bearing ADRs:
|
|
|
256
228
|
- **ADR-0009 — PharmGKB matches the user's exact normalized diploid call.**
|
|
257
229
|
- **ADR-0015 — Mock data generators are the contract.** Fixture shape must mirror real data shape; invariants tested.
|
|
258
230
|
|
|
259
|
-
Release history: see [`CHANGELOG.md`](CHANGELOG.md).
|
|
231
|
+
Release history: see [`CHANGELOG.md`](https://github.com/dial481/allelix/blob/main/CHANGELOG.md).
|
|
260
232
|
|
|
261
233
|
## Development
|
|
262
234
|
|
|
@@ -10,55 +10,27 @@ Open-source command-line toolkit for analyzing raw genotype files from consumer
|
|
|
10
10
|
> HTML/JSON/terminal reports, methylation + pharmacogenomics focused
|
|
11
11
|
> commands, report diffing, persistent config with commercial-mode
|
|
12
12
|
> safety switch. Build auto-detection from position data (ADR-0021).
|
|
13
|
-
> No regex on prose anywhere in production. **Latest: v1.8.
|
|
14
|
-
>
|
|
15
|
-
>
|
|
16
|
-
> [`CHANGELOG.md`](CHANGELOG.md).
|
|
13
|
+
> No regex on prose anywhere in production. **Latest: v1.8.4** —
|
|
14
|
+
> `--no-cadd` flag for licensing exclusion parity.
|
|
15
|
+
> Release notes:
|
|
16
|
+
> [`CHANGELOG.md`](https://github.com/dial481/allelix/blob/main/CHANGELOG.md).
|
|
17
17
|
|
|
18
18
|
## Quickstart
|
|
19
19
|
|
|
20
|
-
Requires Python 3.11+.
|
|
21
|
-
|
|
22
20
|
```bash
|
|
23
|
-
|
|
24
|
-
cd allelix
|
|
25
|
-
python -m venv .venv
|
|
26
|
-
source .venv/bin/activate
|
|
27
|
-
pip install -e ".[dev]"
|
|
28
|
-
|
|
29
|
-
# Generate a synthetic test fixture
|
|
30
|
-
python tests/generate_mock_data.py
|
|
31
|
-
|
|
32
|
-
# Show summary statistics for a genotype file
|
|
33
|
-
allelix stats tests/fixtures/mock_myhappygenes.txt
|
|
21
|
+
pip install allelix
|
|
34
22
|
|
|
35
|
-
# Download reference databases
|
|
36
|
-
#
|
|
37
|
-
# to skip the large enrichment databases. Re-runs skip unchanged sources.
|
|
23
|
+
# Download reference databases (~15GB with all sources).
|
|
24
|
+
# Use --no-gnomad / --no-alphamissense to skip the large ones.
|
|
38
25
|
# CADD is opt-in: allelix db update --cadd
|
|
39
26
|
allelix db update
|
|
40
|
-
allelix db status # see what's cached
|
|
41
27
|
|
|
42
|
-
# Analyze a genotype file
|
|
43
|
-
allelix analyze tests/fixtures/mock_myhappygenes.txt
|
|
44
|
-
|
|
45
|
-
# Same data, focused subsets
|
|
46
|
-
allelix methylation tests/fixtures/mock_myhappygenes.txt
|
|
47
|
-
allelix pharmacogenomics tests/fixtures/mock_myhappygenes.txt
|
|
48
|
-
|
|
49
|
-
# Compare two genotype files (coverage, concordance, strand-flip detection)
|
|
50
|
-
allelix compare file1.txt file2.txt
|
|
51
|
-
|
|
52
|
-
# Export to PLINK1 binary format (.bed/.bim/.fam) for plink2, ADMIXTURE, PRSice
|
|
53
|
-
# Expect ~60% monomorphic markers (A2=0) — genotyping chips probe many
|
|
54
|
-
# intronic/intergenic sites outside gnomAD's exome coverage.
|
|
55
|
-
allelix export plink genotype_file.txt -o output_prefix --build grch37
|
|
56
|
-
|
|
57
|
-
# Output to a self-contained HTML or JSON report
|
|
58
|
-
allelix analyze tests/fixtures/mock_myhappygenes.txt --output report.html
|
|
59
|
-
allelix analyze tests/fixtures/mock_myhappygenes.txt --output report.json
|
|
28
|
+
# Analyze a genotype file
|
|
29
|
+
allelix analyze your_genotype_file.txt --output report.html
|
|
60
30
|
```
|
|
61
31
|
|
|
32
|
+
Requires Python 3.11+. See [Development](#development) for source installs and running tests.
|
|
33
|
+
|
|
62
34
|
## Supported Formats
|
|
63
35
|
|
|
64
36
|
| Format | Status | Notes |
|
|
@@ -92,7 +64,7 @@ Adding a new format means adding one file to `allelix/parsers/` and registering
|
|
|
92
64
|
| GWAS Catalog | ✓ | Public domain (EBI/NHGRI). Trait–SNP associations with p-values and effect sizes. Carrier rule (ADR-0007) requires the user to carry the risk allele. P-value magnitude scoring (ADR-0024) maps continuous p-values to the 0–10 scale; unknown-risk-allele entries fire on rsID match alone but are capped at 3.0. |
|
|
93
65
|
| gnomAD | ✓ | ODbL v1.0. **Enrichment annotator** — adds population allele frequency context to existing annotations. Shows how common each variant is in the general population (~16M exome variants from 730K individuals). A pathogenic variant that 35% of people carry reads very differently from one seen in 0.001%. Pre-built cache downloaded via `db update` (~6GB on disk). Use `--no-gnomad` to skip. |
|
|
94
66
|
| AlphaMissense | ✓ | CC BY 4.0. **Enrichment annotator** — adds DeepMind's protein-structure-based pathogenicity predictions to existing annotations. Scores 71M missense variants on a 0–1 scale: <0.34 = likely benign, >0.564 = likely pathogenic. Complements ClinVar's expert classifications with computational predictions — especially valuable for variants ClinVar hasn't reviewed yet. Pre-built cache downloaded via `db update` (~8GB on disk). Use `--no-alphamissense` to skip. |
|
|
95
|
-
| CADD | ✓ | LicenseRef-CADD (non-commercial). **Enrichment annotator** — adds PHRED-scaled deleteriousness scores from CADD v1.7. Ranks how deleterious any single-nucleotide variant is using 100+ annotation tracks (coding, non-coding, regulatory). PHRED 10 = top 10% most deleterious, 20 = top 1%, 30 = top 0.1%. **Opt-in** — disabled by default (`sources.cadd = false`). Enable via `allelix db update --cadd` or `allelix config set sources.cadd true`. Pre-built cache (~5 GB on disk, ~120M variant keys). Full mode available via pysam for GRCh38 data (`options.cadd_full = true`). Cache mode covers the large majority of variants present in gnomAD, AlphaMissense, and ClinVar — nearly every position allelix can annotate from its other databases. For genotyping chip data (23andMe, AncestryDNA, MyHappyGenes, etc.), cache and full mode produce effectively identical results because chip probes overwhelmingly target known, cataloged variants. Full mode adds coverage for novel or private variants that appear only in whole-genome or whole-exome sequencing data and are not in any pre-computed database. If your input is a genotyping chip file, cache mode is all you need. |
|
|
67
|
+
| CADD | ✓ | LicenseRef-CADD (non-commercial). **Enrichment annotator** — adds PHRED-scaled deleteriousness scores from CADD v1.7. Ranks how deleterious any single-nucleotide variant is using 100+ annotation tracks (coding, non-coding, regulatory). PHRED 10 = top 10% most deleterious, 20 = top 1%, 30 = top 0.1%. **Opt-in** — disabled by default (`sources.cadd = false`). Enable via `allelix db update --cadd` or `allelix config set sources.cadd true`. Use `--no-cadd` to skip enrichment for a single run. Pre-built cache (~5 GB on disk, ~120M variant keys). Full mode available via pysam for GRCh38 data (`options.cadd_full = true`). Cache mode covers the large majority of variants present in gnomAD, AlphaMissense, and ClinVar — nearly every position allelix can annotate from its other databases. For genotyping chip data (23andMe, AncestryDNA, MyHappyGenes, etc.), cache and full mode produce effectively identical results because chip probes overwhelmingly target known, cataloged variants. Full mode adds coverage for novel or private variants that appear only in whole-genome or whole-exome sequencing data and are not in any pre-computed database. If your input is a genotyping chip file, cache mode is all you need. |
|
|
96
68
|
|
|
97
69
|
### Known PharmGKB limitation: reference-genotype rows where ClinVar and CPIC both lack data
|
|
98
70
|
|
|
@@ -149,7 +121,7 @@ allelix config set license.commercial true
|
|
|
149
121
|
allelix config set license.cadd true
|
|
150
122
|
```
|
|
151
123
|
|
|
152
|
-
CLI flags (`--no-gnomad`, `--no-alphamissense`, `--exclude-snpedia`, `--cadd`) override the config for a single run. The config sets the baseline; flags override per-invocation.
|
|
124
|
+
CLI flags (`--no-gnomad`, `--no-alphamissense`, `--no-cadd`, `--exclude-snpedia`, `--cadd`) override the config for a single run. The config sets the baseline; flags override per-invocation.
|
|
153
125
|
|
|
154
126
|
### Database sizes and download times
|
|
155
127
|
|
|
@@ -181,7 +153,7 @@ Allelix source code is licensed under the **GNU Affero General Public License v3
|
|
|
181
153
|
| SNPedia | snpedia.com | CC BY-NC-SA 3.0 US | Attribution required, **non-commercial only**. Use `--exclude-snpedia` to omit. |
|
|
182
154
|
| gnomAD | gnomad.broadinstitute.org | ODbL v1.0 | Attribution required. Population allele frequencies for context; not a clinical annotator. Use `--no-gnomad` to omit. |
|
|
183
155
|
| AlphaMissense | zenodo.org/records/10813168 | CC BY 4.0 | Attribution required. Cheng et al., Science 2023. Missense variant pathogenicity predictions. Use `--no-alphamissense` to omit. |
|
|
184
|
-
| CADD | cadd.gs.washington.edu | LicenseRef-CADD | Attribution required, **non-commercial by default**. Commercial licenses available from UW CoMotion. Opt-in via `allelix db update --cadd`. |
|
|
156
|
+
| CADD | cadd.gs.washington.edu | LicenseRef-CADD | Attribution required, **non-commercial by default**. Commercial licenses available from UW CoMotion. Opt-in via `allelix db update --cadd`. Use `--no-cadd` to omit. |
|
|
185
157
|
|
|
186
158
|
**Commercial users:** When `license.commercial = true`, non-commercial sources are gated by a three-state permission model. SNPedia is permanently blocked (no commercial license is available). CADD is blocked by default but can be unlocked — the University of Washington offers commercial licenses at `https://els2.comotion.uw.edu/product/cadd-scores`; after purchasing, assert your license with `allelix config set license.cadd true` to re-enable CADD in commercial mode. All other databases (ClinVar, PharmGKB, GWAS Catalog, gnomAD, AlphaMissense) are compatible with commercial use. `allelix config show` displays the permission state for each source.
|
|
187
159
|
|
|
@@ -212,7 +184,7 @@ None of these are scraping errors. They are editorial inconsistencies on the sou
|
|
|
212
184
|
|
|
213
185
|
## Architecture & Design Decisions
|
|
214
186
|
|
|
215
|
-
The "why" behind major design choices lives in [`docs/adr/`](docs/adr/README.md) as Architecture Decision Records. Read these before proposing changes that touch the parser/annotator interfaces, the regulatory posture, or the data-handling model.
|
|
187
|
+
The "why" behind major design choices lives in [`docs/adr/`](https://github.com/dial481/allelix/blob/main/docs/adr/README.md) as Architecture Decision Records. Read these before proposing changes that touch the parser/annotator interfaces, the regulatory posture, or the data-handling model.
|
|
216
188
|
|
|
217
189
|
Notable load-bearing ADRs:
|
|
218
190
|
|
|
@@ -222,7 +194,7 @@ Notable load-bearing ADRs:
|
|
|
222
194
|
- **ADR-0009 — PharmGKB matches the user's exact normalized diploid call.**
|
|
223
195
|
- **ADR-0015 — Mock data generators are the contract.** Fixture shape must mirror real data shape; invariants tested.
|
|
224
196
|
|
|
225
|
-
Release history: see [`CHANGELOG.md`](CHANGELOG.md).
|
|
197
|
+
Release history: see [`CHANGELOG.md`](https://github.com/dial481/allelix/blob/main/CHANGELOG.md).
|
|
226
198
|
|
|
227
199
|
## Development
|
|
228
200
|
|
|
@@ -214,6 +214,7 @@ def _run_analysis_command(
|
|
|
214
214
|
no_update: bool = False,
|
|
215
215
|
no_gnomad: bool = False,
|
|
216
216
|
no_alphamissense: bool = False,
|
|
217
|
+
no_cadd: bool = False,
|
|
217
218
|
) -> None:
|
|
218
219
|
resolved = resolve_data_dir(data_dir)
|
|
219
220
|
if not no_update:
|
|
@@ -256,12 +257,13 @@ def _run_analysis_command(
|
|
|
256
257
|
ready = [a for a in ready if a.name != "alphamissense"]
|
|
257
258
|
|
|
258
259
|
cadd_annotator = None
|
|
259
|
-
|
|
260
|
+
if not no_cadd:
|
|
261
|
+
from allelix.annotators.cadd import CaddAnnotator
|
|
260
262
|
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
263
|
+
for a in ready:
|
|
264
|
+
if isinstance(a, CaddAnnotator):
|
|
265
|
+
cadd_annotator = a
|
|
266
|
+
break
|
|
265
267
|
ready = [a for a in ready if a.name != "cadd"]
|
|
266
268
|
|
|
267
269
|
if not_ready:
|
|
@@ -576,6 +578,12 @@ _NO_ALPHAMISSENSE_OPT = click.option(
|
|
|
576
578
|
default=False,
|
|
577
579
|
help="Skip AlphaMissense variant pathogenicity enrichment.",
|
|
578
580
|
)
|
|
581
|
+
_NO_CADD_OPT = click.option(
|
|
582
|
+
"--no-cadd",
|
|
583
|
+
is_flag=True,
|
|
584
|
+
default=False,
|
|
585
|
+
help="Skip CADD deleteriousness score enrichment.",
|
|
586
|
+
)
|
|
579
587
|
_BUILD_OPT = click.option(
|
|
580
588
|
"--build",
|
|
581
589
|
type=click.Choice(["grch37", "grch38", "auto"], case_sensitive=False),
|
|
@@ -672,6 +680,7 @@ def _emit_build_diagnostics(result: object) -> None:
|
|
|
672
680
|
@_NO_UPDATE_OPT
|
|
673
681
|
@_NO_GNOMAD_OPT
|
|
674
682
|
@_NO_ALPHAMISSENSE_OPT
|
|
683
|
+
@_NO_CADD_OPT
|
|
675
684
|
def analyze(
|
|
676
685
|
file_path: Path,
|
|
677
686
|
fmt: str | None,
|
|
@@ -690,6 +699,7 @@ def analyze(
|
|
|
690
699
|
no_update: bool,
|
|
691
700
|
no_gnomad: bool,
|
|
692
701
|
no_alphamissense: bool,
|
|
702
|
+
no_cadd: bool,
|
|
693
703
|
) -> None:
|
|
694
704
|
"""Annotate a genotype file against all ready reference databases."""
|
|
695
705
|
_run_analysis_command(
|
|
@@ -711,6 +721,7 @@ def analyze(
|
|
|
711
721
|
no_update=no_update,
|
|
712
722
|
no_gnomad=no_gnomad,
|
|
713
723
|
no_alphamissense=no_alphamissense,
|
|
724
|
+
no_cadd=no_cadd,
|
|
714
725
|
)
|
|
715
726
|
|
|
716
727
|
|
|
@@ -868,6 +879,7 @@ def compare(file1: Path, file2: Path, fmt1: str | None, fmt2: str | None) -> Non
|
|
|
868
879
|
@_NO_UPDATE_OPT
|
|
869
880
|
@_NO_GNOMAD_OPT
|
|
870
881
|
@_NO_ALPHAMISSENSE_OPT
|
|
882
|
+
@_NO_CADD_OPT
|
|
871
883
|
def methylation(
|
|
872
884
|
file_path: Path,
|
|
873
885
|
fmt: str | None,
|
|
@@ -886,6 +898,7 @@ def methylation(
|
|
|
886
898
|
no_update: bool,
|
|
887
899
|
no_gnomad: bool,
|
|
888
900
|
no_alphamissense: bool,
|
|
901
|
+
no_cadd: bool,
|
|
889
902
|
) -> None:
|
|
890
903
|
"""Methylation-pathway-focused report (MTHFR, MTR, MTRR, COMT, CBS, …)."""
|
|
891
904
|
excluded: set[str] = set()
|
|
@@ -912,6 +925,7 @@ def methylation(
|
|
|
912
925
|
no_update=no_update,
|
|
913
926
|
no_gnomad=no_gnomad,
|
|
914
927
|
no_alphamissense=no_alphamissense,
|
|
928
|
+
no_cadd=no_cadd,
|
|
915
929
|
)
|
|
916
930
|
|
|
917
931
|
|
|
@@ -933,6 +947,7 @@ def methylation(
|
|
|
933
947
|
@_NO_UPDATE_OPT
|
|
934
948
|
@_NO_GNOMAD_OPT
|
|
935
949
|
@_NO_ALPHAMISSENSE_OPT
|
|
950
|
+
@_NO_CADD_OPT
|
|
936
951
|
def pharmacogenomics(
|
|
937
952
|
file_path: Path,
|
|
938
953
|
fmt: str | None,
|
|
@@ -951,6 +966,7 @@ def pharmacogenomics(
|
|
|
951
966
|
no_update: bool,
|
|
952
967
|
no_gnomad: bool,
|
|
953
968
|
no_alphamissense: bool,
|
|
969
|
+
no_cadd: bool,
|
|
954
970
|
) -> None:
|
|
955
971
|
"""Pharmacogenomics-focused report (annotations from PharmGKB-style sources)."""
|
|
956
972
|
excluded: set[str] = set()
|
|
@@ -977,6 +993,7 @@ def pharmacogenomics(
|
|
|
977
993
|
no_update=no_update,
|
|
978
994
|
no_gnomad=no_gnomad,
|
|
979
995
|
no_alphamissense=no_alphamissense,
|
|
996
|
+
no_cadd=no_cadd,
|
|
980
997
|
)
|
|
981
998
|
|
|
982
999
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: allelix
|
|
3
|
-
Version: 1.8.2
|
|
3
|
+
Version: 1.8.4
|
|
4
4
|
Summary: Open-source genotype analysis toolkit. Format-agnostic ingestion, database-agnostic annotation, offline-first.
|
|
5
5
|
Author-email: dial481 <dial481@users.noreply.github.com>
|
|
6
6
|
License-Expression: AGPL-3.0-or-later
|
|
@@ -44,55 +44,27 @@ Open-source command-line toolkit for analyzing raw genotype files from consumer
|
|
|
44
44
|
> HTML/JSON/terminal reports, methylation + pharmacogenomics focused
|
|
45
45
|
> commands, report diffing, persistent config with commercial-mode
|
|
46
46
|
> safety switch. Build auto-detection from position data (ADR-0021).
|
|
47
|
-
> No regex on prose anywhere in production. **Latest: v1.8.
|
|
48
|
-
>
|
|
49
|
-
>
|
|
50
|
-
> [`CHANGELOG.md`](CHANGELOG.md).
|
|
47
|
+
> No regex on prose anywhere in production. **Latest: v1.8.4** —
|
|
48
|
+
> `--no-cadd` flag for licensing exclusion parity.
|
|
49
|
+
> Release notes:
|
|
50
|
+
> [`CHANGELOG.md`](https://github.com/dial481/allelix/blob/main/CHANGELOG.md).
|
|
51
51
|
|
|
52
52
|
## Quickstart
|
|
53
53
|
|
|
54
|
-
Requires Python 3.11+.
|
|
55
|
-
|
|
56
54
|
```bash
|
|
57
|
-
|
|
58
|
-
cd allelix
|
|
59
|
-
python -m venv .venv
|
|
60
|
-
source .venv/bin/activate
|
|
61
|
-
pip install -e ".[dev]"
|
|
62
|
-
|
|
63
|
-
# Generate a synthetic test fixture
|
|
64
|
-
python tests/generate_mock_data.py
|
|
65
|
-
|
|
66
|
-
# Show summary statistics for a genotype file
|
|
67
|
-
allelix stats tests/fixtures/mock_myhappygenes.txt
|
|
55
|
+
pip install allelix
|
|
68
56
|
|
|
69
|
-
# Download reference databases
|
|
70
|
-
#
|
|
71
|
-
# to skip the large enrichment databases. Re-runs skip unchanged sources.
|
|
57
|
+
# Download reference databases (~15GB with all sources).
|
|
58
|
+
# Use --no-gnomad / --no-alphamissense to skip the large ones.
|
|
72
59
|
# CADD is opt-in: allelix db update --cadd
|
|
73
60
|
allelix db update
|
|
74
|
-
allelix db status # see what's cached
|
|
75
61
|
|
|
76
|
-
# Analyze a genotype file
|
|
77
|
-
allelix analyze tests/fixtures/mock_myhappygenes.txt
|
|
78
|
-
|
|
79
|
-
# Same data, focused subsets
|
|
80
|
-
allelix methylation tests/fixtures/mock_myhappygenes.txt
|
|
81
|
-
allelix pharmacogenomics tests/fixtures/mock_myhappygenes.txt
|
|
82
|
-
|
|
83
|
-
# Compare two genotype files (coverage, concordance, strand-flip detection)
|
|
84
|
-
allelix compare file1.txt file2.txt
|
|
85
|
-
|
|
86
|
-
# Export to PLINK1 binary format (.bed/.bim/.fam) for plink2, ADMIXTURE, PRSice
|
|
87
|
-
# Expect ~60% monomorphic markers (A2=0) — genotyping chips probe many
|
|
88
|
-
# intronic/intergenic sites outside gnomAD's exome coverage.
|
|
89
|
-
allelix export plink genotype_file.txt -o output_prefix --build grch37
|
|
90
|
-
|
|
91
|
-
# Output to a self-contained HTML or JSON report
|
|
92
|
-
allelix analyze tests/fixtures/mock_myhappygenes.txt --output report.html
|
|
93
|
-
allelix analyze tests/fixtures/mock_myhappygenes.txt --output report.json
|
|
62
|
+
# Analyze a genotype file
|
|
63
|
+
allelix analyze your_genotype_file.txt --output report.html
|
|
94
64
|
```
|
|
95
65
|
|
|
66
|
+
Requires Python 3.11+. See [Development](#development) for source installs and running tests.
|
|
67
|
+
|
|
96
68
|
## Supported Formats
|
|
97
69
|
|
|
98
70
|
| Format | Status | Notes |
|
|
@@ -126,7 +98,7 @@ Adding a new format means adding one file to `allelix/parsers/` and registering
|
|
|
126
98
|
| GWAS Catalog | ✓ | Public domain (EBI/NHGRI). Trait–SNP associations with p-values and effect sizes. Carrier rule (ADR-0007) requires the user to carry the risk allele. P-value magnitude scoring (ADR-0024) maps continuous p-values to the 0–10 scale; unknown-risk-allele entries fire on rsID match alone but are capped at 3.0. |
|
|
127
99
|
| gnomAD | ✓ | ODbL v1.0. **Enrichment annotator** — adds population allele frequency context to existing annotations. Shows how common each variant is in the general population (~16M exome variants from 730K individuals). A pathogenic variant that 35% of people carry reads very differently from one seen in 0.001%. Pre-built cache downloaded via `db update` (~6GB on disk). Use `--no-gnomad` to skip. |
|
|
128
100
|
| AlphaMissense | ✓ | CC BY 4.0. **Enrichment annotator** — adds DeepMind's protein-structure-based pathogenicity predictions to existing annotations. Scores 71M missense variants on a 0–1 scale: <0.34 = likely benign, >0.564 = likely pathogenic. Complements ClinVar's expert classifications with computational predictions — especially valuable for variants ClinVar hasn't reviewed yet. Pre-built cache downloaded via `db update` (~8GB on disk). Use `--no-alphamissense` to skip. |
|
|
129
|
-
| CADD | ✓ | LicenseRef-CADD (non-commercial). **Enrichment annotator** — adds PHRED-scaled deleteriousness scores from CADD v1.7. Ranks how deleterious any single-nucleotide variant is using 100+ annotation tracks (coding, non-coding, regulatory). PHRED 10 = top 10% most deleterious, 20 = top 1%, 30 = top 0.1%. **Opt-in** — disabled by default (`sources.cadd = false`). Enable via `allelix db update --cadd` or `allelix config set sources.cadd true`. Pre-built cache (~5 GB on disk, ~120M variant keys). Full mode available via pysam for GRCh38 data (`options.cadd_full = true`). Cache mode covers the large majority of variants present in gnomAD, AlphaMissense, and ClinVar — nearly every position allelix can annotate from its other databases. For genotyping chip data (23andMe, AncestryDNA, MyHappyGenes, etc.), cache and full mode produce effectively identical results because chip probes overwhelmingly target known, cataloged variants. Full mode adds coverage for novel or private variants that appear only in whole-genome or whole-exome sequencing data and are not in any pre-computed database. If your input is a genotyping chip file, cache mode is all you need. |
|
|
101
|
+
| CADD | ✓ | LicenseRef-CADD (non-commercial). **Enrichment annotator** — adds PHRED-scaled deleteriousness scores from CADD v1.7. Ranks how deleterious any single-nucleotide variant is using 100+ annotation tracks (coding, non-coding, regulatory). PHRED 10 = top 10% most deleterious, 20 = top 1%, 30 = top 0.1%. **Opt-in** — disabled by default (`sources.cadd = false`). Enable via `allelix db update --cadd` or `allelix config set sources.cadd true`. Use `--no-cadd` to skip enrichment for a single run. Pre-built cache (~5 GB on disk, ~120M variant keys). Full mode available via pysam for GRCh38 data (`options.cadd_full = true`). Cache mode covers the large majority of variants present in gnomAD, AlphaMissense, and ClinVar — nearly every position allelix can annotate from its other databases. For genotyping chip data (23andMe, AncestryDNA, MyHappyGenes, etc.), cache and full mode produce effectively identical results because chip probes overwhelmingly target known, cataloged variants. Full mode adds coverage for novel or private variants that appear only in whole-genome or whole-exome sequencing data and are not in any pre-computed database. If your input is a genotyping chip file, cache mode is all you need. |
|
|
130
102
|
|
|
131
103
|
### Known PharmGKB limitation: reference-genotype rows where ClinVar and CPIC both lack data
|
|
132
104
|
|
|
@@ -183,7 +155,7 @@ allelix config set license.commercial true
|
|
|
183
155
|
allelix config set license.cadd true
|
|
184
156
|
```
|
|
185
157
|
|
|
186
|
-
CLI flags (`--no-gnomad`, `--no-alphamissense`, `--exclude-snpedia`, `--cadd`) override the config for a single run. The config sets the baseline; flags override per-invocation.
|
|
158
|
+
CLI flags (`--no-gnomad`, `--no-alphamissense`, `--no-cadd`, `--exclude-snpedia`, `--cadd`) override the config for a single run. The config sets the baseline; flags override per-invocation.
|
|
187
159
|
|
|
188
160
|
### Database sizes and download times
|
|
189
161
|
|
|
@@ -215,7 +187,7 @@ Allelix source code is licensed under the **GNU Affero General Public License v3
|
|
|
215
187
|
| SNPedia | snpedia.com | CC BY-NC-SA 3.0 US | Attribution required, **non-commercial only**. Use `--exclude-snpedia` to omit. |
|
|
216
188
|
| gnomAD | gnomad.broadinstitute.org | ODbL v1.0 | Attribution required. Population allele frequencies for context; not a clinical annotator. Use `--no-gnomad` to omit. |
|
|
217
189
|
| AlphaMissense | zenodo.org/records/10813168 | CC BY 4.0 | Attribution required. Cheng et al., Science 2023. Missense variant pathogenicity predictions. Use `--no-alphamissense` to omit. |
|
|
218
|
-
| CADD | cadd.gs.washington.edu | LicenseRef-CADD | Attribution required, **non-commercial by default**. Commercial licenses available from UW CoMotion. Opt-in via `allelix db update --cadd`. |
|
|
190
|
+
| CADD | cadd.gs.washington.edu | LicenseRef-CADD | Attribution required, **non-commercial by default**. Commercial licenses available from UW CoMotion. Opt-in via `allelix db update --cadd`. Use `--no-cadd` to omit. |
|
|
219
191
|
|
|
220
192
|
**Commercial users:** When `license.commercial = true`, non-commercial sources are gated by a three-state permission model. SNPedia is permanently blocked (no commercial license is available). CADD is blocked by default but can be unlocked — the University of Washington offers commercial licenses at `https://els2.comotion.uw.edu/product/cadd-scores`; after purchasing, assert your license with `allelix config set license.cadd true` to re-enable CADD in commercial mode. All other databases (ClinVar, PharmGKB, GWAS Catalog, gnomAD, AlphaMissense) are compatible with commercial use. `allelix config show` displays the permission state for each source.
|
|
221
193
|
|
|
@@ -246,7 +218,7 @@ None of these are scraping errors. They are editorial inconsistencies on the sou
|
|
|
246
218
|
|
|
247
219
|
## Architecture & Design Decisions
|
|
248
220
|
|
|
249
|
-
The "why" behind major design choices lives in [`docs/adr/`](docs/adr/README.md) as Architecture Decision Records. Read these before proposing changes that touch the parser/annotator interfaces, the regulatory posture, or the data-handling model.
|
|
221
|
+
The "why" behind major design choices lives in [`docs/adr/`](https://github.com/dial481/allelix/blob/main/docs/adr/README.md) as Architecture Decision Records. Read these before proposing changes that touch the parser/annotator interfaces, the regulatory posture, or the data-handling model.
|
|
250
222
|
|
|
251
223
|
Notable load-bearing ADRs:
|
|
252
224
|
|
|
@@ -256,7 +228,7 @@ Notable load-bearing ADRs:
|
|
|
256
228
|
- **ADR-0009 — PharmGKB matches the user's exact normalized diploid call.**
|
|
257
229
|
- **ADR-0015 — Mock data generators are the contract.** Fixture shape must mirror real data shape; invariants tested.
|
|
258
230
|
|
|
259
|
-
Release history: see [`CHANGELOG.md`](CHANGELOG.md).
|
|
231
|
+
Release history: see [`CHANGELOG.md`](https://github.com/dial481/allelix/blob/main/CHANGELOG.md).
|
|
260
232
|
|
|
261
233
|
## Development
|
|
262
234
|
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "allelix"
|
|
7
|
-
version = "1.8.2"
|
|
7
|
+
version = "1.8.4"
|
|
8
8
|
description = "Open-source genotype analysis toolkit. Format-agnostic ingestion, database-agnostic annotation, offline-first."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.11"
|
|
@@ -1527,6 +1527,58 @@ class TestExcludeSnpedia:
|
|
|
1527
1527
|
assert captured["exclude_sources"] == frozenset({"snpedia", "gwas"})
|
|
1528
1528
|
|
|
1529
1529
|
|
|
1530
|
+
class TestNoCaddFlag:
|
|
1531
|
+
"""--no-cadd wires through to no_cadd on all three analysis commands."""
|
|
1532
|
+
|
|
1533
|
+
def test_analyze_passes_no_cadd(self, mock_mhg_path, monkeypatch):
|
|
1534
|
+
captured: dict = {}
|
|
1535
|
+
|
|
1536
|
+
def fake_run(**kwargs):
|
|
1537
|
+
captured.update(kwargs)
|
|
1538
|
+
|
|
1539
|
+
monkeypatch.setattr("allelix.cli._run_analysis_command", fake_run)
|
|
1540
|
+
runner = CliRunner()
|
|
1541
|
+
result = runner.invoke(main, ["analyze", str(mock_mhg_path), "--no-cadd"])
|
|
1542
|
+
assert result.exit_code == 0, result.output
|
|
1543
|
+
assert captured["no_cadd"] is True
|
|
1544
|
+
|
|
1545
|
+
def test_analyze_default_no_cadd_false(self, mock_mhg_path, monkeypatch):
|
|
1546
|
+
captured: dict = {}
|
|
1547
|
+
|
|
1548
|
+
def fake_run(**kwargs):
|
|
1549
|
+
captured.update(kwargs)
|
|
1550
|
+
|
|
1551
|
+
monkeypatch.setattr("allelix.cli._run_analysis_command", fake_run)
|
|
1552
|
+
runner = CliRunner()
|
|
1553
|
+
result = runner.invoke(main, ["analyze", str(mock_mhg_path)])
|
|
1554
|
+
assert result.exit_code == 0, result.output
|
|
1555
|
+
assert captured["no_cadd"] is False
|
|
1556
|
+
|
|
1557
|
+
def test_methylation_passes_no_cadd(self, mock_mhg_path, monkeypatch):
|
|
1558
|
+
captured: dict = {}
|
|
1559
|
+
|
|
1560
|
+
def fake_run(**kwargs):
|
|
1561
|
+
captured.update(kwargs)
|
|
1562
|
+
|
|
1563
|
+
monkeypatch.setattr("allelix.cli._run_analysis_command", fake_run)
|
|
1564
|
+
runner = CliRunner()
|
|
1565
|
+
result = runner.invoke(main, ["methylation", str(mock_mhg_path), "--no-cadd"])
|
|
1566
|
+
assert result.exit_code == 0, result.output
|
|
1567
|
+
assert captured["no_cadd"] is True
|
|
1568
|
+
|
|
1569
|
+
def test_pharmacogenomics_passes_no_cadd(self, mock_mhg_path, monkeypatch):
|
|
1570
|
+
captured: dict = {}
|
|
1571
|
+
|
|
1572
|
+
def fake_run(**kwargs):
|
|
1573
|
+
captured.update(kwargs)
|
|
1574
|
+
|
|
1575
|
+
monkeypatch.setattr("allelix.cli._run_analysis_command", fake_run)
|
|
1576
|
+
runner = CliRunner()
|
|
1577
|
+
result = runner.invoke(main, ["pharmacogenomics", str(mock_mhg_path), "--no-cadd"])
|
|
1578
|
+
assert result.exit_code == 0, result.output
|
|
1579
|
+
assert captured["no_cadd"] is True
|
|
1580
|
+
|
|
1581
|
+
|
|
1530
1582
|
class TestHighValueNoCalls:
|
|
1531
1583
|
def test_stats_flags_dpyd_no_call(self, mock_mhg_path):
|
|
1532
1584
|
"""The MHG fixture has rs3918290 (DPYD) as a no-call; stats should flag it."""
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|