allelix 1.8.4__tar.gz → 1.9.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {allelix-1.8.4 → allelix-1.9.0}/PKG-INFO +6 -3
- {allelix-1.8.4 → allelix-1.9.0}/README.md +5 -2
- {allelix-1.8.4 → allelix-1.9.0}/allelix/cli.py +56 -1
- {allelix-1.8.4 → allelix-1.9.0}/allelix/reports/_pipeline.py +13 -3
- {allelix-1.8.4 → allelix-1.9.0}/allelix/reports/html.py +2 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/reports/json_report.py +4 -1
- {allelix-1.8.4 → allelix-1.9.0}/allelix/reports/terminal.py +2 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix.egg-info/PKG-INFO +6 -3
- {allelix-1.8.4 → allelix-1.9.0}/pyproject.toml +1 -1
- {allelix-1.8.4 → allelix-1.9.0}/tests/test_cli.py +195 -0
- {allelix-1.8.4 → allelix-1.9.0}/LICENSE +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/__init__.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/annotators/__init__.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/annotators/alphamissense.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/annotators/base.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/annotators/cadd.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/annotators/clinvar.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/annotators/gnomad.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/annotators/gwas.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/annotators/pharmgkb.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/annotators/snpedia.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/compare.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/config.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/data/__init__.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/data/high_value_snps.yaml +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/databases/__init__.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/databases/_versions.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/databases/alphamissense_loader.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/databases/cadd_loader.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/databases/cpic_loader.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/databases/gnomad_loader.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/databases/gwas_loader.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/databases/loader_utils.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/databases/manager.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/databases/pharmgkb_loader.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/databases/schema.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/databases/snpedia_loader.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/databases/snpedia_parser.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/exporters/__init__.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/exporters/plink.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/models.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/parsers/__init__.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/parsers/_helpers.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/parsers/ancestrydna.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/parsers/base.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/parsers/ftdna.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/parsers/livingdna.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/parsers/myhappygenes.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/parsers/myheritage.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/parsers/twentythreeandme.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/py.typed +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/reports/__init__.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/reports/diff.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/reports/high_value.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/reports/methylation.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/utils/__init__.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/utils/allele.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix/utils/build_detect.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix.egg-info/SOURCES.txt +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix.egg-info/dependency_links.txt +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix.egg-info/entry_points.txt +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix.egg-info/requires.txt +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/allelix.egg-info/top_level.txt +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/setup.cfg +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/tests/test_cli_helpers.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/tests/test_compare.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/tests/test_config.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/tests/test_end_to_end.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/tests/test_mock_data_invariants.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/tests/test_models.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/tests/test_registry.py +0 -0
- {allelix-1.8.4 → allelix-1.9.0}/tests/test_version.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: allelix
|
|
3
|
-
Version: 1.8.4
|
|
3
|
+
Version: 1.9.0
|
|
4
4
|
Summary: Open-source genotype analysis toolkit. Format-agnostic ingestion, database-agnostic annotation, offline-first.
|
|
5
5
|
Author-email: dial481 <dial481@users.noreply.github.com>
|
|
6
6
|
License-Expression: AGPL-3.0-or-later
|
|
@@ -44,8 +44,8 @@ Open-source command-line toolkit for analyzing raw genotype files from consumer
|
|
|
44
44
|
> HTML/JSON/terminal reports, methylation + pharmacogenomics focused
|
|
45
45
|
> commands, report diffing, persistent config with commercial-mode
|
|
46
46
|
> safety switch. Build auto-detection from position data (ADR-0021).
|
|
47
|
-
> No regex on prose anywhere in production. **Latest: v1.
|
|
48
|
-
> `--
|
|
47
|
+
> No regex on prose anywhere in production. **Latest: v1.9.0** —
|
|
48
|
+
> `--filter-file` flag for custom-panel filtering on `analyze`.
|
|
49
49
|
> Release notes:
|
|
50
50
|
> [`CHANGELOG.md`](https://github.com/dial481/allelix/blob/main/CHANGELOG.md).
|
|
51
51
|
|
|
@@ -61,6 +61,9 @@ allelix db update
|
|
|
61
61
|
|
|
62
62
|
# Analyze a genotype file
|
|
63
63
|
allelix analyze your_genotype_file.txt --output report.html
|
|
64
|
+
|
|
65
|
+
# Filter to a custom panel (rsIDs + gene names, one per line; '#' comments and blank lines ignored)
|
|
66
|
+
allelix analyze your_genotype_file.txt --filter-file my_panel.txt --output report.html
|
|
64
67
|
```
|
|
65
68
|
|
|
66
69
|
Requires Python 3.11+. See [Development](#development) for source installs and running tests.
|
|
@@ -10,8 +10,8 @@ Open-source command-line toolkit for analyzing raw genotype files from consumer
|
|
|
10
10
|
> HTML/JSON/terminal reports, methylation + pharmacogenomics focused
|
|
11
11
|
> commands, report diffing, persistent config with commercial-mode
|
|
12
12
|
> safety switch. Build auto-detection from position data (ADR-0021).
|
|
13
|
-
> No regex on prose anywhere in production. **Latest: v1.
|
|
14
|
-
> `--
|
|
13
|
+
> No regex on prose anywhere in production. **Latest: v1.9.0** —
|
|
14
|
+
> `--filter-file` flag for custom-panel filtering on `analyze`.
|
|
15
15
|
> Release notes:
|
|
16
16
|
> [`CHANGELOG.md`](https://github.com/dial481/allelix/blob/main/CHANGELOG.md).
|
|
17
17
|
|
|
@@ -27,6 +27,9 @@ allelix db update
|
|
|
27
27
|
|
|
28
28
|
# Analyze a genotype file
|
|
29
29
|
allelix analyze your_genotype_file.txt --output report.html
|
|
30
|
+
|
|
31
|
+
# Filter to a custom panel (rsIDs + gene names, one per line; '#' comments and blank lines ignored)
|
|
32
|
+
allelix analyze your_genotype_file.txt --filter-file my_panel.txt --output report.html
|
|
30
33
|
```
|
|
31
34
|
|
|
32
35
|
Requires Python 3.11+. See [Development](#development) for source installs and running tests.
|
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
from __future__ import annotations
|
|
6
6
|
|
|
7
7
|
import logging
|
|
8
|
+
import re
|
|
8
9
|
import sys
|
|
9
10
|
import time
|
|
10
11
|
from pathlib import Path
|
|
@@ -195,6 +196,35 @@ def _format_from_path(output: Path, override: str | None) -> str:
|
|
|
195
196
|
)
|
|
196
197
|
|
|
197
198
|
|
|
199
|
+
_RSID_PATTERN = re.compile(r"^rs\d+$", re.IGNORECASE)
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def _parse_filter_file(path: Path) -> tuple[frozenset[str], frozenset[str]]:
|
|
203
|
+
r"""Parse a filter file into ``(gene_names, rsids)``.
|
|
204
|
+
|
|
205
|
+
Lines matching ``^rs\d+$`` (case-insensitive) are rsIDs. Everything
|
|
206
|
+
else is a gene name. Lines starting with ``#`` and blank lines are
|
|
207
|
+
ignored. Gene names starting with ``RS`` (e.g., RSPO1, RSF1) are
|
|
208
|
+
correctly classified as gene names, not rsIDs.
|
|
209
|
+
|
|
210
|
+
Input is case-tolerant; output is canonical: rsIDs are normalized to
|
|
211
|
+
lowercase (``rs1801133``), gene names to uppercase (``MTHFR``). The
|
|
212
|
+
filter recorded in JSON output therefore looks identical regardless
|
|
213
|
+
of how the user typed the entries in the filter file.
|
|
214
|
+
"""
|
|
215
|
+
genes: set[str] = set()
|
|
216
|
+
rsids: set[str] = set()
|
|
217
|
+
for raw in path.read_text().splitlines():
|
|
218
|
+
line = raw.strip()
|
|
219
|
+
if not line or line.startswith("#"):
|
|
220
|
+
continue
|
|
221
|
+
if _RSID_PATTERN.match(line):
|
|
222
|
+
rsids.add(line.lower())
|
|
223
|
+
else:
|
|
224
|
+
genes.add(line.upper())
|
|
225
|
+
return frozenset(genes), frozenset(rsids)
|
|
226
|
+
|
|
227
|
+
|
|
198
228
|
def _run_analysis_command(
|
|
199
229
|
file_path: Path,
|
|
200
230
|
fmt: str | None,
|
|
@@ -204,6 +234,7 @@ def _run_analysis_command(
|
|
|
204
234
|
min_magnitude: float,
|
|
205
235
|
category: str | None,
|
|
206
236
|
genes: frozenset[str] | None,
|
|
237
|
+
rsids: frozenset[str] | None = None,
|
|
207
238
|
build: str | None = None,
|
|
208
239
|
include_benign: bool = False,
|
|
209
240
|
gwas_min_magnitude: float | None = None,
|
|
@@ -335,6 +366,7 @@ def _run_analysis_command(
|
|
|
335
366
|
min_magnitude=min_magnitude,
|
|
336
367
|
category=category,
|
|
337
368
|
genes=genes,
|
|
369
|
+
rsids=rsids,
|
|
338
370
|
source_min_magnitudes=source_floors,
|
|
339
371
|
)
|
|
340
372
|
from allelix.reports._pipeline import rollup_gwas_duplicates
|
|
@@ -356,6 +388,7 @@ def _run_analysis_command(
|
|
|
356
388
|
min_magnitude=min_magnitude,
|
|
357
389
|
category=category,
|
|
358
390
|
genes=genes,
|
|
391
|
+
rsids=rsids,
|
|
359
392
|
source_min_magnitudes=source_floors,
|
|
360
393
|
)
|
|
361
394
|
else:
|
|
@@ -373,6 +406,7 @@ def _run_analysis_command(
|
|
|
373
406
|
min_magnitude=min_magnitude,
|
|
374
407
|
category=category,
|
|
375
408
|
genes=genes,
|
|
409
|
+
rsids=rsids,
|
|
376
410
|
source_min_magnitudes=source_floors,
|
|
377
411
|
diff=diff_result,
|
|
378
412
|
high_value_no_calls=hv_dicts,
|
|
@@ -384,6 +418,7 @@ def _run_analysis_command(
|
|
|
384
418
|
min_magnitude=min_magnitude,
|
|
385
419
|
category=category,
|
|
386
420
|
genes=genes,
|
|
421
|
+
rsids=rsids,
|
|
387
422
|
source_min_magnitudes=source_floors,
|
|
388
423
|
diff=diff_result,
|
|
389
424
|
high_value_no_calls=hv_warning_lines,
|
|
@@ -560,6 +595,16 @@ _DIFF_OPT = click.option(
|
|
|
560
595
|
"Not a monitoring tool — use for version-to-version validation."
|
|
561
596
|
),
|
|
562
597
|
)
|
|
598
|
+
_FILTER_FILE_OPT = click.option(
|
|
599
|
+
"--filter-file",
|
|
600
|
+
type=click.Path(exists=True, dir_okay=False, path_type=Path),
|
|
601
|
+
default=None,
|
|
602
|
+
help=(
|
|
603
|
+
"Plain text file with rsIDs and/or gene names (one per line) to "
|
|
604
|
+
"filter the report. Lines matching '^rs\\d+$' are rsIDs; everything "
|
|
605
|
+
"else is a gene name. Comments (#) and blank lines are ignored."
|
|
606
|
+
),
|
|
607
|
+
)
|
|
563
608
|
_NO_UPDATE_OPT = click.option(
|
|
564
609
|
"--no-update",
|
|
565
610
|
is_flag=True,
|
|
@@ -677,6 +722,7 @@ def _emit_build_diagnostics(result: object) -> None:
|
|
|
677
722
|
@_GWAS_ALL_OPT
|
|
678
723
|
@_EXCLUDE_SNPEDIA_OPT
|
|
679
724
|
@_DIFF_OPT
|
|
725
|
+
@_FILTER_FILE_OPT
|
|
680
726
|
@_NO_UPDATE_OPT
|
|
681
727
|
@_NO_GNOMAD_OPT
|
|
682
728
|
@_NO_ALPHAMISSENSE_OPT
|
|
@@ -696,12 +742,20 @@ def analyze(
|
|
|
696
742
|
gwas_all: bool,
|
|
697
743
|
exclude_snpedia: bool,
|
|
698
744
|
diff_path: Path | None,
|
|
745
|
+
filter_file: Path | None,
|
|
699
746
|
no_update: bool,
|
|
700
747
|
no_gnomad: bool,
|
|
701
748
|
no_alphamissense: bool,
|
|
702
749
|
no_cadd: bool,
|
|
703
750
|
) -> None:
|
|
704
751
|
"""Annotate a genotype file against all ready reference databases."""
|
|
752
|
+
filter_genes: frozenset[str] | None = None
|
|
753
|
+
filter_rsids: frozenset[str] | None = None
|
|
754
|
+
if filter_file is not None:
|
|
755
|
+
filter_genes, filter_rsids = _parse_filter_file(filter_file)
|
|
756
|
+
# Empty sets (file had only comments/blanks) still apply — they
|
|
757
|
+
# mean "match nothing", producing an empty report.
|
|
758
|
+
|
|
705
759
|
_run_analysis_command(
|
|
706
760
|
file_path=file_path,
|
|
707
761
|
fmt=fmt,
|
|
@@ -710,7 +764,8 @@ def analyze(
|
|
|
710
764
|
report_format=report_format,
|
|
711
765
|
min_magnitude=min_magnitude,
|
|
712
766
|
category=category,
|
|
713
|
-
genes=
|
|
767
|
+
genes=filter_genes,
|
|
768
|
+
rsids=filter_rsids,
|
|
714
769
|
build=_normalize_cli_build(build),
|
|
715
770
|
include_benign=include_benign,
|
|
716
771
|
gwas_min_magnitude=gwas_min_magnitude,
|
|
@@ -105,6 +105,7 @@ class AnalysisResult:
|
|
|
105
105
|
min_magnitude: float = 0.0,
|
|
106
106
|
category: str | None = None,
|
|
107
107
|
genes: Iterable[str] | None = None,
|
|
108
|
+
rsids: Iterable[str] | None = None,
|
|
108
109
|
source_min_magnitudes: dict[str, float] | None = None,
|
|
109
110
|
) -> list[Annotation]:
|
|
110
111
|
"""Apply the standard filters and return a sorted list of annotations.
|
|
@@ -117,8 +118,14 @@ class AnalysisResult:
|
|
|
117
118
|
entry, that value IS the floor for that source — it can raise OR
|
|
118
119
|
lower the global ``min_magnitude``. Sources without an entry use
|
|
119
120
|
the global floor.
|
|
121
|
+
|
|
122
|
+
`genes` and `rsids` combine with OR: when either is provided, an
|
|
123
|
+
annotation passes if it matches the gene set OR the rsid set.
|
|
124
|
+
Empty collections (vs None) mean "match nothing" — an empty
|
|
125
|
+
filter file produces an empty report.
|
|
120
126
|
"""
|
|
121
|
-
gene_set = {g.upper() for g in genes} if genes else None
|
|
127
|
+
gene_set = {g.upper() for g in genes} if genes is not None else None
|
|
128
|
+
rsid_set = {r.lower() for r in rsids} if rsids is not None else None
|
|
122
129
|
out: list[Annotation] = []
|
|
123
130
|
for a in self.annotations:
|
|
124
131
|
if (
|
|
@@ -133,8 +140,11 @@ class AnalysisResult:
|
|
|
133
140
|
continue
|
|
134
141
|
if category is not None and a.category != category:
|
|
135
142
|
continue
|
|
136
|
-
if gene_set is not None
|
|
137
|
-
|
|
143
|
+
if gene_set is not None or rsid_set is not None:
|
|
144
|
+
gene_match = gene_set is not None and (a.gene or "").upper() in gene_set
|
|
145
|
+
rsid_match = rsid_set is not None and a.rsid.lower() in rsid_set
|
|
146
|
+
if not gene_match and not rsid_match:
|
|
147
|
+
continue
|
|
138
148
|
out.append(a)
|
|
139
149
|
out.sort(key=lambda a: (-a.magnitude, a.rsid))
|
|
140
150
|
return out
|
|
@@ -951,6 +951,7 @@ def render_html(
|
|
|
951
951
|
min_magnitude: float = 0.0,
|
|
952
952
|
category: str | None = None,
|
|
953
953
|
genes: Iterable[str] | None = None,
|
|
954
|
+
rsids: Iterable[str] | None = None,
|
|
954
955
|
source_min_magnitudes: dict[str, float] | None = None,
|
|
955
956
|
title: str = "Allelix Genotype Report",
|
|
956
957
|
diff: DiffResult | None = None,
|
|
@@ -961,6 +962,7 @@ def render_html(
|
|
|
961
962
|
min_magnitude=min_magnitude,
|
|
962
963
|
category=category,
|
|
963
964
|
genes=genes,
|
|
965
|
+
rsids=rsids,
|
|
964
966
|
source_min_magnitudes=source_min_magnitudes,
|
|
965
967
|
)
|
|
966
968
|
filtered = rollup_gwas_duplicates(filtered)
|
|
@@ -103,6 +103,7 @@ def render_json(
|
|
|
103
103
|
min_magnitude: float = 0.0,
|
|
104
104
|
category: str | None = None,
|
|
105
105
|
genes: Iterable[str] | None = None,
|
|
106
|
+
rsids: Iterable[str] | None = None,
|
|
106
107
|
source_min_magnitudes: dict[str, float] | None = None,
|
|
107
108
|
diff: DiffResult | None = None,
|
|
108
109
|
high_value_no_calls: list[dict[str, str]] | None = None,
|
|
@@ -112,6 +113,7 @@ def render_json(
|
|
|
112
113
|
min_magnitude=min_magnitude,
|
|
113
114
|
category=category,
|
|
114
115
|
genes=genes,
|
|
116
|
+
rsids=rsids,
|
|
115
117
|
source_min_magnitudes=source_min_magnitudes,
|
|
116
118
|
)
|
|
117
119
|
filtered = rollup_gwas_duplicates(filtered)
|
|
@@ -134,7 +136,8 @@ def render_json(
|
|
|
134
136
|
"filters": {
|
|
135
137
|
"min_magnitude": min_magnitude,
|
|
136
138
|
"category": category,
|
|
137
|
-
"genes": sorted(genes) if genes else None,
|
|
139
|
+
"genes": sorted(genes) if genes is not None else None,
|
|
140
|
+
"rsids": sorted(rsids) if rsids is not None else None,
|
|
138
141
|
},
|
|
139
142
|
"annotations": [_annotation_dict(a) for a in filtered],
|
|
140
143
|
}
|
|
@@ -27,6 +27,7 @@ def render_terminal(
|
|
|
27
27
|
min_magnitude: float = 0.0,
|
|
28
28
|
category: str | None = None,
|
|
29
29
|
genes: Iterable[str] | None = None,
|
|
30
|
+
rsids: Iterable[str] | None = None,
|
|
30
31
|
source_min_magnitudes: dict[str, float] | None = None,
|
|
31
32
|
) -> int:
|
|
32
33
|
"""Render an AnalysisResult as a Rich table. Returns annotation count.
|
|
@@ -38,6 +39,7 @@ def render_terminal(
|
|
|
38
39
|
min_magnitude=min_magnitude,
|
|
39
40
|
category=category,
|
|
40
41
|
genes=genes,
|
|
42
|
+
rsids=rsids,
|
|
41
43
|
source_min_magnitudes=source_min_magnitudes,
|
|
42
44
|
)
|
|
43
45
|
filtered = rollup_gwas_duplicates(filtered)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: allelix
|
|
3
|
-
Version: 1.8.4
|
|
3
|
+
Version: 1.9.0
|
|
4
4
|
Summary: Open-source genotype analysis toolkit. Format-agnostic ingestion, database-agnostic annotation, offline-first.
|
|
5
5
|
Author-email: dial481 <dial481@users.noreply.github.com>
|
|
6
6
|
License-Expression: AGPL-3.0-or-later
|
|
@@ -44,8 +44,8 @@ Open-source command-line toolkit for analyzing raw genotype files from consumer
|
|
|
44
44
|
> HTML/JSON/terminal reports, methylation + pharmacogenomics focused
|
|
45
45
|
> commands, report diffing, persistent config with commercial-mode
|
|
46
46
|
> safety switch. Build auto-detection from position data (ADR-0021).
|
|
47
|
-
> No regex on prose anywhere in production. **Latest: v1.
|
|
48
|
-
> `--
|
|
47
|
+
> No regex on prose anywhere in production. **Latest: v1.9.0** —
|
|
48
|
+
> `--filter-file` flag for custom-panel filtering on `analyze`.
|
|
49
49
|
> Release notes:
|
|
50
50
|
> [`CHANGELOG.md`](https://github.com/dial481/allelix/blob/main/CHANGELOG.md).
|
|
51
51
|
|
|
@@ -61,6 +61,9 @@ allelix db update
|
|
|
61
61
|
|
|
62
62
|
# Analyze a genotype file
|
|
63
63
|
allelix analyze your_genotype_file.txt --output report.html
|
|
64
|
+
|
|
65
|
+
# Filter to a custom panel (rsIDs + gene names, one per line; '#' comments and blank lines ignored)
|
|
66
|
+
allelix analyze your_genotype_file.txt --filter-file my_panel.txt --output report.html
|
|
64
67
|
```
|
|
65
68
|
|
|
66
69
|
Requires Python 3.11+. See [Development](#development) for source installs and running tests.
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "allelix"
|
|
7
|
-
version = "1.8.4"
|
|
7
|
+
version = "1.9.0"
|
|
8
8
|
description = "Open-source genotype analysis toolkit. Format-agnostic ingestion, database-agnostic annotation, offline-first."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.11"
|
|
@@ -1579,6 +1579,201 @@ class TestNoCaddFlag:
|
|
|
1579
1579
|
assert captured["no_cadd"] is True
|
|
1580
1580
|
|
|
1581
1581
|
|
|
1582
|
+
class TestParseFilterFile:
|
|
1583
|
+
"""Unit tests for _parse_filter_file (parser classification)."""
|
|
1584
|
+
|
|
1585
|
+
def test_rsid_lowercase(self, tmp_path):
|
|
1586
|
+
from allelix.cli import _parse_filter_file
|
|
1587
|
+
|
|
1588
|
+
f = tmp_path / "filter.txt"
|
|
1589
|
+
f.write_text("rs1801133\n")
|
|
1590
|
+
genes, rsids = _parse_filter_file(f)
|
|
1591
|
+
assert genes == frozenset()
|
|
1592
|
+
assert rsids == frozenset({"rs1801133"})
|
|
1593
|
+
|
|
1594
|
+
def test_rsid_uppercase_normalized_to_lowercase(self, tmp_path):
|
|
1595
|
+
"""Input case-tolerant, output canonical: RS1801133 → rs1801133."""
|
|
1596
|
+
from allelix.cli import _parse_filter_file
|
|
1597
|
+
|
|
1598
|
+
f = tmp_path / "filter.txt"
|
|
1599
|
+
f.write_text("RS1801133\n")
|
|
1600
|
+
genes, rsids = _parse_filter_file(f)
|
|
1601
|
+
assert genes == frozenset()
|
|
1602
|
+
assert rsids == frozenset({"rs1801133"})
|
|
1603
|
+
|
|
1604
|
+
def test_gene_lowercase_normalized_to_uppercase(self, tmp_path):
|
|
1605
|
+
"""Input case-tolerant, output canonical: mthfr → MTHFR."""
|
|
1606
|
+
from allelix.cli import _parse_filter_file
|
|
1607
|
+
|
|
1608
|
+
f = tmp_path / "filter.txt"
|
|
1609
|
+
f.write_text("mthfr\n")
|
|
1610
|
+
genes, rsids = _parse_filter_file(f)
|
|
1611
|
+
assert genes == frozenset({"MTHFR"})
|
|
1612
|
+
assert rsids == frozenset()
|
|
1613
|
+
|
|
1614
|
+
def test_mixed_messy_case_normalized(self, tmp_path):
|
|
1615
|
+
"""End-to-end case-mixing across rsIDs and genes."""
|
|
1616
|
+
from allelix.cli import _parse_filter_file
|
|
1617
|
+
|
|
1618
|
+
f = tmp_path / "filter.txt"
|
|
1619
|
+
f.write_text("Rs1801133\ncomt\nRSPO1\nRS4680\nmThFr\n")
|
|
1620
|
+
genes, rsids = _parse_filter_file(f)
|
|
1621
|
+
assert genes == frozenset({"COMT", "RSPO1", "MTHFR"})
|
|
1622
|
+
assert rsids == frozenset({"rs1801133", "rs4680"})
|
|
1623
|
+
|
|
1624
|
+
def test_gene_only(self, tmp_path):
|
|
1625
|
+
from allelix.cli import _parse_filter_file
|
|
1626
|
+
|
|
1627
|
+
f = tmp_path / "filter.txt"
|
|
1628
|
+
f.write_text("MTHFR\n")
|
|
1629
|
+
genes, rsids = _parse_filter_file(f)
|
|
1630
|
+
assert genes == frozenset({"MTHFR"})
|
|
1631
|
+
assert rsids == frozenset()
|
|
1632
|
+
|
|
1633
|
+
def test_gene_starting_with_rs_prefix_is_gene_not_rsid(self, tmp_path):
|
|
1634
|
+
"""RSPO1, RSF1, RSC1A1 are real gene names — must not be classified as rsIDs."""
|
|
1635
|
+
from allelix.cli import _parse_filter_file
|
|
1636
|
+
|
|
1637
|
+
f = tmp_path / "filter.txt"
|
|
1638
|
+
f.write_text("RSPO1\nRSF1\nRSC1A1\n")
|
|
1639
|
+
genes, rsids = _parse_filter_file(f)
|
|
1640
|
+
assert genes == frozenset({"RSPO1", "RSF1", "RSC1A1"})
|
|
1641
|
+
assert rsids == frozenset()
|
|
1642
|
+
|
|
1643
|
+
def test_mixed(self, tmp_path):
|
|
1644
|
+
from allelix.cli import _parse_filter_file
|
|
1645
|
+
|
|
1646
|
+
f = tmp_path / "filter.txt"
|
|
1647
|
+
f.write_text("rs1801133\nMTHFR\nrs4680\nCOMT\n")
|
|
1648
|
+
genes, rsids = _parse_filter_file(f)
|
|
1649
|
+
assert genes == frozenset({"MTHFR", "COMT"})
|
|
1650
|
+
assert rsids == frozenset({"rs1801133", "rs4680"})
|
|
1651
|
+
|
|
1652
|
+
def test_comments_and_blanks_ignored(self, tmp_path):
|
|
1653
|
+
from allelix.cli import _parse_filter_file
|
|
1654
|
+
|
|
1655
|
+
f = tmp_path / "filter.txt"
|
|
1656
|
+
f.write_text("# this is a comment\n\nMTHFR\n\n# another\nrs1801133\n")
|
|
1657
|
+
genes, rsids = _parse_filter_file(f)
|
|
1658
|
+
assert genes == frozenset({"MTHFR"})
|
|
1659
|
+
assert rsids == frozenset({"rs1801133"})
|
|
1660
|
+
|
|
1661
|
+
def test_empty_file_returns_empty_sets(self, tmp_path):
|
|
1662
|
+
from allelix.cli import _parse_filter_file
|
|
1663
|
+
|
|
1664
|
+
f = tmp_path / "filter.txt"
|
|
1665
|
+
f.write_text("")
|
|
1666
|
+
genes, rsids = _parse_filter_file(f)
|
|
1667
|
+
assert genes == frozenset()
|
|
1668
|
+
assert rsids == frozenset()
|
|
1669
|
+
|
|
1670
|
+
def test_comments_only_returns_empty_sets(self, tmp_path):
|
|
1671
|
+
from allelix.cli import _parse_filter_file
|
|
1672
|
+
|
|
1673
|
+
f = tmp_path / "filter.txt"
|
|
1674
|
+
f.write_text("# only a comment\n# another\n\n")
|
|
1675
|
+
genes, rsids = _parse_filter_file(f)
|
|
1676
|
+
assert genes == frozenset()
|
|
1677
|
+
assert rsids == frozenset()
|
|
1678
|
+
|
|
1679
|
+
|
|
1680
|
+
class TestFilterFileOnAnalyze:
|
|
1681
|
+
"""--filter-file is only on analyze; threads through _run_analysis_command."""
|
|
1682
|
+
|
|
1683
|
+
def test_analyze_rsid_only(self, mock_mhg_path, tmp_path, monkeypatch):
|
|
1684
|
+
captured: dict = {}
|
|
1685
|
+
|
|
1686
|
+
def fake_run(**kwargs):
|
|
1687
|
+
captured.update(kwargs)
|
|
1688
|
+
|
|
1689
|
+
monkeypatch.setattr("allelix.cli._run_analysis_command", fake_run)
|
|
1690
|
+
f = tmp_path / "filter.txt"
|
|
1691
|
+
f.write_text("rs1801133\n")
|
|
1692
|
+
runner = CliRunner()
|
|
1693
|
+
result = runner.invoke(main, ["analyze", str(mock_mhg_path), "--filter-file", str(f)])
|
|
1694
|
+
assert result.exit_code == 0, result.output
|
|
1695
|
+
assert captured["genes"] == frozenset()
|
|
1696
|
+
assert captured["rsids"] == frozenset({"rs1801133"})
|
|
1697
|
+
|
|
1698
|
+
def test_analyze_gene_only(self, mock_mhg_path, tmp_path, monkeypatch):
|
|
1699
|
+
captured: dict = {}
|
|
1700
|
+
|
|
1701
|
+
def fake_run(**kwargs):
|
|
1702
|
+
captured.update(kwargs)
|
|
1703
|
+
|
|
1704
|
+
monkeypatch.setattr("allelix.cli._run_analysis_command", fake_run)
|
|
1705
|
+
f = tmp_path / "filter.txt"
|
|
1706
|
+
f.write_text("MTHFR\n")
|
|
1707
|
+
runner = CliRunner()
|
|
1708
|
+
result = runner.invoke(main, ["analyze", str(mock_mhg_path), "--filter-file", str(f)])
|
|
1709
|
+
assert result.exit_code == 0, result.output
|
|
1710
|
+
assert captured["genes"] == frozenset({"MTHFR"})
|
|
1711
|
+
assert captured["rsids"] == frozenset()
|
|
1712
|
+
|
|
1713
|
+
def test_analyze_mixed_or_combination(self, mock_mhg_path, tmp_path, monkeypatch):
|
|
1714
|
+
captured: dict = {}
|
|
1715
|
+
|
|
1716
|
+
def fake_run(**kwargs):
|
|
1717
|
+
captured.update(kwargs)
|
|
1718
|
+
|
|
1719
|
+
monkeypatch.setattr("allelix.cli._run_analysis_command", fake_run)
|
|
1720
|
+
f = tmp_path / "filter.txt"
|
|
1721
|
+
f.write_text("rs1801133\nCOMT\n")
|
|
1722
|
+
runner = CliRunner()
|
|
1723
|
+
result = runner.invoke(main, ["analyze", str(mock_mhg_path), "--filter-file", str(f)])
|
|
1724
|
+
assert result.exit_code == 0, result.output
|
|
1725
|
+
assert captured["genes"] == frozenset({"COMT"})
|
|
1726
|
+
assert captured["rsids"] == frozenset({"rs1801133"})
|
|
1727
|
+
|
|
1728
|
+
def test_analyze_empty_filter_passes_empty_sets(self, mock_mhg_path, tmp_path, monkeypatch):
|
|
1729
|
+
"""Empty filter file (only comments/blanks) threads empty frozensets through.
|
|
1730
|
+
|
|
1731
|
+
The empty-set → match-nothing semantic on AnalysisResult.filter()
|
|
1732
|
+
is covered by a direct unit test in tests/test_pipeline_filter.py;
|
|
1733
|
+
here we verify only that the CLI layer forwards empty frozensets,
|
|
1734
|
+
not None.
|
|
1735
|
+
"""
|
|
1736
|
+
captured: dict = {}
|
|
1737
|
+
|
|
1738
|
+
def fake_run(**kwargs):
|
|
1739
|
+
captured.update(kwargs)
|
|
1740
|
+
|
|
1741
|
+
monkeypatch.setattr("allelix.cli._run_analysis_command", fake_run)
|
|
1742
|
+
f = tmp_path / "filter.txt"
|
|
1743
|
+
f.write_text("# only comments\n\n")
|
|
1744
|
+
runner = CliRunner()
|
|
1745
|
+
result = runner.invoke(main, ["analyze", str(mock_mhg_path), "--filter-file", str(f)])
|
|
1746
|
+
assert result.exit_code == 0, result.output
|
|
1747
|
+
assert captured["genes"] == frozenset()
|
|
1748
|
+
assert captured["rsids"] == frozenset()
|
|
1749
|
+
|
|
1750
|
+
def test_analyze_filter_file_nonexistent_path_errors(self, mock_mhg_path):
|
|
1751
|
+
runner = CliRunner()
|
|
1752
|
+
result = runner.invoke(
|
|
1753
|
+
main,
|
|
1754
|
+
["analyze", str(mock_mhg_path), "--filter-file", "/does/not/exist.txt"],
|
|
1755
|
+
)
|
|
1756
|
+
assert result.exit_code != 0
|
|
1757
|
+
|
|
1758
|
+
def test_methylation_does_not_have_filter_file(self, mock_mhg_path):
|
|
1759
|
+
runner = CliRunner()
|
|
1760
|
+
result = runner.invoke(
|
|
1761
|
+
main,
|
|
1762
|
+
["methylation", str(mock_mhg_path), "--filter-file", "/tmp/x.txt"],
|
|
1763
|
+
)
|
|
1764
|
+
assert result.exit_code != 0
|
|
1765
|
+
assert "no such option" in result.output.lower()
|
|
1766
|
+
|
|
1767
|
+
def test_pharmacogenomics_does_not_have_filter_file(self, mock_mhg_path):
|
|
1768
|
+
runner = CliRunner()
|
|
1769
|
+
result = runner.invoke(
|
|
1770
|
+
main,
|
|
1771
|
+
["pharmacogenomics", str(mock_mhg_path), "--filter-file", "/tmp/x.txt"],
|
|
1772
|
+
)
|
|
1773
|
+
assert result.exit_code != 0
|
|
1774
|
+
assert "no such option" in result.output.lower()
|
|
1775
|
+
|
|
1776
|
+
|
|
1582
1777
|
class TestHighValueNoCalls:
|
|
1583
1778
|
def test_stats_flags_dpyd_no_call(self, mock_mhg_path):
|
|
1584
1779
|
"""The MHG fixture has rs3918290 (DPYD) as a no-call; stats should flag it."""
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|