allelix 1.8.4__tar.gz → 1.9.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. {allelix-1.8.4 → allelix-1.9.0}/PKG-INFO +6 -3
  2. {allelix-1.8.4 → allelix-1.9.0}/README.md +5 -2
  3. {allelix-1.8.4 → allelix-1.9.0}/allelix/cli.py +56 -1
  4. {allelix-1.8.4 → allelix-1.9.0}/allelix/reports/_pipeline.py +13 -3
  5. {allelix-1.8.4 → allelix-1.9.0}/allelix/reports/html.py +2 -0
  6. {allelix-1.8.4 → allelix-1.9.0}/allelix/reports/json_report.py +4 -1
  7. {allelix-1.8.4 → allelix-1.9.0}/allelix/reports/terminal.py +2 -0
  8. {allelix-1.8.4 → allelix-1.9.0}/allelix.egg-info/PKG-INFO +6 -3
  9. {allelix-1.8.4 → allelix-1.9.0}/pyproject.toml +1 -1
  10. {allelix-1.8.4 → allelix-1.9.0}/tests/test_cli.py +195 -0
  11. {allelix-1.8.4 → allelix-1.9.0}/LICENSE +0 -0
  12. {allelix-1.8.4 → allelix-1.9.0}/allelix/__init__.py +0 -0
  13. {allelix-1.8.4 → allelix-1.9.0}/allelix/annotators/__init__.py +0 -0
  14. {allelix-1.8.4 → allelix-1.9.0}/allelix/annotators/alphamissense.py +0 -0
  15. {allelix-1.8.4 → allelix-1.9.0}/allelix/annotators/base.py +0 -0
  16. {allelix-1.8.4 → allelix-1.9.0}/allelix/annotators/cadd.py +0 -0
  17. {allelix-1.8.4 → allelix-1.9.0}/allelix/annotators/clinvar.py +0 -0
  18. {allelix-1.8.4 → allelix-1.9.0}/allelix/annotators/gnomad.py +0 -0
  19. {allelix-1.8.4 → allelix-1.9.0}/allelix/annotators/gwas.py +0 -0
  20. {allelix-1.8.4 → allelix-1.9.0}/allelix/annotators/pharmgkb.py +0 -0
  21. {allelix-1.8.4 → allelix-1.9.0}/allelix/annotators/snpedia.py +0 -0
  22. {allelix-1.8.4 → allelix-1.9.0}/allelix/compare.py +0 -0
  23. {allelix-1.8.4 → allelix-1.9.0}/allelix/config.py +0 -0
  24. {allelix-1.8.4 → allelix-1.9.0}/allelix/data/__init__.py +0 -0
  25. {allelix-1.8.4 → allelix-1.9.0}/allelix/data/high_value_snps.yaml +0 -0
  26. {allelix-1.8.4 → allelix-1.9.0}/allelix/databases/__init__.py +0 -0
  27. {allelix-1.8.4 → allelix-1.9.0}/allelix/databases/_versions.py +0 -0
  28. {allelix-1.8.4 → allelix-1.9.0}/allelix/databases/alphamissense_loader.py +0 -0
  29. {allelix-1.8.4 → allelix-1.9.0}/allelix/databases/cadd_loader.py +0 -0
  30. {allelix-1.8.4 → allelix-1.9.0}/allelix/databases/cpic_loader.py +0 -0
  31. {allelix-1.8.4 → allelix-1.9.0}/allelix/databases/gnomad_loader.py +0 -0
  32. {allelix-1.8.4 → allelix-1.9.0}/allelix/databases/gwas_loader.py +0 -0
  33. {allelix-1.8.4 → allelix-1.9.0}/allelix/databases/loader_utils.py +0 -0
  34. {allelix-1.8.4 → allelix-1.9.0}/allelix/databases/manager.py +0 -0
  35. {allelix-1.8.4 → allelix-1.9.0}/allelix/databases/pharmgkb_loader.py +0 -0
  36. {allelix-1.8.4 → allelix-1.9.0}/allelix/databases/schema.py +0 -0
  37. {allelix-1.8.4 → allelix-1.9.0}/allelix/databases/snpedia_loader.py +0 -0
  38. {allelix-1.8.4 → allelix-1.9.0}/allelix/databases/snpedia_parser.py +0 -0
  39. {allelix-1.8.4 → allelix-1.9.0}/allelix/exporters/__init__.py +0 -0
  40. {allelix-1.8.4 → allelix-1.9.0}/allelix/exporters/plink.py +0 -0
  41. {allelix-1.8.4 → allelix-1.9.0}/allelix/models.py +0 -0
  42. {allelix-1.8.4 → allelix-1.9.0}/allelix/parsers/__init__.py +0 -0
  43. {allelix-1.8.4 → allelix-1.9.0}/allelix/parsers/_helpers.py +0 -0
  44. {allelix-1.8.4 → allelix-1.9.0}/allelix/parsers/ancestrydna.py +0 -0
  45. {allelix-1.8.4 → allelix-1.9.0}/allelix/parsers/base.py +0 -0
  46. {allelix-1.8.4 → allelix-1.9.0}/allelix/parsers/ftdna.py +0 -0
  47. {allelix-1.8.4 → allelix-1.9.0}/allelix/parsers/livingdna.py +0 -0
  48. {allelix-1.8.4 → allelix-1.9.0}/allelix/parsers/myhappygenes.py +0 -0
  49. {allelix-1.8.4 → allelix-1.9.0}/allelix/parsers/myheritage.py +0 -0
  50. {allelix-1.8.4 → allelix-1.9.0}/allelix/parsers/twentythreeandme.py +0 -0
  51. {allelix-1.8.4 → allelix-1.9.0}/allelix/py.typed +0 -0
  52. {allelix-1.8.4 → allelix-1.9.0}/allelix/reports/__init__.py +0 -0
  53. {allelix-1.8.4 → allelix-1.9.0}/allelix/reports/diff.py +0 -0
  54. {allelix-1.8.4 → allelix-1.9.0}/allelix/reports/high_value.py +0 -0
  55. {allelix-1.8.4 → allelix-1.9.0}/allelix/reports/methylation.py +0 -0
  56. {allelix-1.8.4 → allelix-1.9.0}/allelix/utils/__init__.py +0 -0
  57. {allelix-1.8.4 → allelix-1.9.0}/allelix/utils/allele.py +0 -0
  58. {allelix-1.8.4 → allelix-1.9.0}/allelix/utils/build_detect.py +0 -0
  59. {allelix-1.8.4 → allelix-1.9.0}/allelix.egg-info/SOURCES.txt +0 -0
  60. {allelix-1.8.4 → allelix-1.9.0}/allelix.egg-info/dependency_links.txt +0 -0
  61. {allelix-1.8.4 → allelix-1.9.0}/allelix.egg-info/entry_points.txt +0 -0
  62. {allelix-1.8.4 → allelix-1.9.0}/allelix.egg-info/requires.txt +0 -0
  63. {allelix-1.8.4 → allelix-1.9.0}/allelix.egg-info/top_level.txt +0 -0
  64. {allelix-1.8.4 → allelix-1.9.0}/setup.cfg +0 -0
  65. {allelix-1.8.4 → allelix-1.9.0}/tests/test_cli_helpers.py +0 -0
  66. {allelix-1.8.4 → allelix-1.9.0}/tests/test_compare.py +0 -0
  67. {allelix-1.8.4 → allelix-1.9.0}/tests/test_config.py +0 -0
  68. {allelix-1.8.4 → allelix-1.9.0}/tests/test_end_to_end.py +0 -0
  69. {allelix-1.8.4 → allelix-1.9.0}/tests/test_mock_data_invariants.py +0 -0
  70. {allelix-1.8.4 → allelix-1.9.0}/tests/test_models.py +0 -0
  71. {allelix-1.8.4 → allelix-1.9.0}/tests/test_registry.py +0 -0
  72. {allelix-1.8.4 → allelix-1.9.0}/tests/test_version.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: allelix
3
- Version: 1.8.4
3
+ Version: 1.9.0
4
4
  Summary: Open-source genotype analysis toolkit. Format-agnostic ingestion, database-agnostic annotation, offline-first.
5
5
  Author-email: dial481 <dial481@users.noreply.github.com>
6
6
  License-Expression: AGPL-3.0-or-later
@@ -44,8 +44,8 @@ Open-source command-line toolkit for analyzing raw genotype files from consumer
44
44
  > HTML/JSON/terminal reports, methylation + pharmacogenomics focused
45
45
  > commands, report diffing, persistent config with commercial-mode
46
46
  > safety switch. Build auto-detection from position data (ADR-0021).
47
- > No regex on prose anywhere in production. **Latest: v1.8.4** —
48
- > `--no-cadd` flag for licensing exclusion parity.
47
+ > No regex on prose anywhere in production. **Latest: v1.9.0** —
48
+ > `--filter-file` flag for custom-panel filtering on `analyze`.
49
49
  > Release notes:
50
50
  > [`CHANGELOG.md`](https://github.com/dial481/allelix/blob/main/CHANGELOG.md).
51
51
 
@@ -61,6 +61,9 @@ allelix db update
61
61
 
62
62
  # Analyze a genotype file
63
63
  allelix analyze your_genotype_file.txt --output report.html
64
+
65
+ # Filter to a custom panel (rsIDs + gene names, one per line; '#' comments and blank lines ignored)
66
+ allelix analyze your_genotype_file.txt --filter-file my_panel.txt --output report.html
64
67
  ```
65
68
 
66
69
  Requires Python 3.11+. See [Development](#development) for source installs and running tests.
@@ -10,8 +10,8 @@ Open-source command-line toolkit for analyzing raw genotype files from consumer
10
10
  > HTML/JSON/terminal reports, methylation + pharmacogenomics focused
11
11
  > commands, report diffing, persistent config with commercial-mode
12
12
  > safety switch. Build auto-detection from position data (ADR-0021).
13
- > No regex on prose anywhere in production. **Latest: v1.8.4** —
14
- > `--no-cadd` flag for licensing exclusion parity.
13
+ > No regex on prose anywhere in production. **Latest: v1.9.0** —
14
+ > `--filter-file` flag for custom-panel filtering on `analyze`.
15
15
  > Release notes:
16
16
  > [`CHANGELOG.md`](https://github.com/dial481/allelix/blob/main/CHANGELOG.md).
17
17
 
@@ -27,6 +27,9 @@ allelix db update
27
27
 
28
28
  # Analyze a genotype file
29
29
  allelix analyze your_genotype_file.txt --output report.html
30
+
31
+ # Filter to a custom panel (rsIDs + gene names, one per line; '#' comments and blank lines ignored)
32
+ allelix analyze your_genotype_file.txt --filter-file my_panel.txt --output report.html
30
33
  ```
31
34
 
32
35
  Requires Python 3.11+. See [Development](#development) for source installs and running tests.
@@ -5,6 +5,7 @@
5
5
  from __future__ import annotations
6
6
 
7
7
  import logging
8
+ import re
8
9
  import sys
9
10
  import time
10
11
  from pathlib import Path
@@ -195,6 +196,35 @@ def _format_from_path(output: Path, override: str | None) -> str:
195
196
  )
196
197
 
197
198
 
199
+ _RSID_PATTERN = re.compile(r"^rs\d+$", re.IGNORECASE)
200
+
201
+
202
+ def _parse_filter_file(path: Path) -> tuple[frozenset[str], frozenset[str]]:
203
+ r"""Parse a filter file into ``(gene_names, rsids)``.
204
+
205
+ Lines matching ``^rs\d+$`` (case-insensitive) are rsIDs. Everything
206
+ else is a gene name. Lines starting with ``#`` and blank lines are
207
+ ignored. Gene names starting with ``RS`` (e.g., RSPO1, RSF1) are
208
+ correctly classified as gene names, not rsIDs.
209
+
210
+ Input is case-tolerant; output is canonical: rsIDs are normalized to
211
+ lowercase (``rs1801133``), gene names to uppercase (``MTHFR``). The
212
+ filter recorded in JSON output therefore looks identical regardless
213
+ of how the user typed the entries in the filter file.
214
+ """
215
+ genes: set[str] = set()
216
+ rsids: set[str] = set()
217
+ for raw in path.read_text().splitlines():
218
+ line = raw.strip()
219
+ if not line or line.startswith("#"):
220
+ continue
221
+ if _RSID_PATTERN.match(line):
222
+ rsids.add(line.lower())
223
+ else:
224
+ genes.add(line.upper())
225
+ return frozenset(genes), frozenset(rsids)
226
+
227
+
198
228
  def _run_analysis_command(
199
229
  file_path: Path,
200
230
  fmt: str | None,
@@ -204,6 +234,7 @@ def _run_analysis_command(
204
234
  min_magnitude: float,
205
235
  category: str | None,
206
236
  genes: frozenset[str] | None,
237
+ rsids: frozenset[str] | None = None,
207
238
  build: str | None = None,
208
239
  include_benign: bool = False,
209
240
  gwas_min_magnitude: float | None = None,
@@ -335,6 +366,7 @@ def _run_analysis_command(
335
366
  min_magnitude=min_magnitude,
336
367
  category=category,
337
368
  genes=genes,
369
+ rsids=rsids,
338
370
  source_min_magnitudes=source_floors,
339
371
  )
340
372
  from allelix.reports._pipeline import rollup_gwas_duplicates
@@ -356,6 +388,7 @@ def _run_analysis_command(
356
388
  min_magnitude=min_magnitude,
357
389
  category=category,
358
390
  genes=genes,
391
+ rsids=rsids,
359
392
  source_min_magnitudes=source_floors,
360
393
  )
361
394
  else:
@@ -373,6 +406,7 @@ def _run_analysis_command(
373
406
  min_magnitude=min_magnitude,
374
407
  category=category,
375
408
  genes=genes,
409
+ rsids=rsids,
376
410
  source_min_magnitudes=source_floors,
377
411
  diff=diff_result,
378
412
  high_value_no_calls=hv_dicts,
@@ -384,6 +418,7 @@ def _run_analysis_command(
384
418
  min_magnitude=min_magnitude,
385
419
  category=category,
386
420
  genes=genes,
421
+ rsids=rsids,
387
422
  source_min_magnitudes=source_floors,
388
423
  diff=diff_result,
389
424
  high_value_no_calls=hv_warning_lines,
@@ -560,6 +595,16 @@ _DIFF_OPT = click.option(
560
595
  "Not a monitoring tool — use for version-to-version validation."
561
596
  ),
562
597
  )
598
+ _FILTER_FILE_OPT = click.option(
599
+ "--filter-file",
600
+ type=click.Path(exists=True, dir_okay=False, path_type=Path),
601
+ default=None,
602
+ help=(
603
+ "Plain text file with rsIDs and/or gene names (one per line) to "
604
+ "filter the report. Lines matching '^rs\\d+$' are rsIDs; everything "
605
+ "else is a gene name. Comments (#) and blank lines are ignored."
606
+ ),
607
+ )
563
608
  _NO_UPDATE_OPT = click.option(
564
609
  "--no-update",
565
610
  is_flag=True,
@@ -677,6 +722,7 @@ def _emit_build_diagnostics(result: object) -> None:
677
722
  @_GWAS_ALL_OPT
678
723
  @_EXCLUDE_SNPEDIA_OPT
679
724
  @_DIFF_OPT
725
+ @_FILTER_FILE_OPT
680
726
  @_NO_UPDATE_OPT
681
727
  @_NO_GNOMAD_OPT
682
728
  @_NO_ALPHAMISSENSE_OPT
@@ -696,12 +742,20 @@ def analyze(
696
742
  gwas_all: bool,
697
743
  exclude_snpedia: bool,
698
744
  diff_path: Path | None,
745
+ filter_file: Path | None,
699
746
  no_update: bool,
700
747
  no_gnomad: bool,
701
748
  no_alphamissense: bool,
702
749
  no_cadd: bool,
703
750
  ) -> None:
704
751
  """Annotate a genotype file against all ready reference databases."""
752
+ filter_genes: frozenset[str] | None = None
753
+ filter_rsids: frozenset[str] | None = None
754
+ if filter_file is not None:
755
+ filter_genes, filter_rsids = _parse_filter_file(filter_file)
756
+ # Empty sets (file had only comments/blanks) still apply — they
757
+ # mean "match nothing", producing an empty report.
758
+
705
759
  _run_analysis_command(
706
760
  file_path=file_path,
707
761
  fmt=fmt,
@@ -710,7 +764,8 @@ def analyze(
710
764
  report_format=report_format,
711
765
  min_magnitude=min_magnitude,
712
766
  category=category,
713
- genes=None,
767
+ genes=filter_genes,
768
+ rsids=filter_rsids,
714
769
  build=_normalize_cli_build(build),
715
770
  include_benign=include_benign,
716
771
  gwas_min_magnitude=gwas_min_magnitude,
@@ -105,6 +105,7 @@ class AnalysisResult:
105
105
  min_magnitude: float = 0.0,
106
106
  category: str | None = None,
107
107
  genes: Iterable[str] | None = None,
108
+ rsids: Iterable[str] | None = None,
108
109
  source_min_magnitudes: dict[str, float] | None = None,
109
110
  ) -> list[Annotation]:
110
111
  """Apply the standard filters and return a sorted list of annotations.
@@ -117,8 +118,14 @@ class AnalysisResult:
117
118
  entry, that value IS the floor for that source — it can raise OR
118
119
  lower the global ``min_magnitude``. Sources without an entry use
119
120
  the global floor.
121
+
122
+ `genes` and `rsids` combine with OR: when either is provided, an
123
+ annotation passes if it matches the gene set OR the rsid set.
124
+ Empty collections (vs None) mean "match nothing" — an empty
125
+ filter file produces an empty report.
120
126
  """
121
- gene_set = {g.upper() for g in genes} if genes else None
127
+ gene_set = {g.upper() for g in genes} if genes is not None else None
128
+ rsid_set = {r.lower() for r in rsids} if rsids is not None else None
122
129
  out: list[Annotation] = []
123
130
  for a in self.annotations:
124
131
  if (
@@ -133,8 +140,11 @@ class AnalysisResult:
133
140
  continue
134
141
  if category is not None and a.category != category:
135
142
  continue
136
- if gene_set is not None and (a.gene or "").upper() not in gene_set:
137
- continue
143
+ if gene_set is not None or rsid_set is not None:
144
+ gene_match = gene_set is not None and (a.gene or "").upper() in gene_set
145
+ rsid_match = rsid_set is not None and a.rsid.lower() in rsid_set
146
+ if not gene_match and not rsid_match:
147
+ continue
138
148
  out.append(a)
139
149
  out.sort(key=lambda a: (-a.magnitude, a.rsid))
140
150
  return out
@@ -951,6 +951,7 @@ def render_html(
951
951
  min_magnitude: float = 0.0,
952
952
  category: str | None = None,
953
953
  genes: Iterable[str] | None = None,
954
+ rsids: Iterable[str] | None = None,
954
955
  source_min_magnitudes: dict[str, float] | None = None,
955
956
  title: str = "Allelix Genotype Report",
956
957
  diff: DiffResult | None = None,
@@ -961,6 +962,7 @@ def render_html(
961
962
  min_magnitude=min_magnitude,
962
963
  category=category,
963
964
  genes=genes,
965
+ rsids=rsids,
964
966
  source_min_magnitudes=source_min_magnitudes,
965
967
  )
966
968
  filtered = rollup_gwas_duplicates(filtered)
@@ -103,6 +103,7 @@ def render_json(
103
103
  min_magnitude: float = 0.0,
104
104
  category: str | None = None,
105
105
  genes: Iterable[str] | None = None,
106
+ rsids: Iterable[str] | None = None,
106
107
  source_min_magnitudes: dict[str, float] | None = None,
107
108
  diff: DiffResult | None = None,
108
109
  high_value_no_calls: list[dict[str, str]] | None = None,
@@ -112,6 +113,7 @@ def render_json(
112
113
  min_magnitude=min_magnitude,
113
114
  category=category,
114
115
  genes=genes,
116
+ rsids=rsids,
115
117
  source_min_magnitudes=source_min_magnitudes,
116
118
  )
117
119
  filtered = rollup_gwas_duplicates(filtered)
@@ -134,7 +136,8 @@ def render_json(
134
136
  "filters": {
135
137
  "min_magnitude": min_magnitude,
136
138
  "category": category,
137
- "genes": sorted(genes) if genes else None,
139
+ "genes": sorted(genes) if genes is not None else None,
140
+ "rsids": sorted(rsids) if rsids is not None else None,
138
141
  },
139
142
  "annotations": [_annotation_dict(a) for a in filtered],
140
143
  }
@@ -27,6 +27,7 @@ def render_terminal(
27
27
  min_magnitude: float = 0.0,
28
28
  category: str | None = None,
29
29
  genes: Iterable[str] | None = None,
30
+ rsids: Iterable[str] | None = None,
30
31
  source_min_magnitudes: dict[str, float] | None = None,
31
32
  ) -> int:
32
33
  """Render an AnalysisResult as a Rich table. Returns annotation count.
@@ -38,6 +39,7 @@ def render_terminal(
38
39
  min_magnitude=min_magnitude,
39
40
  category=category,
40
41
  genes=genes,
42
+ rsids=rsids,
41
43
  source_min_magnitudes=source_min_magnitudes,
42
44
  )
43
45
  filtered = rollup_gwas_duplicates(filtered)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: allelix
3
- Version: 1.8.4
3
+ Version: 1.9.0
4
4
  Summary: Open-source genotype analysis toolkit. Format-agnostic ingestion, database-agnostic annotation, offline-first.
5
5
  Author-email: dial481 <dial481@users.noreply.github.com>
6
6
  License-Expression: AGPL-3.0-or-later
@@ -44,8 +44,8 @@ Open-source command-line toolkit for analyzing raw genotype files from consumer
44
44
  > HTML/JSON/terminal reports, methylation + pharmacogenomics focused
45
45
  > commands, report diffing, persistent config with commercial-mode
46
46
  > safety switch. Build auto-detection from position data (ADR-0021).
47
- > No regex on prose anywhere in production. **Latest: v1.8.4** —
48
- > `--no-cadd` flag for licensing exclusion parity.
47
+ > No regex on prose anywhere in production. **Latest: v1.9.0** —
48
+ > `--filter-file` flag for custom-panel filtering on `analyze`.
49
49
  > Release notes:
50
50
  > [`CHANGELOG.md`](https://github.com/dial481/allelix/blob/main/CHANGELOG.md).
51
51
 
@@ -61,6 +61,9 @@ allelix db update
61
61
 
62
62
  # Analyze a genotype file
63
63
  allelix analyze your_genotype_file.txt --output report.html
64
+
65
+ # Filter to a custom panel (rsIDs + gene names, one per line; '#' comments and blank lines ignored)
66
+ allelix analyze your_genotype_file.txt --filter-file my_panel.txt --output report.html
64
67
  ```
65
68
 
66
69
  Requires Python 3.11+. See [Development](#development) for source installs and running tests.
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "allelix"
7
- version = "1.8.4"
7
+ version = "1.9.0"
8
8
  description = "Open-source genotype analysis toolkit. Format-agnostic ingestion, database-agnostic annotation, offline-first."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.11"
@@ -1579,6 +1579,201 @@ class TestNoCaddFlag:
1579
1579
  assert captured["no_cadd"] is True
1580
1580
 
1581
1581
 
1582
+ class TestParseFilterFile:
1583
+ """Unit tests for _parse_filter_file (parser classification)."""
1584
+
1585
+ def test_rsid_lowercase(self, tmp_path):
1586
+ from allelix.cli import _parse_filter_file
1587
+
1588
+ f = tmp_path / "filter.txt"
1589
+ f.write_text("rs1801133\n")
1590
+ genes, rsids = _parse_filter_file(f)
1591
+ assert genes == frozenset()
1592
+ assert rsids == frozenset({"rs1801133"})
1593
+
1594
+ def test_rsid_uppercase_normalized_to_lowercase(self, tmp_path):
1595
+ """Input case-tolerant, output canonical: RS1801133 → rs1801133."""
1596
+ from allelix.cli import _parse_filter_file
1597
+
1598
+ f = tmp_path / "filter.txt"
1599
+ f.write_text("RS1801133\n")
1600
+ genes, rsids = _parse_filter_file(f)
1601
+ assert genes == frozenset()
1602
+ assert rsids == frozenset({"rs1801133"})
1603
+
1604
+ def test_gene_lowercase_normalized_to_uppercase(self, tmp_path):
1605
+ """Input case-tolerant, output canonical: mthfr → MTHFR."""
1606
+ from allelix.cli import _parse_filter_file
1607
+
1608
+ f = tmp_path / "filter.txt"
1609
+ f.write_text("mthfr\n")
1610
+ genes, rsids = _parse_filter_file(f)
1611
+ assert genes == frozenset({"MTHFR"})
1612
+ assert rsids == frozenset()
1613
+
1614
+ def test_mixed_messy_case_normalized(self, tmp_path):
1615
+ """End-to-end case-mixing across rsIDs and genes."""
1616
+ from allelix.cli import _parse_filter_file
1617
+
1618
+ f = tmp_path / "filter.txt"
1619
+ f.write_text("Rs1801133\ncomt\nRSPO1\nRS4680\nmThFr\n")
1620
+ genes, rsids = _parse_filter_file(f)
1621
+ assert genes == frozenset({"COMT", "RSPO1", "MTHFR"})
1622
+ assert rsids == frozenset({"rs1801133", "rs4680"})
1623
+
1624
+ def test_gene_only(self, tmp_path):
1625
+ from allelix.cli import _parse_filter_file
1626
+
1627
+ f = tmp_path / "filter.txt"
1628
+ f.write_text("MTHFR\n")
1629
+ genes, rsids = _parse_filter_file(f)
1630
+ assert genes == frozenset({"MTHFR"})
1631
+ assert rsids == frozenset()
1632
+
1633
+ def test_gene_starting_with_rs_prefix_is_gene_not_rsid(self, tmp_path):
1634
+ """RSPO1, RSF1, RSC1A1 are real gene names — must not be classified as rsIDs."""
1635
+ from allelix.cli import _parse_filter_file
1636
+
1637
+ f = tmp_path / "filter.txt"
1638
+ f.write_text("RSPO1\nRSF1\nRSC1A1\n")
1639
+ genes, rsids = _parse_filter_file(f)
1640
+ assert genes == frozenset({"RSPO1", "RSF1", "RSC1A1"})
1641
+ assert rsids == frozenset()
1642
+
1643
+ def test_mixed(self, tmp_path):
1644
+ from allelix.cli import _parse_filter_file
1645
+
1646
+ f = tmp_path / "filter.txt"
1647
+ f.write_text("rs1801133\nMTHFR\nrs4680\nCOMT\n")
1648
+ genes, rsids = _parse_filter_file(f)
1649
+ assert genes == frozenset({"MTHFR", "COMT"})
1650
+ assert rsids == frozenset({"rs1801133", "rs4680"})
1651
+
1652
+ def test_comments_and_blanks_ignored(self, tmp_path):
1653
+ from allelix.cli import _parse_filter_file
1654
+
1655
+ f = tmp_path / "filter.txt"
1656
+ f.write_text("# this is a comment\n\nMTHFR\n\n# another\nrs1801133\n")
1657
+ genes, rsids = _parse_filter_file(f)
1658
+ assert genes == frozenset({"MTHFR"})
1659
+ assert rsids == frozenset({"rs1801133"})
1660
+
1661
+ def test_empty_file_returns_empty_sets(self, tmp_path):
1662
+ from allelix.cli import _parse_filter_file
1663
+
1664
+ f = tmp_path / "filter.txt"
1665
+ f.write_text("")
1666
+ genes, rsids = _parse_filter_file(f)
1667
+ assert genes == frozenset()
1668
+ assert rsids == frozenset()
1669
+
1670
+ def test_comments_only_returns_empty_sets(self, tmp_path):
1671
+ from allelix.cli import _parse_filter_file
1672
+
1673
+ f = tmp_path / "filter.txt"
1674
+ f.write_text("# only a comment\n# another\n\n")
1675
+ genes, rsids = _parse_filter_file(f)
1676
+ assert genes == frozenset()
1677
+ assert rsids == frozenset()
1678
+
1679
+
1680
+ class TestFilterFileOnAnalyze:
1681
+ """--filter-file is only on analyze; threads through _run_analysis_command."""
1682
+
1683
+ def test_analyze_rsid_only(self, mock_mhg_path, tmp_path, monkeypatch):
1684
+ captured: dict = {}
1685
+
1686
+ def fake_run(**kwargs):
1687
+ captured.update(kwargs)
1688
+
1689
+ monkeypatch.setattr("allelix.cli._run_analysis_command", fake_run)
1690
+ f = tmp_path / "filter.txt"
1691
+ f.write_text("rs1801133\n")
1692
+ runner = CliRunner()
1693
+ result = runner.invoke(main, ["analyze", str(mock_mhg_path), "--filter-file", str(f)])
1694
+ assert result.exit_code == 0, result.output
1695
+ assert captured["genes"] == frozenset()
1696
+ assert captured["rsids"] == frozenset({"rs1801133"})
1697
+
1698
+ def test_analyze_gene_only(self, mock_mhg_path, tmp_path, monkeypatch):
1699
+ captured: dict = {}
1700
+
1701
+ def fake_run(**kwargs):
1702
+ captured.update(kwargs)
1703
+
1704
+ monkeypatch.setattr("allelix.cli._run_analysis_command", fake_run)
1705
+ f = tmp_path / "filter.txt"
1706
+ f.write_text("MTHFR\n")
1707
+ runner = CliRunner()
1708
+ result = runner.invoke(main, ["analyze", str(mock_mhg_path), "--filter-file", str(f)])
1709
+ assert result.exit_code == 0, result.output
1710
+ assert captured["genes"] == frozenset({"MTHFR"})
1711
+ assert captured["rsids"] == frozenset()
1712
+
1713
+ def test_analyze_mixed_or_combination(self, mock_mhg_path, tmp_path, monkeypatch):
1714
+ captured: dict = {}
1715
+
1716
+ def fake_run(**kwargs):
1717
+ captured.update(kwargs)
1718
+
1719
+ monkeypatch.setattr("allelix.cli._run_analysis_command", fake_run)
1720
+ f = tmp_path / "filter.txt"
1721
+ f.write_text("rs1801133\nCOMT\n")
1722
+ runner = CliRunner()
1723
+ result = runner.invoke(main, ["analyze", str(mock_mhg_path), "--filter-file", str(f)])
1724
+ assert result.exit_code == 0, result.output
1725
+ assert captured["genes"] == frozenset({"COMT"})
1726
+ assert captured["rsids"] == frozenset({"rs1801133"})
1727
+
1728
+ def test_analyze_empty_filter_passes_empty_sets(self, mock_mhg_path, tmp_path, monkeypatch):
1729
+ """Empty filter file (only comments/blanks) threads empty frozensets through.
1730
+
1731
+ The empty-set → match-nothing semantic on AnalysisResult.filter()
1732
+ is covered by a direct unit test in tests/test_pipeline_filter.py;
1733
+ here we verify only that the CLI layer forwards empty frozensets,
1734
+ not None.
1735
+ """
1736
+ captured: dict = {}
1737
+
1738
+ def fake_run(**kwargs):
1739
+ captured.update(kwargs)
1740
+
1741
+ monkeypatch.setattr("allelix.cli._run_analysis_command", fake_run)
1742
+ f = tmp_path / "filter.txt"
1743
+ f.write_text("# only comments\n\n")
1744
+ runner = CliRunner()
1745
+ result = runner.invoke(main, ["analyze", str(mock_mhg_path), "--filter-file", str(f)])
1746
+ assert result.exit_code == 0, result.output
1747
+ assert captured["genes"] == frozenset()
1748
+ assert captured["rsids"] == frozenset()
1749
+
1750
+ def test_analyze_filter_file_nonexistent_path_errors(self, mock_mhg_path):
1751
+ runner = CliRunner()
1752
+ result = runner.invoke(
1753
+ main,
1754
+ ["analyze", str(mock_mhg_path), "--filter-file", "/does/not/exist.txt"],
1755
+ )
1756
+ assert result.exit_code != 0
1757
+
1758
+ def test_methylation_does_not_have_filter_file(self, mock_mhg_path):
1759
+ runner = CliRunner()
1760
+ result = runner.invoke(
1761
+ main,
1762
+ ["methylation", str(mock_mhg_path), "--filter-file", "/tmp/x.txt"],
1763
+ )
1764
+ assert result.exit_code != 0
1765
+ assert "no such option" in result.output.lower()
1766
+
1767
+ def test_pharmacogenomics_does_not_have_filter_file(self, mock_mhg_path):
1768
+ runner = CliRunner()
1769
+ result = runner.invoke(
1770
+ main,
1771
+ ["pharmacogenomics", str(mock_mhg_path), "--filter-file", "/tmp/x.txt"],
1772
+ )
1773
+ assert result.exit_code != 0
1774
+ assert "no such option" in result.output.lower()
1775
+
1776
+
1582
1777
  class TestHighValueNoCalls:
1583
1778
  def test_stats_flags_dpyd_no_call(self, mock_mhg_path):
1584
1779
  """The MHG fixture has rs3918290 (DPYD) as a no-call; stats should flag it."""
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes