PyPI - allelix - Versions diffs - 1.8.1__py3-none-any.whl - Mend

allelix 1.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (58) hide show

allelix/__init__.py +12 -0
allelix/annotators/__init__.py +90 -0
allelix/annotators/alphamissense.py +228 -0
allelix/annotators/base.py +214 -0
allelix/annotators/cadd.py +283 -0
allelix/annotators/clinvar.py +404 -0
allelix/annotators/gnomad.py +212 -0
allelix/annotators/gwas.py +354 -0
allelix/annotators/pharmgkb.py +406 -0
allelix/annotators/snpedia.py +276 -0
allelix/cli.py +1524 -0
allelix/compare.py +149 -0
allelix/config.py +143 -0
allelix/data/__init__.py +3 -0
allelix/data/high_value_snps.yaml +64 -0
allelix/databases/__init__.py +30 -0
allelix/databases/_versions.py +16 -0
allelix/databases/alphamissense_loader.py +48 -0
allelix/databases/cadd_loader.py +49 -0
allelix/databases/cpic_loader.py +234 -0
allelix/databases/gnomad_loader.py +49 -0
allelix/databases/gwas_loader.py +546 -0
allelix/databases/loader_utils.py +80 -0
allelix/databases/manager.py +515 -0
allelix/databases/pharmgkb_loader.py +437 -0
allelix/databases/schema.py +165 -0
allelix/databases/snpedia_loader.py +44 -0
allelix/databases/snpedia_parser.py +342 -0
allelix/exporters/__init__.py +3 -0
allelix/exporters/plink.py +144 -0
allelix/models.py +117 -0
allelix/parsers/__init__.py +73 -0
allelix/parsers/_helpers.py +41 -0
allelix/parsers/ancestrydna.py +130 -0
allelix/parsers/base.py +97 -0
allelix/parsers/ftdna.py +129 -0
allelix/parsers/livingdna.py +121 -0
allelix/parsers/myhappygenes.py +135 -0
allelix/parsers/myheritage.py +118 -0
allelix/parsers/twentythreeandme.py +150 -0
allelix/py.typed +0 -0
allelix/reports/__init__.py +40 -0
allelix/reports/_pipeline.py +497 -0
allelix/reports/diff.py +169 -0
allelix/reports/high_value.py +133 -0
allelix/reports/html.py +1130 -0
allelix/reports/json_report.py +163 -0
allelix/reports/methylation.py +50 -0
allelix/reports/terminal.py +203 -0
allelix/utils/__init__.py +3 -0
allelix/utils/allele.py +87 -0
allelix/utils/build_detect.py +203 -0
allelix-1.8.1.dist-info/METADATA +276 -0
allelix-1.8.1.dist-info/RECORD +58 -0
allelix-1.8.1.dist-info/WHEEL +5 -0
allelix-1.8.1.dist-info/entry_points.txt +2 -0
allelix-1.8.1.dist-info/licenses/LICENSE +671 -0
allelix-1.8.1.dist-info/top_level.txt +1 -0

allelix/reports/json_report.py ADDED Viewed

@@ -0,0 +1,163 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# Copyright (C) 2026 dial481
+"""JSON report renderer.
+Output schema (versioned via `schema_version`):
+    {
+      "schema_version": "4",
+      "allelix_version": "1.1.0",
+      "generated_at": "2026-05-11T12:34:56+00:00",
+      "regulatory_notice": "...",
+      "input": {
+        "file": "genotype.txt",
+        "format": "myhappygenes",
+        "sample_id": "MHG000001",
+        "build": "GRCh37",
+        "total_variants": 2015,
+        "skipped_lines": 0
+      },
+      "annotators": [
+        {"name": "clinvar", "version": "20260101"}
+      ],
+      "filters": {
+        "min_magnitude": 5.0,
+        "category": null,
+        "genes": null
+      },
+      "annotations": [ ... ]
+    }
+Every annotation is source-attributed (ADR-0003); the renderer never adds
+or strips that field.
+"""
+from __future__ import annotations
+import json
+from dataclasses import asdict
+from datetime import UTC, datetime
+from typing import TYPE_CHECKING
+from allelix import __version__
+from allelix.reports import REGULATORY_NOTICE, atomic_write_text
+from allelix.reports._pipeline import rollup_gwas_duplicates
+if TYPE_CHECKING:
+    from collections.abc import Iterable
+    from pathlib import Path
+    from allelix.models import Annotation
+    from allelix.reports._pipeline import AnalysisResult
+    from allelix.reports.diff import DiffResult
+# Value written to the report's "schema_version" field by render_json.
+SCHEMA_VERSION = "4"
+def _annotation_dict(a: Annotation) -> dict:
+    """Serialize an annotation, adding AM caveat for non-protein sources."""
+    d = {k: v for k, v in asdict(a).items() if k != "is_must_include"}
+    d["zygosity"] = a.zygosity
+    if a.am_pathogenicity is not None and a.source == "pharmgkb":
+        d["am_caveat"] = "protein structure impact only"
+    return d
+# Public names of this module; REGULATORY_NOTICE is imported from allelix.reports and re-exported.
+__all__ = ["REGULATORY_NOTICE", "SCHEMA_VERSION", "render_json"]
+def _license_attributions(
+    annotators_used: list[tuple[str, str | None]],
+) -> list[dict[str, str]]:
+    """Return license attribution dicts from annotator LicenseDescriptors."""
+    import logging
+    from allelix.annotators import get_annotator_class
+    logger = logging.getLogger(__name__)
+    result: list[dict[str, str]] = []
+    for name, _version in annotators_used:
+        cls = get_annotator_class(name)
+        if cls is None:
+            logger.warning("No annotator class found for '%s' — attribution omitted", name)
+            continue
+        desc = cls.license
+        entry: dict[str, str] = {
+            "source": cls.display_name,
+            "source_url": desc.source_url or desc.license_url,
+            "license": desc.spdx,
+            "license_url": desc.license_url,
+            "attribution": desc.attribution_text,
+        }
+        if desc.citation:
+            entry["citation"] = desc.citation
+        result.append(entry)
+    return result
+def render_json(
+    result: AnalysisResult,
+    *,
+    output_path: Path,
+    min_magnitude: float = 0.0,
+    category: str | None = None,
+    genes: Iterable[str] | None = None,
+    source_min_magnitudes: dict[str, float] | None = None,
+    diff: DiffResult | None = None,
+    high_value_no_calls: list[dict[str, str]] | None = None,
+) -> int:
+    """Write a JSON report to `output_path`. Returns the number of annotations included."""
+    filtered = result.filter(
+        min_magnitude=min_magnitude,
+        category=category,
+        genes=genes,
+        source_min_magnitudes=source_min_magnitudes,
+    )
+    filtered = rollup_gwas_duplicates(filtered)
+    payload: dict = {
+        "schema_version": SCHEMA_VERSION,
+        "allelix_version": __version__,
+        "generated_at": datetime.now(UTC).isoformat(),
+        "regulatory_notice": REGULATORY_NOTICE,
+        "input": {
+            "file": result.file_path.name,
+            "format": result.parser_name,
+            "sample_id": result.sample_id,
+            "build": result.build,
+            "total_variants": result.total_variants,
+            "skipped_lines": result.skipped_count,
+        },
+        "annotators": [
+            {"name": name, "version": version} for name, version in result.annotators_used
+        ],
+        "filters": {
+            "min_magnitude": min_magnitude,
+            "category": category,
+            "genes": sorted(genes) if genes else None,
+        },
+        "annotations": [_annotation_dict(a) for a in filtered],
+    }
+    license_attrs = _license_attributions(result.annotators_used)
+    if license_attrs:
+        payload["license_attributions"] = license_attrs
+    if high_value_no_calls:
+        payload["high_value_no_calls"] = high_value_no_calls
+    if diff is not None:
+        from allelix.reports.diff import diff_annotation_to_dict, summarize_diff
+        payload["diff"] = {
+            "previous_report": diff.previous_generated_at,
+            "summary": summarize_diff(diff),
+            "new": [
+                {k: v for k, v in asdict(a).items() if k != "is_must_include"} for a in diff.new
+            ],
+            "changed": [diff_annotation_to_dict(c) for c in diff.changed],
+            "removed": diff.removed,
+        }
+    atomic_write_text(output_path, json.dumps(payload, indent=2, sort_keys=False) + "\n")
+    return len(filtered)

allelix/reports/methylation.py ADDED Viewed

@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# Copyright (C) 2026 dial481
+"""Methylation pathway gene panel.
+The set is intentionally small and curated — covering the one-carbon /
+folate / methylation cycle genes most often discussed in consumer methylation
+reports. Not medical guidance; see ADR-0003 (regulatory posture).
+"""
+from __future__ import annotations
+# Folate / one-carbon / methylation cycle genes. Add via PR + ADR if expanding.
+# Held in a frozenset so the panel cannot be mutated at runtime; the gene
+# symbols below are listed in alphabetical order.
+METHYLATION_PANEL_GENES: frozenset[str] = frozenset(
+    {
+        "ACAT1",
+        "AHCY",
+        "BHMT",
+        "BHMT2",
+        "CBS",
+        "COMT",
+        "DHFR",
+        "DNMT1",
+        "DNMT3A",
+        "DNMT3B",
+        "FOLR1",
+        "FOLR2",
+        "FUT2",
+        "GNMT",
+        "GSTM1",
+        "GSTP1",
+        "MAOA",
+        "MAT1A",
+        "MAT2A",
+        "MAT2B",
+        "MTHFD1",
+        "MTHFD1L",
+        "MTHFR",
+        "MTR",
+        "MTRR",
+        "NOS3",
+        "PEMT",
+        "SHMT1",
+        "SHMT2",
+        "SLC19A1",
+        "SUOX",
+        "TCN1",
+        "TCN2",
+        "VDR",
+    }
+)

allelix/reports/terminal.py ADDED Viewed

@@ -0,0 +1,203 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# Copyright (C) 2026 dial481
+"""Terminal report rendering for `allelix analyze`."""
+from __future__ import annotations
+from typing import TYPE_CHECKING
+from rich.table import Table
+from allelix.reports._pipeline import rollup_gwas_duplicates
+if TYPE_CHECKING:
+    from collections.abc import Iterable
+    from rich.console import Console
+    from allelix.models import Annotation
+    from allelix.reports._pipeline import AnalysisResult
+    from allelix.reports.diff import DiffResult
+def render_terminal(
+    result: AnalysisResult,
+    console: Console,
+    *,
+    min_magnitude: float = 0.0,
+    category: str | None = None,
+    genes: Iterable[str] | None = None,
+    source_min_magnitudes: dict[str, float] | None = None,
+) -> int:
+    """Render an AnalysisResult as a Rich table. Returns annotation count.
+    Per ADR-0003 (regulatory posture), every row shows the source attribution
+    in its own column — no rendered claim is unattributed.
+    """
+    filtered = result.filter(
+        min_magnitude=min_magnitude,
+        category=category,
+        genes=genes,
+        source_min_magnitudes=source_min_magnitudes,
+    )
+    filtered = rollup_gwas_duplicates(filtered)
+    _print_table(filtered, console)
+    return len(filtered)
+def render_terminal_diff(
+    diff: DiffResult,
+    console: Console,
+) -> int:
+    """Render a diff summary and tables for new/changed/removed annotations."""
+    from allelix.reports.diff import summarize_diff
+    summary = summarize_diff(diff)
+    if not diff.has_changes:
+        console.print(f"[green]{summary}[/green]")
+        return 0
+    console.print(f"[bold]{summary}[/bold]")
+    total = 0
+    if diff.new:
+        table = Table(title=f"New Annotations ({len(diff.new)})")
+        table.add_column("rsID", style="cyan", no_wrap=True)
+        table.add_column("Gene", style="magenta", no_wrap=True)
+        table.add_column("Source", style="blue", no_wrap=True)
+        table.add_column("Significance", style="yellow")
+        table.add_column("Review Status", style="dim")
+        table.add_column("Magnitude", justify="right")
+        table.add_column("Genotype", no_wrap=True)
+        table.add_column("Condition", overflow="fold")
+        for a in diff.new:
+            table.add_row(
+                a.rsid,
+                a.gene or "—",
+                a.attribution,
+                a.significance,
+                a.review_status or "—",
+                f"{a.magnitude:.1f}",
+                a.genotype_match,
+                a.condition or "—",
+            )
+        console.print(table)
+        total += len(diff.new)
+    if diff.changed:
+        table = Table(title=f"Changed Annotations ({len(diff.changed)})")
+        table.add_column("rsID", style="cyan", no_wrap=True)
+        table.add_column("Gene", style="magenta", no_wrap=True)
+        table.add_column("Source", style="blue", no_wrap=True)
+        table.add_column("Old Sig", style="dim")
+        table.add_column("New Sig", style="yellow")
+        table.add_column("Review Status", style="dim")
+        table.add_column("Old Mag", justify="right", style="dim")
+        table.add_column("New Mag", justify="right")
+        table.add_column("Condition", overflow="fold")
+        for c in diff.changed:
+            table.add_row(
+                c.current.rsid,
+                c.current.gene or "—",
+                c.current.attribution,
+                c.previous_significance,
+                c.current.significance,
+                c.current.review_status or "—",
+                f"{c.previous_magnitude:.1f}",
+                f"{c.current.magnitude:.1f}",
+                c.current.condition or "—",
+            )
+        console.print(table)
+        total += len(diff.changed)
+    if diff.removed:
+        table = Table(title=f"Removed Annotations ({len(diff.removed)})")
+        table.add_column("rsID", style="dim cyan", no_wrap=True)
+        table.add_column("Gene", style="dim magenta", no_wrap=True)
+        table.add_column("Source", style="dim blue", no_wrap=True)
+        table.add_column("Significance", style="dim")
+        table.add_column("Review Status", style="dim")
+        table.add_column("Magnitude", justify="right", style="dim")
+        table.add_column("Condition", overflow="fold", style="dim")
+        for d in diff.removed:
+            table.add_row(
+                d.get("rsid", ""),
+                d.get("gene", "") or "—",
+                d.get("attribution", ""),
+                d.get("significance", ""),
+                d.get("review_status", "") or "—",
+                f"{d.get('magnitude', 0.0):.1f}",
+                d.get("condition", "") or "—",
+            )
+        console.print(table)
+        total += len(diff.removed)
+    return total
+def _format_freq(af: float | None) -> str:
+    if af is None:
+        return "—"
+    pct = af * 100
+    if pct < 0.01:
+        return "<0.01%"
+    return f"{pct:.2f}%"
+def _print_table(filtered: list[Annotation], console: Console) -> None:
+    if not filtered:
+        console.print("[yellow]No annotations matched the current filters.[/yellow]")
+        return
+    has_freq = any(a.allele_frequency is not None for a in filtered)
+    has_am = any(a.am_pathogenicity is not None for a in filtered)
+    has_am_caveat = any(
+        a.am_pathogenicity is not None and a.source == "pharmgkb" for a in filtered
+    )
+    has_cadd = any(a.cadd_phred is not None for a in filtered)
+    table = Table(title=f"Annotations ({len(filtered)})")
+    table.add_column("rsID", style="cyan", no_wrap=True)
+    table.add_column("Gene", style="magenta", no_wrap=True)
+    table.add_column("Source", style="blue", no_wrap=True)
+    table.add_column("Significance", style="yellow")
+    table.add_column("Review Status", style="dim")
+    table.add_column("Magnitude", justify="right")
+    table.add_column("Genotype", no_wrap=True)
+    table.add_column("Zygosity", no_wrap=True)
+    if has_freq:
+        table.add_column("Freq", justify="right", no_wrap=True)
+    if has_am:
+        table.add_column("AM", justify="right", no_wrap=True)
+    if has_cadd:
+        table.add_column("CADD", justify="right", no_wrap=True)
+    table.add_column("Condition", overflow="fold")
+    for a in filtered:
+        row = [
+            a.rsid,
+            a.gene or "—",
+            a.attribution,
+            a.significance,
+            a.review_status or "—",
+            f"{a.magnitude:.1f}",
+            a.genotype_match,
+            a.zygosity,
+        ]
+        if has_freq:
+            row.append(_format_freq(a.allele_frequency))
+        if has_am:
+            if a.am_pathogenicity is not None:
+                am_str = f"{a.am_pathogenicity:.3f}"
+                if a.source == "pharmgkb":
+                    am_str = f"[dim]{am_str}*[/dim]"
+                row.append(am_str)
+            else:
+                row.append("—")
+        if has_cadd:
+            row.append(f"{a.cadd_phred:.1f}" if a.cadd_phred is not None else "—")
+        row.append(a.condition or "—")
+        table.add_row(*row)
+    console.print(table)
+    if has_am_caveat:
+        console.print("[dim]* AM score on drug-response row — protein structure impact only[/dim]")

allelix/utils/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# Copyright (C) 2026 dial481
+"""Utility modules: strand flipping, allele complement, etc."""

allelix/utils/allele.py ADDED Viewed

@@ -0,0 +1,87 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# Copyright (C) 2026 dial481
+"""Strand flipping, complement logic, and ambiguous-SNP detection.
+A SNP read on the reverse strand has its alleles complemented (A↔T, C↔G).
+Two databases reporting the "same" variant on opposite strands will list
+opposite allele letters. For most SNPs this is unambiguous and reversible.
+For A/T and C/G SNPs (palindromic), the complement equals the alternative —
+so a strand-flip is undetectable from sequence alone and is best handled by
+extra information (allele frequency, surrounding context).
+ADR-0010 documents the design.
+"""
+from __future__ import annotations
+from allelix.models import NO_CALL_MARKER
+# Complement of each of the four bases; any other symbol has no entry here.
+_COMPLEMENT: dict[str, str] = {"A": "T", "T": "A", "C": "G", "G": "C"}
+# A/T and C/G SNPs are palindromic; their complement equals the alternative,
+# so strand orientation cannot be inferred from the alleles alone.
+# Stored as unordered pairs so (ref, alt) order does not matter on lookup.
+_AMBIGUOUS_PAIRS: frozenset[frozenset[str]] = frozenset(
+    {frozenset({"A", "T"}), frozenset({"C", "G"})}
+)
+def complement(allele: str) -> str:
+    """Return the reverse-complement of a single allele string.
+    A → T, T → A, C → G, G → C. The no-call marker `-` and any unrecognized
+    character are returned unchanged. Handles indels (multi-base alleles) by
+    complementing each base in reverse order.
+    """
+    if allele == NO_CALL_MARKER or not allele:
+        return allele
+    if len(allele) == 1:
+        return _COMPLEMENT.get(allele, allele)
+    return "".join(_COMPLEMENT.get(b, b) for b in reversed(allele))
+def flip_genotype(allele1: str, allele2: str) -> tuple[str, str]:
+    """Return both alleles complemented (the reverse-strand reading)."""
+    return complement(allele1), complement(allele2)
+_PALINDROMIC = frozenset({("A", "T"), ("T", "A"), ("C", "G"), ("G", "C")})
+def resolve_strand(user_allele: str, gnomad_ref: str, gnomad_alt: str) -> str | None:
+    """Return reference-forward allele, or None if ambiguous.
+    Maps an array-reported allele to its reference-forward equivalent
+    using gnomAD's ref/alt as the ground truth. If the user allele
+    matches ref or alt directly, it's already forward. If the
+    complement matches, the array was minus-strand. Palindromic SNPs
+    (A/T, C/G ref/alt pairs) cannot be resolved and return None.
+    Only operates on single-base alleles. Multi-base alleles (indels)
+    pass through as-is — array indels are rare and not minus-strand
+    reported.
+    """
+    if len(user_allele) != 1:
+        return user_allele
+    if user_allele in (gnomad_ref, gnomad_alt):
+        return user_allele
+    comp = _COMPLEMENT.get(user_allele)
+    if comp is None:
+        return None
+    if comp in (gnomad_ref, gnomad_alt):
+        if (gnomad_ref, gnomad_alt) in _PALINDROMIC:
+            return None
+        return comp
+    return None
+def is_strand_ambiguous(ref: str, alt: str) -> bool:
+    """True if (ref, alt) is an A/T or C/G pair — strand cannot be inferred.
+    Multi-base indels and any allele containing a no-call or unknown letter
+    are reported as not ambiguous (they have other ways to disambiguate).
+    """
+    if len(ref) != 1 or len(alt) != 1:
+        return False
+    if ref not in _COMPLEMENT or alt not in _COMPLEMENT:
+        return False
+    return frozenset({ref, alt}) in _AMBIGUOUS_PAIRS