PyPI - babappaomega - Versions diffs - 0.1.6__py3-none-any.whl - Mend

babappaomega 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

babappaomega/__init__.py +1 -0
babappaomega/cli.py +31 -0
babappaomega/encoding.py +23 -0
babappaomega/inference.py +184 -0
babappaomega/models.py +52 -0
babappaomega/tree.py +27 -0
babappaomega/utils.py +25 -0
babappaomega-0.1.6.dist-info/METADATA +142 -0
babappaomega-0.1.6.dist-info/RECORD +13 -0
babappaomega-0.1.6.dist-info/WHEEL +5 -0
babappaomega-0.1.6.dist-info/entry_points.txt +2 -0
babappaomega-0.1.6.dist-info/licenses/LICENSE +21 -0
babappaomega-0.1.6.dist-info/top_level.txt +1 -0

babappaomega/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ __version__ = "0.1.0"

babappaomega/cli.py ADDED Viewed

@@ -0,0 +1,31 @@
+import argparse
+from babappaomega.inference import run_inference
+def main():
+    parser = argparse.ArgumentParser(
+        prog="babappaomega",
+        description="BABAPPAΩ: episodic branch–site selection inference"
+    )
+    sub = parser.add_subparsers(dest="command", required=True)
+    p = sub.add_parser("predict", help="Run inference on an alignment")
+    p.add_argument("--alignment", required=True, help="Codon alignment (FASTA)")
+    p.add_argument("--tree", required=True, help="Phylogenetic tree (Newick)")
+    p.add_argument("--out", required=True, help="Output JSON file")
+    p.add_argument("--device", default="auto", choices=["auto", "cpu", "cuda"])
+    p.add_argument("--model", default="frozen")
+    args = parser.parse_args()
+    if args.command == "predict":
+        run_inference(
+            alignment_path=args.alignment,
+            tree_path=args.tree,
+            out_path=args.out,
+            device=args.device,
+            model_tag=args.model,
+        )
+if __name__ == "__main__":
+    main()

babappaomega/encoding.py ADDED Viewed

@@ -0,0 +1,23 @@
+import numpy as np
+from Bio import SeqIO
+CODONS = [
+    a+b+c for a in "ACGT" for b in "ACGT" for c in "ACGT"
+    if a+b+c not in ["TAA", "TAG", "TGA"]
+]
+CODON_TO_ID = {c: i for i, c in enumerate(CODONS)}
+def encode_alignment(fasta_path):
+    records = list(SeqIO.parse(fasta_path, "fasta"))
+    ntaxa = len(records)
+    seq_len = len(records[0].seq) // 3
+    tensor = np.zeros((ntaxa, seq_len), dtype=np.int64)
+    for i, rec in enumerate(records):
+        seq = str(rec.seq)
+        for j in range(seq_len):
+            codon = seq[3*j:3*j+3]
+            tensor[i, j] = CODON_TO_ID.get(codon, 0)
+    return tensor, ntaxa, seq_len

babappaomega/inference.py ADDED Viewed

@@ -0,0 +1,184 @@
+import json
+import csv
+import os
+import torch
+import numpy as np
+from datetime import datetime
+from babappaomega.utils import resolve_device
+from babappaomega.encoding import encode_alignment
+from babappaomega.tree import load_tree, enumerate_branches
+from babappaomega.models import ensure_model
+def load_model(model_tag: str, device: torch.device):
+    """
+    Load TorchScript BABAPPAΩ model from Zenodo.
+    """
+    if model_tag != "frozen":
+        raise ValueError(
+            f"Model '{model_tag}' is not available. "
+            "Only the frozen reference model is supported."
+        )
+    model_path = ensure_model(model_tag)
+    model = torch.jit.load(model_path, map_location=device)
+    model.eval()
+    return model
+@torch.no_grad()
+def run_inference(
+    alignment_path: str,
+    tree_path: str,
+    out_path: str,
+    device: str = "auto",
+    model_tag: str = "frozen",
+):
+    """
+    Run BABAPPAΩ inference on a codon alignment and phylogenetic tree.
+    """
+    # -------------------------
+    # Device resolution
+    # -------------------------
+    device = resolve_device(device)
+    # -------------------------
+    # Load model
+    # -------------------------
+    model = load_model(model_tag, device)
+    # -------------------------
+    # Encode inputs
+    # -------------------------
+    X, ntaxa, L = encode_alignment(alignment_path)
+    tree = load_tree(tree_path)
+    branches = enumerate_branches(tree)
+    X = torch.tensor(X, dtype=torch.long, device=device).unsqueeze(0)
+    if device.type == "cpu" and ntaxa > 120:
+        print(
+            "[BABAPPAΩ WARNING] Large number of taxa detected "
+            f"(n={ntaxa}). GPU acceleration is strongly recommended."
+        )
+    # -------------------------
+    # Forward pass (per-branch)
+    # -------------------------
+    n_branches = len(branches)
+    # --- Run ONCE to determine n_regimes ---
+    branch_mask = torch.zeros(
+        (1, n_branches),
+        dtype=torch.long,
+        device=device,
+    )
+    branch_mask[0, 0] = 1
+    outputs = model(X, branch_mask)
+    det_example, regime_example, _ = outputs
+    det_example = det_example.detach().cpu().numpy()[0]
+    regime_example = regime_example.detach().cpu().numpy()[0]
+    n_regimes = regime_example.shape[-1]
+    # --- Allocate matrices ---
+    det_matrix = np.zeros((n_branches, L), dtype=float)
+    regime_matrix = np.zeros((n_branches, L, n_regimes), dtype=float)
+    # --- Fill matrices ---
+    for b in range(n_branches):
+        branch_mask.zero_()
+        branch_mask[0, b] = 1
+        outputs = model(X, branch_mask)
+        det, regime, _ = outputs
+        det = torch.sigmoid(det).detach().cpu().numpy()[0]
+        regime = regime.detach().cpu().numpy()[0]
+        det_matrix[b] = det[b]
+        regime_matrix[b] = regime[b]
+    # -------------------------
+    # Assemble results (FINAL)
+    # -------------------------
+    results = []
+    for b, branch in enumerate(branches):
+        for site in range(L):
+            ep = det_matrix[b, site]
+            regime_probs = regime_matrix[b, site]
+            regime_idx = int(np.argmax(regime_probs))
+            rp = float(np.max(regime_probs))
+            results.append(
+                {
+                    "branch": branch,
+                    "site": site + 1,
+                    "episodic_probability": round(float(ep), 6),
+                    "regime": regime_idx,
+                    "regime_probability": round(rp, 6),
+                }
+            )
+    # -------------------------
+    # Metadata (LOCKED)
+    # -------------------------
+    metadata = {
+        "engine": "BABAPPAΩ",
+        "model": model_tag,
+        "device": device.type,
+        "ntaxa": ntaxa,
+        "sites": L,
+        "n_branches": len(branches),
+        "timestamp_utc": datetime.utcnow().isoformat() + "Z",
+        "model_source": "Zenodo",
+        "model_doi": "10.5281/zenodo.18195868",
+    }
+    # -------------------------
+    # Write output
+    # -------------------------
+    ext = os.path.splitext(out_path)[1].lower()
+    if ext == ".json":
+        with open(out_path, "w") as f:
+            json.dump(
+                {"metadata": metadata, "results": results},
+                f,
+                indent=2,
+            )
+    elif ext in {".csv", ".tsv"}:
+        delimiter = "," if ext == ".csv" else "\t"
+        with open(out_path, "w", newline="") as f:
+            writer = csv.DictWriter(
+                f,
+                fieldnames=[
+                    "branch",
+                    "site",
+                    "episodic_probability",
+                    "regime",
+                    "regime_probability",
+                ],
+                delimiter=delimiter,
+            )
+            writer.writeheader()
+            writer.writerows(results)
+    else:
+        raise ValueError(
+            f"Unsupported output format '{ext}'. "
+            "Use .json, .csv, or .tsv"
+        )

babappaomega/models.py ADDED Viewed

@@ -0,0 +1,52 @@
+import hashlib
+from pathlib import Path
+import urllib.request
+from platformdirs import user_cache_dir
+ZENODO_MODELS = {
+    "frozen": {
+        "url": "https://zenodo.org/record/18195869/files/BABAPPAomega_frozen.pt",
+        "md5": "610280486be2c16fe0709d4e9ad7e28c",
+        "doi": "10.5281/zenodo.18195869"
+    }
+}
+def get_cache_dir():
+    cache = Path(user_cache_dir("babappaomega"))
+    cache.mkdir(parents=True, exist_ok=True)
+    return cache
+def md5sum(path):
+    h = hashlib.md5()
+    with open(path, "rb") as f:
+        for block in iter(lambda: f.read(8192), b""):
+            h.update(block)
+    return h.hexdigest()
+def ensure_model(model_tag="frozen"):
+    if model_tag not in ZENODO_MODELS:
+        raise ValueError(f"Unknown model tag: {model_tag}")
+    entry = ZENODO_MODELS[model_tag]
+    cache_dir = get_cache_dir()
+    model_path = cache_dir / f"BABAPPAomega_{model_tag}.pt"
+    if model_path.exists():
+        if md5sum(model_path) == entry["md5"]:
+            return model_path
+        else:
+            model_path.unlink()
+    print(
+        f"[BABAPPAΩ] Downloading model '{model_tag}' from Zenodo "
+        f"(DOI: {entry['doi']})"
+    )
+    urllib.request.urlretrieve(entry["url"], model_path)
+    if md5sum(model_path) != entry["md5"]:
+        model_path.unlink()
+        raise RuntimeError("Model download failed MD5 verification")
+    return model_path

babappaomega/tree.py ADDED Viewed

@@ -0,0 +1,27 @@
+def load_tree(tree_path):
+    """
+    Load a phylogenetic tree from Newick format.
+    ete3 is imported lazily to avoid unnecessary dependencies
+    during CLI startup.
+    """
+    try:
+        from ete3 import Tree
+    except ImportError as e:
+        raise ImportError(
+            "The 'ete3' package is required for tree handling. "
+            "Install it via: pip install ete3"
+        ) from e
+    return Tree(tree_path, format=1)
+def enumerate_branches(tree):
+    """
+    Enumerate non-root branches in a stable traversal order.
+    """
+    branches = []
+    for node in tree.traverse():
+        if not node.is_root():
+            branches.append(node.name or f"node_{id(node)}")
+    return branches

babappaomega/utils.py ADDED Viewed

@@ -0,0 +1,25 @@
+import torch
+import json
+from importlib.resources import files
+def resolve_device(requested="auto"):
+    if requested == "cuda":
+        if not torch.cuda.is_available():
+            raise RuntimeError("CUDA requested but not available.")
+        return torch.device("cuda")
+    if requested == "cpu":
+        return torch.device("cpu")
+    if torch.cuda.is_available():
+        return torch.device("cuda")
+    return torch.device("cpu")
+def get_model_path(filename):
+    return files("babappaomega.assets.models") / filename
+def load_metadata():
+    path = files("babappaomega.assets") / "metadata.json"
+    with open(path) as f:
+        return json.load(f)

babappaomega-0.1.6.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,142 @@
+Metadata-Version: 2.4
+Name: babappaomega
+Version: 0.1.6
+Summary: BABAPPAΩ: Likelihood-free branch–site inference of episodic positive selection
+Author: Krishnendu Sinha
+License: MIT
+Requires-Python: >=3.9
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: torch>=2.0
+Requires-Dist: numpy
+Requires-Dist: biopython
+Requires-Dist: ete3
+Requires-Dist: six
+Requires-Dist: platformdirs
+Dynamic: license-file
+# BABAPPAΩ
+[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.18197957.svg)](https://doi.org/10.5281/zenodo.18197957)
+BABAPPAΩ is a mechanistically grounded inference engine for detecting episodic
+positive selection under branch–site models using likelihood-free,
+amortized neural inference.
+The software provides a production-grade command-line interface for
+branch–site scans on codon alignments, with GPU-accelerated inference,
+deterministic output, and fully reproducible model distribution.
+---
+## Key Features
+- Branch–site inference of episodic positive selection
+- Likelihood-free neural inference without explicit likelihood optimization
+- GPU-first execution with automatic CPU fallback
+- Deterministic, machine-readable output
+- Clean command-line interface suitable for large-scale scans
+- Separation of inference software and trained model artifacts
+- Reviewer-safe and reproducible distribution strategy
+---
+## Installation
+Install BABAPPAΩ directly from PyPI:
+```bash
+pip install babappaomega
+```
+Python version 3.9 or later is required.
+---
+## Basic Usage
+```bash
+babappaomega predict \
+  --alignment alignment.fasta \
+  --tree tree.nwk \
+  --out results.json
+```
+Supported output formats:
+- .json
+- .csv
+- .tsv
+Each run performs an exploratory branch–site scan, conditioning on each
+branch as foreground in turn.
+---
+## Output
+The output reports results at the branch–site level and includes:
+- Posterior probability of episodic positive selection
+- Most probable evolutionary regime
+- Posterior probability of the inferred regime
+Reported probabilities are rounded to six decimal places. A value of exactly 1
+therefore indicates near-unity posterior support (numerical saturation), which
+may occur for small or low-noise alignments.
+All outputs follow a stable and documented schema to facilitate
+downstream filtering, visualization, and statistical analysis.
+---
+## Model Weights and Reproducibility
+The frozen reference model used by BABAPPAΩ is archived on Zenodo:
+DOI: 10.5281/zenodo.18195868
+The trained model is not bundled with the Python package. On first use:
+1. The model is downloaded automatically from Zenodo
+2. The MD5 checksum of the downloaded file is verified against the archived value
+3. The model is cached locally
+4. Subsequent runs reuse the cached artifact
+This design ensures:
+- Lightweight PyPI distribution
+- Transparent model provenance
+- Full reproducibility
+- Drop-in replacement for future model versions without API changes
+---
+## Performance
+Inference is GPU-accelerated when a compatible device is available and
+automatically falls back to CPU execution otherwise.
+The inference engine is designed for high-throughput exploratory scans
+across branches and sites.
+---
+## License
+This project is released under the MIT License.
+---
+## Development Status
+The inference engine, command-line interface, packaging, and model
+distribution pipeline are finalized and stable.
+Ongoing and future work focuses on:
+- Benchmarking against classical likelihood-based methods
+- Expanded documentation and worked examples
+- Large-scale empirical applications
+---
+## Citation
+A manuscript describing BABAPPAΩ is in preparation.
+Until publication, please cite the Zenodo record associated with the frozen
+reference model.

babappaomega-0.1.6.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,13 @@
+babappaomega/__init__.py,sha256=kUR5RAFc7HCeiqdlX36dZOHkUI5wI6V_43RpEcD8b-0,22
+babappaomega/cli.py,sha256=lf2DbypIS_0Xjm0f2PqHqFZKXcqDQDFSc8Br2L2Z7sw,1032
+babappaomega/encoding.py,sha256=-NHytWPUVbd8-mKMU1ekaHu0n4SysUsT9SbUk4wPE7Q,637
+babappaomega/inference.py,sha256=yP7jUusIUDEU94D9j-ecQLebu4oDOCt6dttjP8hUCgo,4941
+babappaomega/models.py,sha256=WadjH0itbA_oxwwxLaa3jmrW3z5-gq49IeVEZS4-zn4,1394
+babappaomega/tree.py,sha256=isfqT1MF4_0xCxJJxYakB9oEQRYv4HxuZxe1jJOT2KI,712
+babappaomega/utils.py,sha256=4CH67C2XMb10uUf14Q_UhuHNyHpxWByyI1A0GfbmwcE,665
+babappaomega-0.1.6.dist-info/licenses/LICENSE,sha256=Qauehk6ZOXz4NQdHgSJ7FBoEK6Au3jUCa1GrXIUbi7o,1073
+babappaomega-0.1.6.dist-info/METADATA,sha256=04FH1EfX5gGlG_W_RWWCsYY9LIMx4eICLPoH2Hx-dvk,3628
+babappaomega-0.1.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+babappaomega-0.1.6.dist-info/entry_points.txt,sha256=Jn1HF7Fnt_flVGSpJXG3_FrcYNFEIPWCw_J3m1GsE4M,55
+babappaomega-0.1.6.dist-info/top_level.txt,sha256=THBnC7o3L7WOrf66vkxekVJvuVcUBoq5EgdChqg_5Ik,13
+babappaomega-0.1.6.dist-info/RECORD,,

babappaomega-0.1.6.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,5 @@
+Wheel-Version: 1.0
+Generator: setuptools (80.9.0)
+Root-Is-Purelib: true
+Tag: py3-none-any

babappaomega-0.1.6.dist-info/entry_points.txt ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ [console_scripts]
2	+ babappaomega = babappaomega.cli:main

babappaomega-0.1.6.dist-info/licenses/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 KRISHNENDU SINHA
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

babappaomega-0.1.6.dist-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ babappaomega