babappaomega 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ __version__ = "0.1.0"
babappaomega/cli.py ADDED
@@ -0,0 +1,31 @@
1
+ import argparse
2
+ from babappaomega.inference import run_inference
3
+
4
def main(argv=None):
    """
    Command-line entry point for BABAPPAΩ.

    Builds the argument parser, parses the command line and dispatches the
    ``predict`` sub-command to ``run_inference``.

    Parameters
    ----------
    argv : list of str, optional
        Arguments to parse. When ``None`` (the default, which is what the
        console-script entry point uses) argparse reads ``sys.argv[1:]``.
        Passing an explicit list allows programmatic invocation.
    """
    parser = argparse.ArgumentParser(
        prog="babappaomega",
        description="BABAPPAΩ: episodic branch–site selection inference"
    )

    # A sub-command is mandatory; argparse exits with status 2 if it is missing.
    sub = parser.add_subparsers(dest="command", required=True)

    p = sub.add_parser("predict", help="Run inference on an alignment")
    p.add_argument("--alignment", required=True, help="Codon alignment (FASTA)")
    p.add_argument("--tree", required=True, help="Phylogenetic tree (Newick)")
    # The writer selects the format from the file extension, so the help text
    # lists every accepted extension rather than JSON only.
    p.add_argument("--out", required=True, help="Output file (.json, .csv, or .tsv)")
    p.add_argument("--device", default="auto", choices=["auto", "cpu", "cuda"])
    p.add_argument("--model", default="frozen")

    args = parser.parse_args(argv)

    if args.command == "predict":
        run_inference(
            alignment_path=args.alignment,
            tree_path=args.tree,
            out_path=args.out,
            device=args.device,
            model_tag=args.model,
        )
29
+
30
# Script entry point: run the CLI when this module is executed directly.
if __name__ == "__main__":
    main()
@@ -0,0 +1,23 @@
1
+ import numpy as np
2
+ from Bio import SeqIO
3
+
4
# All nucleotide triplets over "ACGT", in nested-loop order, with the three
# stop codons (TAA, TAG, TGA) left out.
CODONS = [
    first + second + third
    for first in "ACGT"
    for second in "ACGT"
    for third in "ACGT"
    if first + second + third not in ("TAA", "TAG", "TGA")
]
# Lookup from codon string to its position in CODONS.
CODON_TO_ID = dict(zip(CODONS, range(len(CODONS))))
9
+
10
def encode_alignment(fasta_path):
    """
    Encode a FASTA codon alignment as a matrix of integer codon IDs.

    Each sequence is cut into consecutive nucleotide triplets and every
    triplet is looked up in ``CODON_TO_ID``. The number of codon columns is
    taken from the first record (its length floor-divided by 3).

    Parameters
    ----------
    fasta_path
        Path (or handle) of the FASTA alignment, passed to ``SeqIO.parse``.

    Returns
    -------
    tuple
        ``(tensor, ntaxa, seq_len)`` where ``tensor`` is an ``int64`` array
        of shape ``(ntaxa, seq_len)``.

    Raises
    ------
    ValueError
        If the file contains no sequences.
    """
    records = list(SeqIO.parse(fasta_path, "fasta"))
    if not records:
        # Previously this surfaced as a bare IndexError on records[0].
        raise ValueError(f"No sequences found in alignment '{fasta_path}'")

    ntaxa = len(records)
    seq_len = len(records[0].seq) // 3

    tensor = np.zeros((ntaxa, seq_len), dtype=np.int64)

    for i, rec in enumerate(records):
        # The codon table is upper-case only; without normalisation a
        # lower-case (e.g. soft-masked) alignment matches no key at all and
        # is silently encoded as all zeros.
        seq = str(rec.seq).upper()
        for j in range(seq_len):
            codon = seq[3 * j:3 * j + 3]
            # NOTE(review): anything not in the table (gaps, ambiguity codes,
            # stop codons, short trailing slices of shorter records) falls
            # back to 0, which is also the ID of "AAA" — confirm this is the
            # encoding the model expects.
            tensor[i, j] = CODON_TO_ID.get(codon, 0)

    return tensor, ntaxa, seq_len
@@ -0,0 +1,184 @@
1
+ import json
2
+ import csv
3
+ import os
4
+ import torch
5
+ import numpy as np
6
+ from datetime import datetime
7
+
8
+ from babappaomega.utils import resolve_device
9
+ from babappaomega.encoding import encode_alignment
10
+ from babappaomega.tree import load_tree, enumerate_branches
11
+ from babappaomega.models import ensure_model
12
+
13
+
14
def load_model(model_tag: str, device: torch.device):
    """
    Return the TorchScript BABAPPAΩ model, loaded onto ``device`` in eval mode.

    The model file location is obtained from ``ensure_model``. Only the
    ``"frozen"`` tag is accepted; any other tag raises ``ValueError`` before
    anything is fetched or loaded.
    """
    if model_tag == "frozen":
        weights_file = ensure_model(model_tag)
        scripted = torch.jit.load(weights_file, map_location=device)
        scripted.eval()
        return scripted

    raise ValueError(
        f"Model '{model_tag}' is not available. "
        "Only the frozen reference model is supported."
    )
31
+
32
+
33
+
34
def _resolve_output_format(out_path):
    """Return the lower-cased extension of ``out_path``; raise ValueError if unsupported."""
    ext = os.path.splitext(out_path)[1].lower()
    if ext not in {".json", ".csv", ".tsv"}:
        raise ValueError(
            f"Unsupported output format '{ext}'. "
            "Use .json, .csv, or .tsv"
        )
    return ext


def _write_results(out_path, ext, metadata, results):
    """Write ``results`` as JSON (with ``metadata``) or as a CSV/TSV table (rows only)."""
    if ext == ".json":
        with open(out_path, "w", encoding="utf-8") as f:
            json.dump(
                {"metadata": metadata, "results": results},
                f,
                indent=2,
            )
        return

    delimiter = "," if ext == ".csv" else "\t"
    # Explicit UTF-8 keeps the output identical across platforms even when
    # branch names contain non-ASCII characters.
    with open(out_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(
            f,
            fieldnames=[
                "branch",
                "site",
                "episodic_probability",
                "regime",
                "regime_probability",
            ],
            delimiter=delimiter,
        )
        writer.writeheader()
        writer.writerows(results)


@torch.no_grad()
def run_inference(
    alignment_path: str,
    tree_path: str,
    out_path: str,
    device: str = "auto",
    model_tag: str = "frozen",
):
    """
    Run BABAPPAΩ inference on a codon alignment and phylogenetic tree.

    Every non-root branch is marked as foreground in turn, the model is
    evaluated once per branch, and one result row per (branch, site) pair is
    written to ``out_path``.

    Parameters
    ----------
    alignment_path : str
        FASTA codon alignment, passed to ``encode_alignment``.
    tree_path : str
        Newick tree, passed to ``load_tree``.
    out_path : str
        Destination file. The extension selects the format: ``.json``
        (metadata + results), ``.csv`` or ``.tsv`` (results only).
    device : str
        Device request forwarded to ``resolve_device``.
    model_tag : str
        Model identifier forwarded to ``load_model``.

    Raises
    ------
    ValueError
        If the output extension is unsupported (checked before any model
        download or computation), or if the tree yields no branches.
    """
    # Function-scope import: the module only imports ``datetime`` itself.
    from datetime import timezone

    # Fail fast: reject a bad output path before doing any expensive work.
    ext = _resolve_output_format(out_path)

    # -------------------------
    # Device resolution
    # -------------------------
    device = resolve_device(device)

    # -------------------------
    # Load model
    # -------------------------
    model = load_model(model_tag, device)

    # -------------------------
    # Encode inputs
    # -------------------------
    X, ntaxa, L = encode_alignment(alignment_path)
    tree = load_tree(tree_path)
    branches = enumerate_branches(tree)
    n_branches = len(branches)
    if n_branches == 0:
        raise ValueError("Tree contains no branches to scan")

    # Add a leading batch axis of size 1.
    X = torch.tensor(X, dtype=torch.long, device=device).unsqueeze(0)

    if device.type == "cpu" and ntaxa > 120:
        print(
            "[BABAPPAΩ WARNING] Large number of taxa detected "
            f"(n={ntaxa}). GPU acceleration is strongly recommended."
        )

    # -------------------------
    # Forward pass (per-branch)
    # -------------------------
    branch_mask = torch.zeros(
        (1, n_branches),
        dtype=torch.long,
        device=device,
    )

    det_matrix = np.zeros((n_branches, L), dtype=float)
    # Allocated on the first iteration, once the number of regimes is known
    # from the model output; this avoids a separate probing forward pass.
    regime_matrix = None

    for b in range(n_branches):
        # One-hot mask selecting branch ``b`` as foreground.
        branch_mask.zero_()
        branch_mask[0, b] = 1

        # NOTE(review): assumes the model returns (detection, regime, extra)
        # with a leading batch axis and a per-branch first axis after it —
        # TODO confirm against the TorchScript model.
        det, regime, _ = model(X, branch_mask)

        det = torch.sigmoid(det).detach().cpu().numpy()[0]
        # NOTE(review): regime scores are used as returned (no normalisation
        # here) — presumably already probabilities; confirm.
        regime = regime.detach().cpu().numpy()[0]

        if regime_matrix is None:
            regime_matrix = np.zeros(
                (n_branches, L, regime.shape[-1]), dtype=float
            )

        det_matrix[b] = det[b]
        regime_matrix[b] = regime[b]

    # -------------------------
    # Assemble results (FINAL)
    # -------------------------
    results = []
    for b, branch in enumerate(branches):
        for site in range(L):
            regime_probs = regime_matrix[b, site]
            results.append(
                {
                    "branch": branch,
                    "site": site + 1,  # 1-based site index in the output
                    "episodic_probability": round(float(det_matrix[b, site]), 6),
                    "regime": int(np.argmax(regime_probs)),
                    "regime_probability": round(float(np.max(regime_probs)), 6),
                }
            )

    # -------------------------
    # Metadata (LOCKED)
    # -------------------------
    # datetime.utcnow() is deprecated; take an aware UTC time and drop the
    # tzinfo so the serialised form ("...Z" suffix, no "+00:00") is unchanged.
    timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"

    metadata = {
        "engine": "BABAPPAΩ",
        "model": model_tag,
        "device": device.type,
        "ntaxa": ntaxa,
        "sites": L,
        "n_branches": n_branches,
        "timestamp_utc": timestamp,
        "model_source": "Zenodo",
        "model_doi": "10.5281/zenodo.18195868",
    }

    # -------------------------
    # Write output
    # -------------------------
    _write_results(out_path, ext, metadata, results)
babappaomega/models.py ADDED
@@ -0,0 +1,52 @@
1
+ import hashlib
2
+ from pathlib import Path
3
+ import urllib.request
4
+
5
+ from platformdirs import user_cache_dir
6
+
7
# Registry of downloadable model artifacts, keyed by model tag. Each entry
# holds the download URL, the expected MD5 hex digest and the DOI that is
# printed when the model is downloaded.
ZENODO_MODELS = dict(
    frozen=dict(
        url="https://zenodo.org/record/18195869/files/BABAPPAomega_frozen.pt",
        md5="610280486be2c16fe0709d4e9ad7e28c",
        doi="10.5281/zenodo.18195869",
    ),
)
14
+
15
def get_cache_dir():
    """
    Return the per-user cache directory for babappaomega as a ``Path``.

    The directory (and any missing parents) is created if it does not exist.
    """
    location = user_cache_dir("babappaomega")
    directory = Path(location)
    directory.mkdir(exist_ok=True, parents=True)
    return directory
19
+
20
def md5sum(path):
    """
    Return the MD5 hex digest of the file at ``path``.

    The file is read in 8192-byte chunks so it is never held in memory whole.
    """
    digest = hashlib.md5()
    with open(path, "rb") as stream:
        while True:
            chunk = stream.read(8192)
            if not chunk:
                break
            digest.update(chunk)
    return digest.hexdigest()
26
+
27
def ensure_model(model_tag="frozen"):
    """
    Return the local path of the model for ``model_tag``, downloading it if needed.

    A cached copy is reused when its MD5 digest matches the registry entry;
    a cached copy with a different digest is deleted and fetched again. The
    download is written to a temporary ``.part`` file next to the final
    location and only moved into place after its digest has been verified,
    so an interrupted or corrupt download never occupies the cache path.

    Raises
    ------
    ValueError
        If ``model_tag`` is not present in ``ZENODO_MODELS``.
    RuntimeError
        If the downloaded file fails MD5 verification.
    """
    if model_tag not in ZENODO_MODELS:
        raise ValueError(f"Unknown model tag: {model_tag}")

    entry = ZENODO_MODELS[model_tag]
    model_path = get_cache_dir() / f"BABAPPAomega_{model_tag}.pt"

    if model_path.exists():
        if md5sum(model_path) == entry["md5"]:
            return model_path
        # Cached file does not match the expected digest: discard it.
        model_path.unlink()

    print(
        f"[BABAPPAΩ] Downloading model '{model_tag}' from Zenodo "
        f"(DOI: {entry['doi']})"
    )

    partial_path = model_path.with_name(model_path.name + ".part")
    try:
        urllib.request.urlretrieve(entry["url"], partial_path)

        if md5sum(partial_path) != entry["md5"]:
            raise RuntimeError("Model download failed MD5 verification")

        # Atomic rename within the same directory.
        partial_path.replace(model_path)
    finally:
        # Removes leftovers after a failed download or failed verification;
        # a no-op once the file has been moved into place.
        partial_path.unlink(missing_ok=True)

    return model_path
babappaomega/tree.py ADDED
@@ -0,0 +1,27 @@
1
def load_tree(tree_path):
    """
    Read a Newick tree and return it as an ete3 ``Tree`` (parsed with format=1).

    ete3 is imported inside the function rather than at module import time;
    if it is missing, an ImportError with an installation hint is raised.
    """
    try:
        import ete3
    except ImportError as exc:
        hint = (
            "The 'ete3' package is required for tree handling. "
            "Install it via: pip install ete3"
        )
        raise ImportError(hint) from exc

    return ete3.Tree(tree_path, format=1)
17
+
18
+
19
def enumerate_branches(tree):
    """
    Enumerate non-root branches in a stable traversal order.

    Each non-root node visited by ``tree.traverse()`` contributes one label:
    the node's name, or ``node_<k>`` for an unnamed node, where ``k`` is the
    node's zero-based position in the returned list.

    Returns
    -------
    list of str
        One label per non-root node, in traversal order.
    """
    branches = []
    for node in tree.traverse():
        if node.is_root():
            continue
        # Label unnamed nodes by position rather than id(node): object ids
        # differ between runs, which made the reported branch names
        # irreproducible for the same input tree.
        branches.append(node.name or f"node_{len(branches)}")
    return branches
babappaomega/utils.py ADDED
@@ -0,0 +1,25 @@
1
+ import torch
2
+ import json
3
+ from importlib.resources import files
4
+
5
def resolve_device(requested="auto"):
    """
    Translate a device request into a ``torch.device``.

    ``"cpu"`` always yields the CPU. ``"cuda"`` yields CUDA or raises
    ``RuntimeError`` when CUDA is unavailable. Any other value (including the
    default ``"auto"``) yields CUDA when available and the CPU otherwise.
    """
    if requested == "cpu":
        return torch.device("cpu")

    cuda_ok = torch.cuda.is_available()
    if requested == "cuda" and not cuda_ok:
        raise RuntimeError("CUDA requested but not available.")

    return torch.device("cuda" if cuda_ok else "cpu")
18
+
19
def get_model_path(filename):
    """
    Return the resource ``filename`` inside the ``babappaomega.assets.models`` package.

    NOTE(review): the wheel's RECORD lists no ``babappaomega/assets`` files —
    confirm this helper is still in use.
    """
    models_root = files("babappaomega.assets.models")
    return models_root / filename
21
+
22
def load_metadata():
    """
    Load and return the parsed contents of ``metadata.json`` from ``babappaomega.assets``.

    NOTE(review): the wheel's RECORD lists no ``babappaomega/assets`` files —
    confirm the resource is actually shipped.
    """
    resource = files("babappaomega.assets") / "metadata.json"
    # Read through the resource API instead of the builtin open(): the object
    # returned by files() is only guaranteed to be a Traversable, which need
    # not be a filesystem path (e.g. when the package is imported from a zip).
    # JSON is decoded as UTF-8 explicitly rather than with the locale default.
    return json.loads(resource.read_text(encoding="utf-8"))
@@ -0,0 +1,142 @@
1
+ Metadata-Version: 2.4
2
+ Name: babappaomega
3
+ Version: 0.1.6
4
+ Summary: BABAPPAΩ: Likelihood-free branch–site inference of episodic positive selection
5
+ Author: Krishnendu Sinha
6
+ License: MIT
7
+ Requires-Python: >=3.9
8
+ Description-Content-Type: text/markdown
9
+ License-File: LICENSE
10
+ Requires-Dist: torch>=2.0
11
+ Requires-Dist: numpy
12
+ Requires-Dist: biopython
13
+ Requires-Dist: ete3
14
+ Requires-Dist: six
15
+ Requires-Dist: platformdirs
16
+ Dynamic: license-file
17
+
18
+ # BABAPPAΩ
19
+
20
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.18197957.svg)](https://doi.org/10.5281/zenodo.18197957)
21
+
22
+ BABAPPAΩ is a mechanistically grounded inference engine for detecting episodic
23
+ positive selection under branch–site models using likelihood-free,
24
+ amortized neural inference.
25
+
26
+ The software provides a production-grade command-line interface for
27
+ branch–site scans on codon alignments, with GPU-accelerated inference,
28
+ deterministic output, and fully reproducible model distribution.
29
+
30
+ ---
31
+
32
+ ## Key Features
33
+
34
+ - Branch–site inference of episodic positive selection
35
+ - Likelihood-free neural inference without explicit likelihood optimization
36
+ - GPU-first execution with automatic CPU fallback
37
+ - Deterministic, machine-readable output
38
+ - Clean command-line interface suitable for large-scale scans
39
+ - Separation of inference software and trained model artifacts
40
+ - Reviewer-safe and reproducible distribution strategy
41
+
42
+ ---
43
+
44
+ ## Installation
45
+
46
+ Install BABAPPAΩ directly from PyPI:
47
+
48
+ pip install babappaomega
49
+
50
+ Python version 3.9 or later is required.
51
+
52
+ ---
53
+
54
+ ## Basic Usage
55
+
56
+ babappaomega predict \
57
+ --alignment alignment.fasta \
58
+ --tree tree.nwk \
59
+ --out results.json
60
+
61
+ Supported output formats:
62
+ - .json
63
+ - .csv
64
+ - .tsv
65
+
66
+ Each run performs an exploratory branch–site scan, conditioning on each
67
+ branch as foreground in turn.
68
+
69
+ ---
70
+
71
+ ## Output
72
+
73
+ The output reports results at the branch–site level and includes:
74
+
75
+ - Posterior probability of episodic positive selection
76
+ - Most probable evolutionary regime
77
+ - Posterior probability of the inferred regime
78
+
79
+ Probabilities equal to 1 indicate numerical saturation corresponding to
80
+ near-unity posterior support, which may occur for small or low-noise
81
+ alignments.
82
+
83
+ All outputs follow a stable and documented schema to facilitate
84
+ downstream filtering, visualization, and statistical analysis.
85
+
86
+ ---
87
+
88
+ ## Model Weights and Reproducibility
89
+
90
+ The frozen reference model used by BABAPPAΩ is archived on Zenodo:
91
+
92
+ DOI: 10.5281/zenodo.18195868
93
+
94
+ The trained model is not bundled with the Python package. On first use:
95
+
96
+ 1. The model is downloaded automatically from Zenodo
97
+ 2. The archival checksum is verified
98
+ 3. The model is cached locally
99
+ 4. Subsequent runs reuse the cached artifact
100
+
101
+ This design ensures:
102
+ - Lightweight PyPI distribution
103
+ - Transparent model provenance
104
+ - Full reproducibility
105
+ - Drop-in replacement for future model versions without API changes
106
+
107
+ ---
108
+
109
+ ## Performance
110
+
111
+ Inference is GPU-accelerated when a compatible device is available and
112
+ automatically falls back to CPU execution otherwise.
113
+
114
+ The inference engine is designed for high-throughput exploratory scans
115
+ across branches and sites.
116
+
117
+ ---
118
+
119
+ ## License
120
+
121
+ This project is released under the MIT License.
122
+
123
+ ---
124
+
125
+ ## Development Status
126
+
127
+ The inference engine, command-line interface, packaging, and model
128
+ distribution pipeline are finalized and stable.
129
+
130
+ Ongoing and future work focuses on:
131
+ - Benchmarking against classical likelihood-based methods
132
+ - Expanded documentation and worked examples
133
+ - Large-scale empirical applications
134
+
135
+ ---
136
+
137
+ ## Citation
138
+
139
+ A manuscript describing BABAPPAΩ is in preparation.
140
+
141
+ Until publication, please cite the Zenodo record associated with the frozen
142
+ reference model.
@@ -0,0 +1,13 @@
1
+ babappaomega/__init__.py,sha256=kUR5RAFc7HCeiqdlX36dZOHkUI5wI6V_43RpEcD8b-0,22
2
+ babappaomega/cli.py,sha256=lf2DbypIS_0Xjm0f2PqHqFZKXcqDQDFSc8Br2L2Z7sw,1032
3
+ babappaomega/encoding.py,sha256=-NHytWPUVbd8-mKMU1ekaHu0n4SysUsT9SbUk4wPE7Q,637
4
+ babappaomega/inference.py,sha256=yP7jUusIUDEU94D9j-ecQLebu4oDOCt6dttjP8hUCgo,4941
5
+ babappaomega/models.py,sha256=WadjH0itbA_oxwwxLaa3jmrW3z5-gq49IeVEZS4-zn4,1394
6
+ babappaomega/tree.py,sha256=isfqT1MF4_0xCxJJxYakB9oEQRYv4HxuZxe1jJOT2KI,712
7
+ babappaomega/utils.py,sha256=4CH67C2XMb10uUf14Q_UhuHNyHpxWByyI1A0GfbmwcE,665
8
+ babappaomega-0.1.6.dist-info/licenses/LICENSE,sha256=Qauehk6ZOXz4NQdHgSJ7FBoEK6Au3jUCa1GrXIUbi7o,1073
9
+ babappaomega-0.1.6.dist-info/METADATA,sha256=04FH1EfX5gGlG_W_RWWCsYY9LIMx4eICLPoH2Hx-dvk,3628
10
+ babappaomega-0.1.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
11
+ babappaomega-0.1.6.dist-info/entry_points.txt,sha256=Jn1HF7Fnt_flVGSpJXG3_FrcYNFEIPWCw_J3m1GsE4M,55
12
+ babappaomega-0.1.6.dist-info/top_level.txt,sha256=THBnC7o3L7WOrf66vkxekVJvuVcUBoq5EgdChqg_5Ik,13
13
+ babappaomega-0.1.6.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ babappaomega = babappaomega.cli:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 KRISHNENDU SINHA
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ babappaomega