babappai 2.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- babappai-2.0.0/LICENSE +21 -0
- babappai-2.0.0/MANIFEST.in +8 -0
- babappai-2.0.0/PKG-INFO +220 -0
- babappai-2.0.0/README.md +181 -0
- babappai-2.0.0/babappai/__init__.py +20 -0
- babappai-2.0.0/babappai/assets/__init__.py +0 -0
- babappai-2.0.0/babappai/assets/metadata.json +8 -0
- babappai-2.0.0/babappai/calibration/__init__.py +100 -0
- babappai-2.0.0/babappai/calibration/neutral_generator_adapter.py +100 -0
- babappai-2.0.0/babappai/cli.py +472 -0
- babappai-2.0.0/babappai/data/__init__.py +0 -0
- babappai-2.0.0/babappai/data/neutral_reference_frozen.json +82 -0
- babappai-2.0.0/babappai/encoding.py +60 -0
- babappai-2.0.0/babappai/identifiability.py +42 -0
- babappai-2.0.0/babappai/inference.py +329 -0
- babappai-2.0.0/babappai/interpret.py +135 -0
- babappai-2.0.0/babappai/metadata.py +20 -0
- babappai-2.0.0/babappai/model_manager.py +165 -0
- babappai-2.0.0/babappai/models.py +13 -0
- babappai-2.0.0/babappai/py.typed +0 -0
- babappai-2.0.0/babappai/run_pipeline.py +248 -0
- babappai-2.0.0/babappai/tree.py +113 -0
- babappai-2.0.0/babappai/tree_calibration.py +65 -0
- babappai-2.0.0/babappai/tree_neutral.py +54 -0
- babappai-2.0.0/babappai/utils.py +160 -0
- babappai-2.0.0/babappai/validation/__init__.py +1 -0
- babappai-2.0.0/babappai/validation/empirical_validation.py +308 -0
- babappai-2.0.0/babappai/validation/orthogroup_qc.py +192 -0
- babappai-2.0.0/babappai/validation/orthogroup_selection.py +237 -0
- babappai-2.0.0/babappai/validation/simulator_adapter.py +119 -0
- babappai-2.0.0/babappai/validation/synthetic_validation.py +193 -0
- babappai-2.0.0/babappai/validation/validation_plots.py +52 -0
- babappai-2.0.0/babappai/validation/validation_reporting.py +159 -0
- babappai-2.0.0/babappai.egg-info/PKG-INFO +220 -0
- babappai-2.0.0/babappai.egg-info/SOURCES.txt +64 -0
- babappai-2.0.0/babappai.egg-info/dependency_links.txt +1 -0
- babappai-2.0.0/babappai.egg-info/entry_points.txt +3 -0
- babappai-2.0.0/babappai.egg-info/requires.txt +15 -0
- babappai-2.0.0/babappai.egg-info/top_level.txt +2 -0
- babappai-2.0.0/babappaomega/__init__.py +13 -0
- babappai-2.0.0/babappaomega/assets/__init__.py +0 -0
- babappai-2.0.0/babappaomega/assets/metadata.json +7 -0
- babappai-2.0.0/babappaomega/calibration.py +3 -0
- babappai-2.0.0/babappaomega/cli.py +18 -0
- babappai-2.0.0/babappaomega/data/__init__.py +0 -0
- babappai-2.0.0/babappaomega/data/neutral_reference_frozen.json +82 -0
- babappai-2.0.0/babappaomega/encoding.py +3 -0
- babappai-2.0.0/babappaomega/identifiability.py +3 -0
- babappai-2.0.0/babappaomega/inference.py +3 -0
- babappai-2.0.0/babappaomega/interpret.py +3 -0
- babappai-2.0.0/babappaomega/metadata.py +3 -0
- babappai-2.0.0/babappaomega/models.py +3 -0
- babappai-2.0.0/babappaomega/py.typed +0 -0
- babappai-2.0.0/babappaomega/tree.py +3 -0
- babappai-2.0.0/babappaomega/tree_calibration.py +3 -0
- babappai-2.0.0/babappaomega/tree_neutral.py +3 -0
- babappai-2.0.0/babappaomega/utils.py +3 -0
- babappai-2.0.0/pyproject.toml +92 -0
- babappai-2.0.0/setup.cfg +4 -0
- babappai-2.0.0/tests/test_adapters.py +61 -0
- babappai-2.0.0/tests/test_cli.py +154 -0
- babappai-2.0.0/tests/test_identifiability.py +27 -0
- babappai-2.0.0/tests/test_model_manager.py +73 -0
- babappai-2.0.0/tests/test_orthogroup_selection.py +62 -0
- babappai-2.0.0/tests/test_package_alias.py +22 -0
- babappai-2.0.0/tests/test_validation_report.py +30 -0
babappai-2.0.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 KRISHNENDU SINHA
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
include README.md
|
|
2
|
+
include LICENSE
|
|
3
|
+
include babappai/py.typed
|
|
4
|
+
include babappaomega/py.typed
|
|
5
|
+
recursive-include babappai/data *.json
|
|
6
|
+
recursive-include babappai/assets *.json
|
|
7
|
+
recursive-include babappaomega/data *.json
|
|
8
|
+
recursive-include babappaomega/assets *.json
|
babappai-2.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: babappai
|
|
3
|
+
Version: 2.0.0
|
|
4
|
+
Summary: BABAPPAi: diagnostic framework for identifiability of episodic branch-site structure
|
|
5
|
+
Author: Krishnendu Sinha
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/krishnendusinha/babappai
|
|
8
|
+
Project-URL: Documentation, https://github.com/krishnendusinha/babappai
|
|
9
|
+
Project-URL: Source, https://github.com/krishnendusinha/babappai
|
|
10
|
+
Project-URL: LegacySoftwareDOI, https://doi.org/10.5281/zenodo.18520163
|
|
11
|
+
Project-URL: LegacyModelDOI, https://doi.org/10.5281/zenodo.18195869
|
|
12
|
+
Keywords: episodic selection,branch-site,identifiability,evolutionary genomics,likelihood-free inference,deep learning
|
|
13
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Operating System :: OS Independent
|
|
22
|
+
Requires-Python: >=3.9
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
License-File: LICENSE
|
|
25
|
+
Requires-Dist: torch>=2.0
|
|
26
|
+
Requires-Dist: numpy
|
|
27
|
+
Requires-Dist: biopython
|
|
28
|
+
Requires-Dist: ete3
|
|
29
|
+
Requires-Dist: six
|
|
30
|
+
Requires-Dist: platformdirs
|
|
31
|
+
Provides-Extra: test
|
|
32
|
+
Requires-Dist: pytest>=8; extra == "test"
|
|
33
|
+
Requires-Dist: pytest-mock>=3.14; extra == "test"
|
|
34
|
+
Provides-Extra: dev
|
|
35
|
+
Requires-Dist: build>=1.2; extra == "dev"
|
|
36
|
+
Requires-Dist: twine>=5; extra == "dev"
|
|
37
|
+
Requires-Dist: pytest>=8; extra == "dev"
|
|
38
|
+
Dynamic: license-file
|
|
39
|
+
|
|
40
|
+
# BABAPPAi (`babappai`)
|
|
41
|
+
|
|
42
|
+
BABAPPAi is the renamed continuation of the BABAPPAΩ codebase.
|
|
43
|
+
It is a diagnostic software and manuscript ecosystem for assessing the identifiability of episodic branch-site structure.
|
|
44
|
+
|
|
45
|
+
## 1) What BABAPPAi is
|
|
46
|
+
|
|
47
|
+
BABAPPAi is a likelihood-free diagnostic framework that estimates whether episodic branch-site structure is statistically identifiable from finite coding-sequence alignments.
|
|
48
|
+
|
|
49
|
+
## 2) What it is not
|
|
50
|
+
|
|
51
|
+
- Not a classical dN/dS estimator.
|
|
52
|
+
- Not a likelihood-ratio branch-site test.
|
|
53
|
+
- Not definitive proof of adaptive substitution.
|
|
54
|
+
|
|
55
|
+
## 3) Installation
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
pip install babappai
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
Optional CLI-centric install:
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
pipx install babappai
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## 4) Quickstart
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
babappai model fetch
|
|
71
|
+
babappai example write --outdir demo
|
|
72
|
+
babappai run --alignment demo/aln.fasta --tree demo/tree.nwk --outdir demo_out
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## 5) CLI reference
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
babappai --help
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Core commands:
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
babappai run --alignment aln.fasta --tree tree.nwk --outdir results
|
|
85
|
+
babappai run --alignment aln.fasta --tree tree.nwk --outdir results --tree-calibration
|
|
86
|
+
babappai model fetch
|
|
87
|
+
babappai model status
|
|
88
|
+
babappai model verify
|
|
89
|
+
babappai doctor
|
|
90
|
+
babappai example write --outdir demo
|
|
91
|
+
babappai validate orthogroups select --input ORTHOGROUP_DIR --outdir selection_out
|
|
92
|
+
babappai validate orthogroups run --input selection_out --outdir empirical_out
|
|
93
|
+
babappai validate synthetic run --simulator scripts/simulator.py --neutral-generator scripts/generate_neutral_calibration.py --outdir synthetic_out
|
|
94
|
+
babappai validate report --input validation_root --outdir report_out
|
|
95
|
+
babappai version
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
## 6) Output files explained
|
|
99
|
+
|
|
100
|
+
`babappai run ...` emits:
|
|
101
|
+
|
|
102
|
+
- `results.json`
|
|
103
|
+
- `branch_summary.tsv`
|
|
104
|
+
- `site_summary.tsv`
|
|
105
|
+
- `interpretation.txt`
|
|
106
|
+
- `run_metadata.json`
|
|
107
|
+
|
|
108
|
+
Validation workflows emit additional summaries/reports (selection, empirical, synthetic, master report outputs).
|
|
109
|
+
|
|
110
|
+
## 7) Interpretation of `EII_z` and `EII_01`
|
|
111
|
+
|
|
112
|
+
- `EII_z`: calibrated raw identifiability score.
|
|
113
|
+
- `EII_01`: bounded companion score in `[0,1]`.
|
|
114
|
+
|
|
115
|
+
Deterministic transform:
|
|
116
|
+
|
|
117
|
+
```text
|
|
118
|
+
EII_01 = sigmoid(EII_z)
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
## 8) Definitive identifiability regimes
|
|
122
|
+
|
|
123
|
+
- `0.00 <= EII_01 < 0.30` -> `not_identifiable`
|
|
124
|
+
- `0.30 <= EII_01 < 0.70` -> `weak_or_ambiguous`
|
|
125
|
+
- `0.70 <= EII_01 < 0.90` -> `identifiable`
|
|
126
|
+
- `0.90 <= EII_01 <= 1.00` -> `strongly_identifiable`
|
|
127
|
+
|
|
128
|
+
Also emitted everywhere:
|
|
129
|
+
|
|
130
|
+
- `identifiable_bool = (EII_01 >= 0.70)`
|
|
131
|
+
- `identifiability_extent`
|
|
132
|
+
|
|
133
|
+
## 9) Legacy model download/cache/checksum/provenance notes
|
|
134
|
+
|
|
135
|
+
The currently configured frozen model is a **legacy BABAPPAΩ model asset** used for backward-compatible inference in BABAPPAi.
|
|
136
|
+
|
|
137
|
+
- model file: `babappaomega.pt`
|
|
138
|
+
- legacy model DOI: `10.5281/zenodo.18195869`
|
|
139
|
+
- URL: `https://zenodo.org/records/18195869/files/babappaomega.pt?download=1`
|
|
140
|
+
- SHA-256: `657a662563af31304abcb208fc903d2770a9184632a9bab2095db4c538fed8eb`
|
|
141
|
+
|
|
142
|
+
Cache uses `platformdirs.user_cache_dir("babappai")` (or `BABAPPAI_CACHE_DIR`).
|
|
143
|
+
Checksum verification is always enforced.
|
|
144
|
+
|
|
145
|
+
## 10) Validation and benchmarking
|
|
146
|
+
|
|
147
|
+
Validation includes:
|
|
148
|
+
|
|
149
|
+
- empirical orthogroup-based validation
|
|
150
|
+
- simulator-driven synthetic benchmarking
|
|
151
|
+
- unified report generation with figures/tables
|
|
152
|
+
|
|
153
|
+
## 11) Orthogroup selection workflow
|
|
154
|
+
|
|
155
|
+
Deterministic top-100 selection with hard filters and anti-cherry-picking stratification:
|
|
156
|
+
|
|
157
|
+
```bash
|
|
158
|
+
babappai validate orthogroups select --input ORTHOGROUP_DIR --outdir selection_out
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
Outputs:
|
|
162
|
+
|
|
163
|
+
- `selected_100_orthogroups.tsv`
|
|
164
|
+
- `rejected_orthogroups.tsv`
|
|
165
|
+
- `orthogroup_qc_metrics.tsv`
|
|
166
|
+
- `orthogroup_selection_report.txt`
|
|
167
|
+
- `selection_metadata.json`
|
|
168
|
+
|
|
169
|
+
## 12) Synthetic benchmarking using the supplied simulator
|
|
170
|
+
|
|
171
|
+
```bash
|
|
172
|
+
babappai validate synthetic run \
|
|
173
|
+
--simulator scripts/simulator.py \
|
|
174
|
+
--outdir synthetic_out \
|
|
175
|
+
--grid-config demo/synthetic_grid.json
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
Optional neutral generator integration:
|
|
179
|
+
|
|
180
|
+
```bash
|
|
181
|
+
babappai validate synthetic run \
|
|
182
|
+
--simulator scripts/simulator.py \
|
|
183
|
+
--neutral-generator scripts/generate_neutral_calibration.py \
|
|
184
|
+
--outdir synthetic_out
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
## 13) Neutral calibration generator integration
|
|
188
|
+
|
|
189
|
+
BABAPPAi includes an adapter for external neutral calibration generators:
|
|
190
|
+
|
|
191
|
+
- module: `babappai/calibration/neutral_generator_adapter.py`
|
|
192
|
+
- CLI integration: `--neutral-generator PATH`
|
|
193
|
+
- metadata logging includes script path, attempted command, seed, and output reference file.
|
|
194
|
+
|
|
195
|
+
## 14) Reproducibility/version metadata
|
|
196
|
+
|
|
197
|
+
Run/validation outputs include software version, command string, model DOI/SHA/cache path, and calibration metadata.
|
|
198
|
+
|
|
199
|
+
## 15) Citation
|
|
200
|
+
|
|
201
|
+
Cite BABAPPAi software release/version and, while legacy frozen model assets are still used, cite the legacy model DOI.
|
|
202
|
+
|
|
203
|
+
Legacy records currently referenced for provenance:
|
|
204
|
+
|
|
205
|
+
- legacy software record: `https://zenodo.org/records/18520163`
|
|
206
|
+
- legacy frozen model record: `https://zenodo.org/records/18195869`
|
|
207
|
+
|
|
208
|
+
## 16) Legacy BABAPPAΩ compatibility note
|
|
209
|
+
|
|
210
|
+
BABAPPAi is the renamed continuation of BABAPPAΩ.
|
|
211
|
+
Some legacy artifacts and DOI records still carry the BABAPPAΩ naming. These are retained for provenance and reproducibility and are explicitly marked as legacy assets.
|
|
212
|
+
|
|
213
|
+
## Development quick commands
|
|
214
|
+
|
|
215
|
+
```bash
|
|
216
|
+
pip install -e .[test]
|
|
217
|
+
pytest
|
|
218
|
+
python -m build --sdist --wheel
|
|
219
|
+
python -m twine check dist/*
|
|
220
|
+
```
|
babappai-2.0.0/README.md
ADDED
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
# BABAPPAi (`babappai`)
|
|
2
|
+
|
|
3
|
+
BABAPPAi is the renamed continuation of the BABAPPAΩ codebase.
|
|
4
|
+
It is a diagnostic software and manuscript ecosystem for assessing the identifiability of episodic branch-site structure.
|
|
5
|
+
|
|
6
|
+
## 1) What BABAPPAi is
|
|
7
|
+
|
|
8
|
+
BABAPPAi is a likelihood-free diagnostic framework that estimates whether episodic branch-site structure is statistically identifiable from finite coding-sequence alignments.
|
|
9
|
+
|
|
10
|
+
## 2) What it is not
|
|
11
|
+
|
|
12
|
+
- Not a classical dN/dS estimator.
|
|
13
|
+
- Not a likelihood-ratio branch-site test.
|
|
14
|
+
- Not definitive proof of adaptive substitution.
|
|
15
|
+
|
|
16
|
+
## 3) Installation
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
pip install babappai
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
Optional CLI-centric install:
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
pipx install babappai
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## 4) Quickstart
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
babappai model fetch
|
|
32
|
+
babappai example write --outdir demo
|
|
33
|
+
babappai run --alignment demo/aln.fasta --tree demo/tree.nwk --outdir demo_out
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## 5) CLI reference
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
babappai --help
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
Core commands:
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
babappai run --alignment aln.fasta --tree tree.nwk --outdir results
|
|
46
|
+
babappai run --alignment aln.fasta --tree tree.nwk --outdir results --tree-calibration
|
|
47
|
+
babappai model fetch
|
|
48
|
+
babappai model status
|
|
49
|
+
babappai model verify
|
|
50
|
+
babappai doctor
|
|
51
|
+
babappai example write --outdir demo
|
|
52
|
+
babappai validate orthogroups select --input ORTHOGROUP_DIR --outdir selection_out
|
|
53
|
+
babappai validate orthogroups run --input selection_out --outdir empirical_out
|
|
54
|
+
babappai validate synthetic run --simulator scripts/simulator.py --neutral-generator scripts/generate_neutral_calibration.py --outdir synthetic_out
|
|
55
|
+
babappai validate report --input validation_root --outdir report_out
|
|
56
|
+
babappai version
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## 6) Output files explained
|
|
60
|
+
|
|
61
|
+
`babappai run ...` emits:
|
|
62
|
+
|
|
63
|
+
- `results.json`
|
|
64
|
+
- `branch_summary.tsv`
|
|
65
|
+
- `site_summary.tsv`
|
|
66
|
+
- `interpretation.txt`
|
|
67
|
+
- `run_metadata.json`
|
|
68
|
+
|
|
69
|
+
Validation workflows emit additional summaries/reports (selection, empirical, synthetic, master report outputs).
|
|
70
|
+
|
|
71
|
+
## 7) Interpretation of `EII_z` and `EII_01`
|
|
72
|
+
|
|
73
|
+
- `EII_z`: calibrated raw identifiability score.
|
|
74
|
+
- `EII_01`: bounded companion score in `[0,1]`.
|
|
75
|
+
|
|
76
|
+
Deterministic transform:
|
|
77
|
+
|
|
78
|
+
```text
|
|
79
|
+
EII_01 = sigmoid(EII_z)
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## 8) Definitive identifiability regimes
|
|
83
|
+
|
|
84
|
+
- `0.00 <= EII_01 < 0.30` -> `not_identifiable`
|
|
85
|
+
- `0.30 <= EII_01 < 0.70` -> `weak_or_ambiguous`
|
|
86
|
+
- `0.70 <= EII_01 < 0.90` -> `identifiable`
|
|
87
|
+
- `0.90 <= EII_01 <= 1.00` -> `strongly_identifiable`
|
|
88
|
+
|
|
89
|
+
Also emitted everywhere:
|
|
90
|
+
|
|
91
|
+
- `identifiable_bool = (EII_01 >= 0.70)`
|
|
92
|
+
- `identifiability_extent`
|
|
93
|
+
|
|
94
|
+
## 9) Legacy model download/cache/checksum/provenance notes
|
|
95
|
+
|
|
96
|
+
The currently configured frozen model is a **legacy BABAPPAΩ model asset** used for backward-compatible inference in BABAPPAi.
|
|
97
|
+
|
|
98
|
+
- model file: `babappaomega.pt`
|
|
99
|
+
- legacy model DOI: `10.5281/zenodo.18195869`
|
|
100
|
+
- URL: `https://zenodo.org/records/18195869/files/babappaomega.pt?download=1`
|
|
101
|
+
- SHA-256: `657a662563af31304abcb208fc903d2770a9184632a9bab2095db4c538fed8eb`
|
|
102
|
+
|
|
103
|
+
Cache uses `platformdirs.user_cache_dir("babappai")` (or `BABAPPAI_CACHE_DIR`).
|
|
104
|
+
Checksum verification is always enforced.
|
|
105
|
+
|
|
106
|
+
## 10) Validation and benchmarking
|
|
107
|
+
|
|
108
|
+
Validation includes:
|
|
109
|
+
|
|
110
|
+
- empirical orthogroup-based validation
|
|
111
|
+
- simulator-driven synthetic benchmarking
|
|
112
|
+
- unified report generation with figures/tables
|
|
113
|
+
|
|
114
|
+
## 11) Orthogroup selection workflow
|
|
115
|
+
|
|
116
|
+
Deterministic top-100 selection with hard filters and anti-cherry-picking stratification:
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
babappai validate orthogroups select --input ORTHOGROUP_DIR --outdir selection_out
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
Outputs:
|
|
123
|
+
|
|
124
|
+
- `selected_100_orthogroups.tsv`
|
|
125
|
+
- `rejected_orthogroups.tsv`
|
|
126
|
+
- `orthogroup_qc_metrics.tsv`
|
|
127
|
+
- `orthogroup_selection_report.txt`
|
|
128
|
+
- `selection_metadata.json`
|
|
129
|
+
|
|
130
|
+
## 12) Synthetic benchmarking using the supplied simulator
|
|
131
|
+
|
|
132
|
+
```bash
|
|
133
|
+
babappai validate synthetic run \
|
|
134
|
+
--simulator scripts/simulator.py \
|
|
135
|
+
--outdir synthetic_out \
|
|
136
|
+
--grid-config demo/synthetic_grid.json
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
Optional neutral generator integration:
|
|
140
|
+
|
|
141
|
+
```bash
|
|
142
|
+
babappai validate synthetic run \
|
|
143
|
+
--simulator scripts/simulator.py \
|
|
144
|
+
--neutral-generator scripts/generate_neutral_calibration.py \
|
|
145
|
+
--outdir synthetic_out
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
## 13) Neutral calibration generator integration
|
|
149
|
+
|
|
150
|
+
BABAPPAi includes an adapter for external neutral calibration generators:
|
|
151
|
+
|
|
152
|
+
- module: `babappai/calibration/neutral_generator_adapter.py`
|
|
153
|
+
- CLI integration: `--neutral-generator PATH`
|
|
154
|
+
- metadata logging includes script path, attempted command, seed, and output reference file.
|
|
155
|
+
|
|
156
|
+
## 14) Reproducibility/version metadata
|
|
157
|
+
|
|
158
|
+
Run/validation outputs include software version, command string, model DOI/SHA/cache path, and calibration metadata.
|
|
159
|
+
|
|
160
|
+
## 15) Citation
|
|
161
|
+
|
|
162
|
+
Cite BABAPPAi software release/version and, while legacy frozen model assets are still used, cite the legacy model DOI.
|
|
163
|
+
|
|
164
|
+
Legacy records currently referenced for provenance:
|
|
165
|
+
|
|
166
|
+
- legacy software record: `https://zenodo.org/records/18520163`
|
|
167
|
+
- legacy frozen model record: `https://zenodo.org/records/18195869`
|
|
168
|
+
|
|
169
|
+
## 16) Legacy BABAPPAΩ compatibility note
|
|
170
|
+
|
|
171
|
+
BABAPPAi is the renamed continuation of BABAPPAΩ.
|
|
172
|
+
Some legacy artifacts and DOI records still carry the BABAPPAΩ naming. These are retained for provenance and reproducibility and are explicitly marked as legacy assets.
|
|
173
|
+
|
|
174
|
+
## Development quick commands
|
|
175
|
+
|
|
176
|
+
```bash
|
|
177
|
+
pip install -e .[test]
|
|
178
|
+
pytest
|
|
179
|
+
python -m build --sdist --wheel
|
|
180
|
+
python -m twine check dist/*
|
|
181
|
+
```
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""BABAPPAi package metadata exports."""
|
|
2
|
+
|
|
3
|
+
from babappai.metadata import (
|
|
4
|
+
LEGACY_CODEBASE_NAME,
|
|
5
|
+
MODEL_COMPATIBILITY_NOTE,
|
|
6
|
+
PACKAGE_NAME,
|
|
7
|
+
SOFTWARE_NAME,
|
|
8
|
+
SOFTWARE_VERSION,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
__version__ = SOFTWARE_VERSION
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
"__version__",
|
|
15
|
+
"PACKAGE_NAME",
|
|
16
|
+
"SOFTWARE_NAME",
|
|
17
|
+
"LEGACY_CODEBASE_NAME",
|
|
18
|
+
"MODEL_COMPATIBILITY_NOTE",
|
|
19
|
+
]
|
|
20
|
+
|
|
File without changes
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "BABAPPAi",
|
|
3
|
+
"lineage": "BABAPPAΩ",
|
|
4
|
+
"version": "2.0.0",
|
|
5
|
+
"model": "legacy_frozen",
|
|
6
|
+
"description": "BABAPPAi diagnostic identifiability software (renamed continuation of BABAPPAΩ)",
|
|
7
|
+
"provenance_note": "BABAPPAi is the renamed continuation of the BABAPPAΩ codebase."
|
|
8
|
+
}
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
"""BABAPPAi neutral calibration utilities and adapters."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Dict, Optional, Tuple
|
|
8
|
+
|
|
9
|
+
from babappai.metadata import MODEL_TAG
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _get_data_dir() -> Path:
    """Return the ``data`` directory in the parent of this file's directory."""
    # Calibration references are kept in the shared package data directory,
    # which sits one level above the directory containing this module.
    package_root = Path(__file__).resolve().parent.parent
    return package_root / "data"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _get_reference_path(model_tag: str) -> Path:
    """Return the path of the neutral-reference JSON file for ``model_tag``.

    The tags ``"legacy_frozen"`` and ``"frozen"`` both map to
    ``neutral_reference_frozen.json``; any other tag is used verbatim as the
    file-name suffix. The file is not checked for existence here.
    """
    suffix = "frozen" if model_tag in {"legacy_frozen", "frozen"} else model_tag
    return _get_data_dir() / f"neutral_reference_{suffix}.json"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def load_neutral_reference(model_tag: str = MODEL_TAG) -> Dict[str, Dict[str, float]]:
    """Load the neutral calibration table for ``model_tag`` from package data.

    Args:
        model_tag: Tag used to pick the reference file (see
            ``_get_reference_path``).

    Returns:
        The parsed JSON content of the reference file.

    Raises:
        FileNotFoundError: If the reference file for ``model_tag`` is missing.
    """
    path = _get_reference_path(model_tag)
    if not path.exists():
        raise FileNotFoundError(f"Neutral calibration file not found: {path}")
    # Decode explicitly as UTF-8 so parsing does not depend on the platform's
    # locale encoding.
    return json.loads(path.read_text(encoding="utf-8"))
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _extract_grid(table: Dict[str, Dict[str, float]]) -> Tuple[list[int], list[int]]:
    """Collect the sorted, de-duplicated L and K values encoded in the keys.

    Each key is split on ``_``; the second and fourth fields are parsed as
    integers, matching the ``L_<int>_K_<int>`` keys that
    ``lookup_interpolated`` queries.
    """
    l_values = set()
    k_values = set()
    for key in table:
        parts = key.split("_")
        l_values.add(int(parts[1]))
        k_values.add(int(parts[3]))
    return sorted(l_values), sorted(k_values)


def _axis_weights(low: int, high: int, value: int) -> Tuple[int, int, int]:
    """Return ``(weight_low, weight_high, span)`` for one interpolation axis.

    When ``low == high`` the query sits exactly on a grid value, so the axis
    is collapsed: all weight goes to the low corner and the span is 1, which
    avoids a zero divisor.
    """
    span = high - low
    if span == 0:
        return 1, 0, 1
    return high - value, value - low, span


def lookup_interpolated(
    L: int,
    K: int,
    table: Dict[str, Dict[str, float]],
) -> Optional[Dict[str, float]]:
    """Interpolate ``sigma2_mean`` and ``sigma2_sd`` at ``(L, K)`` from ``table``.

    The four grid entries bracketing ``(L, K)`` are combined bilinearly. A
    query lying exactly on a grid value along one axis reduces to linear
    interpolation along the other; a query on a grid point returns that
    entry's values.

    Args:
        L: Query value along the ``L`` axis of the table keys.
        K: Query value along the ``K`` axis of the table keys.
        table: Mapping from ``"L_<int>_K_<int>"`` keys to dicts holding
            ``sigma2_mean`` and ``sigma2_sd``.

    Returns:
        A dict with ``sigma2_mean`` and ``sigma2_sd``, or ``None`` when the
        table is empty, ``(L, K)`` falls outside the grid, or a bracketing
        entry is missing from the table.
    """
    l_grid, k_grid = _extract_grid(table)
    if not l_grid or not k_grid:
        return None

    # Nearest grid values at or below / at or above the query on each axis.
    l_low = max((x for x in l_grid if x <= L), default=None)
    l_high = min((x for x in l_grid if x >= L), default=None)
    k_low = max((x for x in k_grid if x <= K), default=None)
    k_high = min((x for x in k_grid if x >= K), default=None)
    if None in (l_low, l_high, k_low, k_high):
        return None

    def get(lv: int, kv: int) -> Optional[Dict[str, float]]:
        return table.get(f"L_{lv}_K_{kv}")

    q11 = get(l_low, k_low)
    q12 = get(l_low, k_high)
    q21 = get(l_high, k_low)
    q22 = get(l_high, k_high)
    if None in (q11, q12, q21, q22):
        return None

    # For non-collapsed axes these are exactly the classic bilinear weights;
    # collapsed axes are handled inside _axis_weights.
    wl_low, wl_high, l_span = _axis_weights(l_low, l_high, L)
    wk_low, wk_high, k_span = _axis_weights(k_low, k_high, K)
    denom = l_span * k_span

    def bilinear(field: str) -> float:
        return float(
            (
                q11[field] * wl_low * wk_low
                + q21[field] * wl_high * wk_low
                + q12[field] * wl_low * wk_high
                + q22[field] * wl_high * wk_high
            )
            / denom
        )

    return {"sigma2_mean": bilinear("sigma2_mean"), "sigma2_sd": bilinear("sigma2_sd")}
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def get_neutral_reference(
    L: int,
    K: int,
    model_tag: str = MODEL_TAG,
) -> Optional[Dict[str, float]]:
    """Load the reference table for ``model_tag`` and look up ``(L, K)`` in it.

    Returns whatever ``lookup_interpolated`` yields for the loaded table,
    which may be ``None``. Errors raised by ``load_neutral_reference``
    propagate to the caller.
    """
    return lookup_interpolated(L, K, load_neutral_reference(model_tag))
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
__all__ = [
|
|
96
|
+
"_get_reference_path",
|
|
97
|
+
"get_neutral_reference",
|
|
98
|
+
"load_neutral_reference",
|
|
99
|
+
"lookup_interpolated",
|
|
100
|
+
]
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
"""Adapter layer for external neutral calibration generator scripts."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import shutil
|
|
7
|
+
import subprocess
|
|
8
|
+
import sys
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any, Dict, Iterable, Optional
|
|
11
|
+
|
|
12
|
+
from babappai.calibration import _get_reference_path
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _run_command(command: list[str], cwd: Path) -> subprocess.CompletedProcess[str]:
    """Run ``command`` in directory ``cwd`` and return the completed process.

    Output is captured as text. A non-zero exit status does not raise; the
    caller is expected to inspect ``returncode``.
    """
    working_dir = str(cwd)
    return subprocess.run(
        command,
        cwd=working_dir,
        capture_output=True,
        text=True,
        check=False,
    )
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _discover_reference_file(output_dir: Path, model_tag: str) -> Optional[Path]:
    """Locate a neutral reference JSON, preferring files in ``output_dir``.

    The first ``neutral_reference*.json`` in ``output_dir`` (in sorted order)
    wins. If there is none, the reference path for ``model_tag`` from
    ``_get_reference_path`` is returned when that file exists, else ``None``.
    """
    generated = sorted(output_dir.glob("neutral_reference*.json"))
    if generated:
        return generated[0]

    bundled = _get_reference_path(model_tag)
    return bundled if bundled.exists() else None
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def run_neutral_generator(
    *,
    generator_path: str,
    output_dir: str,
    model_tag: str,
    seed: Optional[int] = None,
    extra_args: Optional[Iterable[str]] = None,
) -> Dict[str, Any]:
    """Run an external neutral-generator script and record what happened.

    The script is launched with the current interpreter, using ``output_dir``
    as the working directory. The first attempt passes ``--outdir`` (and
    ``--seed`` when given) followed by ``extra_args``; if that exits non-zero,
    a second attempt passes only ``extra_args``.

    Args:
        generator_path: Path to the generator script.
        output_dir: Directory for outputs; created if it does not exist.
        model_tag: Tag handed to ``_discover_reference_file`` when locating
            the reference JSON.
        seed: Optional seed forwarded as ``--seed`` on the first attempt.
        extra_args: Extra command-line arguments appended to both attempts.

    Returns:
        A metadata dict (script path, attempted commands, seed, output
        directory, reference file, captured stdout/stderr). The same dict is
        written to ``neutral_generator_run_metadata.json`` in ``output_dir``.

    Raises:
        FileNotFoundError: If the generator script does not exist.
        RuntimeError: If the last attempt exits non-zero, or no neutral
            reference JSON can be located afterwards.
    """
    script = Path(generator_path).expanduser().resolve()
    if not script.exists():
        raise FileNotFoundError(f"Neutral generator script not found: {script}")

    outdir = Path(output_dir).expanduser().resolve()
    outdir.mkdir(parents=True, exist_ok=True)

    passthrough = list(extra_args or [])
    seed_args = [] if seed is None else ["--seed", str(seed)]
    primary = [
        sys.executable,
        str(script),
        "--outdir",
        str(outdir),
        *seed_args,
        *passthrough,
    ]

    attempted_commands: list[list[str]] = [primary]
    result = _run_command(primary, cwd=outdir)
    if result.returncode != 0:
        # Retry without --outdir/--seed for scripts that reject those options.
        bare = [sys.executable, str(script), *passthrough]
        attempted_commands.append(bare)
        result = _run_command(bare, cwd=outdir)

    if result.returncode != 0:
        raise RuntimeError(
            "Neutral generator failed. "
            f"Attempted commands: {attempted_commands}. "
            f"stderr: {result.stderr.strip()}"
        )

    discovered = _discover_reference_file(outdir, model_tag=model_tag)
    if discovered is None:
        raise RuntimeError(
            "Neutral generator completed but no neutral reference JSON was found."
        )

    # Make sure a copy of the reference lives inside the output directory.
    copied_reference = outdir / discovered.name
    if copied_reference.resolve() != discovered.resolve():
        shutil.copy2(discovered, copied_reference)

    metadata = {
        "generator_path": str(script),
        "attempted_commands": attempted_commands,
        "seed": seed,
        "output_dir": str(outdir),
        "reference_file": str(copied_reference),
        "stdout": result.stdout,
        "stderr": result.stderr,
    }
    metadata_file = outdir / "neutral_generator_run_metadata.json"
    metadata_file.write_text(json.dumps(metadata, indent=2) + "\n")
    return metadata
|
|
100
|
+
|