oncosplice 3.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oncosplice-3.2.0/LICENSE +21 -0
- oncosplice-3.2.0/PKG-INFO +313 -0
- oncosplice-3.2.0/README.md +251 -0
- oncosplice-3.2.0/oncosplice/__init__.py +82 -0
- oncosplice-3.2.0/oncosplice/_geney_compat.py +138 -0
- oncosplice-3.2.0/oncosplice/engine.py +1200 -0
- oncosplice-3.2.0/oncosplice/engines/__init__.py +129 -0
- oncosplice-3.2.0/oncosplice/engines/_pangolin_arch.py +140 -0
- oncosplice-3.2.0/oncosplice/engines/_vendor/__init__.py +9 -0
- oncosplice-3.2.0/oncosplice/engines/_vendor/spliceformer/LICENSE +21 -0
- oncosplice-3.2.0/oncosplice/engines/_vendor/spliceformer/__init__.py +16 -0
- oncosplice-3.2.0/oncosplice/engines/_vendor/spliceformer/model.py +350 -0
- oncosplice-3.2.0/oncosplice/engines/_vendor/spliceformer/weight_init.py +20 -0
- oncosplice-3.2.0/oncosplice/engines/base.py +110 -0
- oncosplice-3.2.0/oncosplice/engines/ensemble.py +53 -0
- oncosplice-3.2.0/oncosplice/engines/openspliceai.py +141 -0
- oncosplice-3.2.0/oncosplice/engines/pangolin.py +192 -0
- oncosplice-3.2.0/oncosplice/engines/spliceai_keras.py +98 -0
- oncosplice-3.2.0/oncosplice/engines/spliceai_pytorch.py +242 -0
- oncosplice-3.2.0/oncosplice/engines/spliceformer.py +189 -0
- oncosplice-3.2.0/oncosplice/results.py +440 -0
- oncosplice-3.2.0/oncosplice/scoring/__init__.py +31 -0
- oncosplice-3.2.0/oncosplice/scoring/epistasis.py +625 -0
- oncosplice-3.2.0/oncosplice/scoring/fingerprint.py +169 -0
- oncosplice-3.2.0/oncosplice/scoring/oncosplice.py +112 -0
- oncosplice-3.2.0/oncosplice/scoring/site_query.py +225 -0
- oncosplice-3.2.0/oncosplice/scoring/splicing.py +201 -0
- oncosplice-3.2.0/oncosplice/variants.py +137 -0
- oncosplice-3.2.0/oncosplice/viz.py +846 -0
- oncosplice-3.2.0/oncosplice/weights/__init__.py +239 -0
- oncosplice-3.2.0/oncosplice/weights/__main__.py +8 -0
- oncosplice-3.2.0/oncosplice/weights/manifest.json +45 -0
- oncosplice-3.2.0/oncosplice.egg-info/PKG-INFO +313 -0
- oncosplice-3.2.0/oncosplice.egg-info/SOURCES.txt +42 -0
- oncosplice-3.2.0/oncosplice.egg-info/dependency_links.txt +1 -0
- oncosplice-3.2.0/oncosplice.egg-info/entry_points.txt +2 -0
- oncosplice-3.2.0/oncosplice.egg-info/requires.txt +42 -0
- oncosplice-3.2.0/oncosplice.egg-info/top_level.txt +1 -0
- oncosplice-3.2.0/pyproject.toml +104 -0
- oncosplice-3.2.0/setup.cfg +4 -0
- oncosplice-3.2.0/tests/test_engines_selfcontained.py +63 -0
- oncosplice-3.2.0/tests/test_scoring.py +129 -0
- oncosplice-3.2.0/tests/test_spliceai_equivalence.py +146 -0
- oncosplice-3.2.0/tests/test_variants.py +43 -0
oncosplice-3.2.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Nicolas Lynn
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: oncosplice
|
|
3
|
+
Version: 3.2.0
|
|
4
|
+
Summary: Sequence-level pipeline for splicing-epistasis analysis of single, double, and N-variant constructs
|
|
5
|
+
Author-email: Nicolas Lynn <nicolasalynn@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/nicolasalynn/oncosplice
|
|
8
|
+
Project-URL: Documentation, https://nicolasalynn.github.io/oncosplice
|
|
9
|
+
Project-URL: Repository, https://github.com/nicolasalynn/oncosplice
|
|
10
|
+
Project-URL: Issues, https://github.com/nicolasalynn/oncosplice/issues
|
|
11
|
+
Project-URL: Changelog, https://github.com/nicolasalynn/oncosplice/blob/main/CHANGELOG.md
|
|
12
|
+
Keywords: splicing,epistasis,oncosplice,spliceai,openspliceai,pangolin,spliceformer,cancer-genomics,bioinformatics,rna-splicing
|
|
13
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: Intended Audience :: Healthcare Industry
|
|
16
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
17
|
+
Classifier: Operating System :: OS Independent
|
|
18
|
+
Classifier: Programming Language :: Python :: 3
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
22
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
23
|
+
Classifier: Topic :: Scientific/Engineering :: Medical Science Apps.
|
|
24
|
+
Classifier: Typing :: Typed
|
|
25
|
+
Requires-Python: >=3.10
|
|
26
|
+
Description-Content-Type: text/markdown
|
|
27
|
+
License-File: LICENSE
|
|
28
|
+
Requires-Dist: numpy>=1.23
|
|
29
|
+
Requires-Dist: pandas>=2.2.2
|
|
30
|
+
Requires-Dist: matplotlib>=3.9
|
|
31
|
+
Requires-Dist: biopython>=1.84
|
|
32
|
+
Requires-Dist: seqmat>=1.3
|
|
33
|
+
Requires-Dist: huggingface_hub>=0.20
|
|
34
|
+
Provides-Extra: openspliceai
|
|
35
|
+
Requires-Dist: torch>=2.0; extra == "openspliceai"
|
|
36
|
+
Requires-Dist: openspliceai; extra == "openspliceai"
|
|
37
|
+
Provides-Extra: spliceai-pytorch
|
|
38
|
+
Requires-Dist: torch>=2.0; extra == "spliceai-pytorch"
|
|
39
|
+
Provides-Extra: pangolin
|
|
40
|
+
Requires-Dist: torch>=2.0; extra == "pangolin"
|
|
41
|
+
Provides-Extra: spliceformer
|
|
42
|
+
Requires-Dist: torch>=2.0; extra == "spliceformer"
|
|
43
|
+
Requires-Dist: einops; extra == "spliceformer"
|
|
44
|
+
Provides-Extra: spliceai-keras
|
|
45
|
+
Requires-Dist: tensorflow>=2.8; extra == "spliceai-keras"
|
|
46
|
+
Requires-Dist: spliceai; extra == "spliceai-keras"
|
|
47
|
+
Provides-Extra: all
|
|
48
|
+
Requires-Dist: torch>=2.0; extra == "all"
|
|
49
|
+
Requires-Dist: openspliceai; extra == "all"
|
|
50
|
+
Requires-Dist: einops; extra == "all"
|
|
51
|
+
Provides-Extra: protein
|
|
52
|
+
Requires-Dist: geney>=2.0; extra == "protein"
|
|
53
|
+
Provides-Extra: dev
|
|
54
|
+
Requires-Dist: pytest>=7; extra == "dev"
|
|
55
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
56
|
+
Requires-Dist: ruff; extra == "dev"
|
|
57
|
+
Provides-Extra: docs
|
|
58
|
+
Requires-Dist: mkdocs-material>=9.5; extra == "docs"
|
|
59
|
+
Requires-Dist: mkdocstrings[python]>=0.24; extra == "docs"
|
|
60
|
+
Requires-Dist: pymdown-extensions>=10; extra == "docs"
|
|
61
|
+
Dynamic: license-file
|
|
62
|
+
|
|
63
|
+
# oncosplice
|
|
64
|
+
|
|
65
|
+
[PyPI](https://pypi.org/project/oncosplice/)
|
|
66
|
+
[CI](https://github.com/nicolasalynn/oncosplice/actions/workflows/ci.yml)
|
|
67
|
+
[Coverage](https://codecov.io/gh/nicolasalynn/oncosplice)
|
|
68
|
+
[Docs](https://nicolasalynn.github.io/oncosplice)
|
|
69
|
+
[Python versions](https://pypi.org/project/oncosplice/)
|
|
70
|
+
[License: MIT](LICENSE)
|
|
71
|
+
|
|
72
|
+
> Given two (or more) mutations in the same gene, classify how their joint
|
|
73
|
+
> effect on splicing differs from the additive prediction — into one of four
|
|
74
|
+
> mutually-exclusive mechanism classes: **rescue**, **cryptic rescue**,
|
|
75
|
+
> **deletion synergy**, or **cryptic synergy**.
|
|
76
|
+
|
|
77
|
+
**oncosplice** is a sequence-level pipeline for splicing-epistasis analysis of
|
|
78
|
+
single-, double-, and N-variant constructs. It runs a splice-site predictor
|
|
79
|
+
(SpliceAI, OpenSpliceAI, Pangolin, or Spliceformer) under each variant context,
|
|
80
|
+
computes per-site residuals against the additive expectation, and applies a
|
|
81
|
+
crisp 4-class mechanistic classifier.
|
|
82
|
+
|
|
83
|
+
Implements the algorithms from:
|
|
84
|
+
|
|
85
|
+
1. *Detecting and understanding meaningful cancerous mutations based on computational models of mRNA splicing* — Lynn & Tuller, *npj Systems Biology* 2024.
|
|
86
|
+
2. *Large-scale insight into missplicing, intra-gene epistasis and its relevance to human cancer* — in preparation.
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
pip install "oncosplice[spliceai-pytorch]"
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
## What it does
|
|
93
|
+
|
|
94
|
+
Given two (or more) genomic variants in the same gene, oncosplice answers:
|
|
95
|
+
|
|
96
|
+
- **Single-variant impact.** For each mutation alone, how much does it perturb every splice site in the gene? `analyze_single()`.
|
|
97
|
+
- **Joint behavior.** What does splicing look like when both mutations co-occur, and how does that compare to the additive prediction? `analyze_pair()` / `analyze_multi()`.
|
|
98
|
+
- **Mechanism.** Is the joint effect a *synergy* (the joint event deletes or creates a splice site beyond the additive prediction), a *rescue* (a single disrupts, the joint restores WT), or non-epistatic (a sub-additive compounding stack, dominance, or noise)? Per-site and pair-level classification.
|
|
99
|
+
- **Bulk classification.** Run the same analysis over a DataFrame of hundreds of thousands of pairs with per-gene scheduling, batched inference, and resumable checkpointing. `scan()` / `classify_dataframe()`.
|
|
100
|
+
|
|
101
|
+
## Install
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
# Recommended — original SpliceAI weights, PyTorch backbone (no TF dependency)
|
|
105
|
+
pip install "oncosplice[spliceai-pytorch]"
|
|
106
|
+
|
|
107
|
+
# Or pick another engine
|
|
108
|
+
pip install oncosplice[openspliceai] # OpenSpliceAI (MANE-trained, retrained)
|
|
109
|
+
pip install oncosplice[pangolin] # Pangolin (40-model multi-tissue)
|
|
110
|
+
pip install oncosplice[spliceformer] # Spliceformer (40k transformer)
|
|
111
|
+
pip install oncosplice[all] # all 4 production engines
|
|
112
|
+
|
|
113
|
+
# Optional add-ons
|
|
114
|
+
pip install oncosplice[protein] # protein-divergence score (Lynn & Tuller 2024)
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
Core requires `numpy`, `pandas`, `matplotlib`, `biopython`, `seqmat`, `huggingface_hub`. The
|
|
118
|
+
classification core (`analyze_pair`, `scan`, `classify_dataframe`) has **no
|
|
119
|
+
`geney` dependency** — `geney` is only needed for the protein-divergence score
|
|
120
|
+
path (`[protein]` extra).
|
|
121
|
+
|
|
122
|
+
**Model weights download automatically** from the [Hugging Face Hub](https://huggingface.co/nicolynnvila/oncosplice-weights)
|
|
123
|
+
on first use and are cached in `~/.oncosplice/weights/` — no manual step. Set
|
|
124
|
+
`ONCOSPLICE_AUTO_DOWNLOAD=0` to require an explicit `oncosplice-download-weights`
|
|
125
|
+
instead (useful offline / in CI).
|
|
126
|
+
|
|
127
|
+
## Highlights
|
|
128
|
+
|
|
129
|
+
- **Four production engines under one interface** — SpliceAI (PyTorch port,
|
|
130
|
+
numerically identical to Keras), OpenSpliceAI, Pangolin, Spliceformer. Swap
|
|
131
|
+
with one string. Cross-engine ensembling via `ensemble:a,b,c`.
|
|
132
|
+
- **Four-class mechanistic classifier** — rescue / cryptic rescue / deletion
|
|
133
|
+
synergy / cryptic synergy, defined on probability bands with a hard
|
|
134
|
+
WT-vs-annotation prerequisite that filters predictor noise.
|
|
135
|
+
- **TCGA-scale runner** — `classify_dataframe()` does per-gene grouping +
|
|
136
|
+
batched inference + resumable checkpointing. ~23× faster than per-pair after
|
|
137
|
+
the 3.2.0 vectorization; 800k pairs in ~22 hours on an L40S.
|
|
138
|
+
- **Numerical parity tests** between Keras SpliceAI and the PyTorch port so
|
|
139
|
+
the migration is auditable.
|
|
140
|
+
- **Pure-python scoring core** (`oncosplice.scoring`) with no model
|
|
141
|
+
dependencies — usable as a library in other splicing-prediction stacks.
|
|
142
|
+
|
|
143
|
+
## Quickstart
|
|
144
|
+
|
|
145
|
+
```python
|
|
146
|
+
from oncosplice import OncospliceEngine
|
|
147
|
+
|
|
148
|
+
eng = OncospliceEngine(splicing_engine="spliceai_pytorch")
|
|
149
|
+
|
|
150
|
+
# Single variant — does this mutation cause missplicing?
|
|
151
|
+
single = eng.analyze_single("KRAS:12:25227344:A:T")
|
|
152
|
+
print(single.summary())
|
|
153
|
+
print(single.missplicing.to_dataframe()) # missed + discovered sites
|
|
154
|
+
single.plot_missplicing()
|
|
155
|
+
|
|
156
|
+
# Pair — what happens when both mutations co-occur?
|
|
157
|
+
pair = eng.analyze_pair("KRAS:12:25227343:G:T", "KRAS:12:25227344:A:T")
|
|
158
|
+
print(pair.pair_classification) # → "rescue"
|
|
159
|
+
print(pair.epistatic_sites())            # only the synergy / rescue sites
|
|
160
|
+
pair.plot_case_study() # the bar figure
|
|
161
|
+
|
|
162
|
+
# N-variant (higher-order)
|
|
163
|
+
multi = eng.analyze_multi([
|
|
164
|
+
"KRAS:12:25227343:G:T",
|
|
165
|
+
"KRAS:12:25227344:A:T",
|
|
166
|
+
"KRAS:12:25227345:G:C",
|
|
167
|
+
])
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
### Bulk classification of a DataFrame
|
|
171
|
+
|
|
172
|
+
```python
|
|
173
|
+
import pandas as pd
|
|
174
|
+
df = pd.read_csv("pairs.csv") # column: epistasis_id (e.g. "GENE:CHR:POS:REF:ALT|GENE:CHR:POS:REF:ALT")
|
|
175
|
+
|
|
176
|
+
out = eng.classify_dataframe(
|
|
177
|
+
df, epistasis_id_col="epistasis_id",
|
|
178
|
+
checkpoint_path="results.csv",
|
|
179
|
+
)
|
|
180
|
+
# adds: pair_classification, max_abs_residual, max_abs_event_delta,
|
|
181
|
+
# n_del_syn, n_cryp_syn, n_rescue, n_cryp_rescue, engine, error
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
Per-gene grouping + batched `scan()` underneath — typically 10–40× faster than the per-pair path on TCGA-shaped datasets. The runner is resume-safe (re-running with the same checkpoint path skips already-done pairs) and emits both per-pair and per-single CSVs.
|
|
185
|
+
|
|
186
|
+
### Engine-only API (no geney needed)
|
|
187
|
+
|
|
188
|
+
```python
|
|
189
|
+
from oncosplice.engines import get_predictor, list_available_engines
|
|
190
|
+
print(list_available_engines())
|
|
191
|
+
|
|
192
|
+
p = get_predictor("spliceai_pytorch")
|
|
193
|
+
pred = p.predict(padded_sequence) # → SplicingPrediction(acceptor, donor)
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
## The classifier — 4 mechanism classes
|
|
197
|
+
|
|
198
|
+
At every splice site, given four predicted probabilities `ref`, `mut1`,
|
|
199
|
+
`mut2`, `event` (all in [0, 1]) and the annotation flag, we test four
|
|
200
|
+
mutually-exclusive rules. The residual `expected − event` (or `event − expected`,
|
|
201
|
+
depending on direction) plus the band-membership of `ref`, `mut1`, `mut2`,
|
|
202
|
+
`event` decide the class. `expected = mut1 + mut2 − ref` is the additive null.
|
|
203
|
+
|
|
204
|
+
**Thresholds (one set, used everywhere):**
|
|
205
|
+
|
|
206
|
+
| Symbol | Value | Meaning |
|
|
207
|
+
|---|---|---|
|
|
208
|
+
| `HIGH` | 0.50 | "site present" (includes alt-spliced sites) |
|
|
209
|
+
| `LOW` | 0.05 | "site absent" |
|
|
210
|
+
| `RES` | 0.10 | minimum residual magnitude |
|
|
211
|
+
| `NEAR_WT` | 0.20 | `|event − ref|` tolerance for rescue |
|
|
212
|
+
|
|
213
|
+
**Hard prerequisite — WT prediction must agree with annotation.** Every rule
|
|
214
|
+
first checks that the engine's wild-type prediction is consistent with the
|
|
215
|
+
annotation: `annotated == True ⇒ ref ≥ HIGH`, `annotated == False ⇒ ref ≤ LOW`.
|
|
216
|
+
Sites where the engine disagrees with the annotation are dropped as
|
|
217
|
+
non-epistatic without consulting the mutations. This is the noise filter.
|
|
218
|
+
|
|
219
|
+
### The four rules
|
|
220
|
+
|
|
221
|
+
| Class | When the site is annotated (`ref ≥ HIGH`) | Rule | Residual |
|
|
222
|
+
|---|---|---|---|
|
|
223
|
+
| **rescue** | one single deletes, joint restores | `min(mut1, mut2) ≤ ref − HIGH` ∧ `|event − ref| ≤ NEAR_WT` ∧ `event − min(mut1, mut2) ≥ RES` | `rescue_residual = event − min(mut1, mut2)` |
|
|
224
|
+
| **deletion synergy** | both singles preserve, joint destroys | `min(mut1, mut2) ≥ HIGH` ∧ `ref − event ≥ RES` ∧ `expected − event ≥ RES` | `synergy_residual = expected − event` |
|
|
225
|
+
|
|
226
|
+
| Class | When the site is not annotated (`ref ≤ LOW`) | Rule | Residual |
|
|
227
|
+
|---|---|---|---|
|
|
228
|
+
| **cryptic rescue** | one single creates, joint silences | `max(mut1, mut2) ≥ HIGH` ∧ `event ≤ LOW` ∧ `max(mut1, mut2) − event ≥ RES` | `rescue_residual = max(mut1, mut2) − event` |
|
|
229
|
+
| **cryptic synergy** | both silent, joint creates | `max(mut1, mut2) ≤ LOW` ∧ `event ≥ HIGH` ∧ `event − expected ≥ RES` | `synergy_residual = event − expected` |
|
|
230
|
+
|
|
231
|
+
Anything else → **non-epistatic**.
|
|
232
|
+
|
|
233
|
+
### Numeric example
|
|
234
|
+
|
|
235
|
+
```
|
|
236
|
+
# annotated acceptor in INPP5J — spliceai_pytorch
|
|
237
|
+
ref = 0.972 annotated = True
|
|
238
|
+
m1 = 0.658 (m1 alone preserves: 0.658 ≥ 0.50)
|
|
239
|
+
m2 = 0.841 (m2 alone preserves: 0.841 ≥ 0.50)
|
|
240
|
+
event = 0.339
|
|
241
|
+
expected = m1 + m2 - ref = 0.527
|
|
242
|
+
|
|
243
|
+
# ref ≥ HIGH ✓ and annotated ✓ → annotated branch
|
|
244
|
+
# min(m1, m2) = 0.658 ≥ HIGH ✓ → not rescue (singles preserve)
|
|
245
|
+
# ref - event = 0.633 ≥ RES (0.10) ✓
|
|
246
|
+
# expected - event = 0.188 ≥ RES (0.10) ✓ → deletion_synergy
|
|
247
|
+
# synergy_residual = 0.188
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
### Pair-level aggregation
|
|
251
|
+
|
|
252
|
+
A pair's overall label is the class of the splice site with the *largest*
|
|
253
|
+
mechanism residual (rescue or synergy). Ties break by class priority:
|
|
254
|
+
`deletion_synergy > cryptic_synergy > rescue > cryptic_rescue > non-epistatic`.
|
|
255
|
+
The full per-site breakdown is always retained in `pair.site_residuals`.
|
|
256
|
+
|
|
257
|
+
## Available splicing engines
|
|
258
|
+
|
|
259
|
+
| Name | Architecture | Notes |
|
|
260
|
+
|---|---|---|
|
|
261
|
+
| `spliceai_pytorch` (default for production) | Original SpliceAI weights (Jaganathan 2019), plain-ReLU PyTorch architecture | Numerically identical to Keras SpliceAI, ~2.5× faster, no TF dependency |
|
|
262
|
+
| `openspliceai` | OpenSpliceAI PyTorch port, MANE-trained 5-model ensemble | Independent retrain; differs from Keras SpliceAI in fine numerics |
|
|
263
|
+
| `pangolin` | 40-model multi-tissue PyTorch ensemble (Zeng & Li 2022) | Tissue-specific splice usage |
|
|
264
|
+
| `spliceformer` | 40k-context transformer ensemble (Jónsson 2024) | Long-range context; model code is vendored under `engines/_vendor/spliceformer` (needs `torch` + `einops`) |
|
|
265
|
+
| `spliceai_keras` | Original Illumina `.h5` weights | **Reference only** — prefer `spliceai_pytorch` |
|
|
266
|
+
| `ensemble:a,b,c` / `average` | Mean probabilities across N constituent engines | Cross-engine consensus |
|
|
267
|
+
|
|
268
|
+
## Package layout
|
|
269
|
+
|
|
270
|
+
```
|
|
271
|
+
oncosplice/
|
|
272
|
+
├── engine.py # OncospliceEngine — orchestrator (analyze_single/pair/multi, scan, classify_dataframe)
|
|
273
|
+
├── results.py # typed dataclasses: SingleVariantResult, DoubleVariantResult, MultiVariantResult
|
|
274
|
+
├── variants.py # Variant + VariantPair (no geney dependency)
|
|
275
|
+
├── viz.py # plot_case_study + supporting bar figures
|
|
276
|
+
├── engines/ # standalone splice-site predictor adapters (uniform interface)
|
|
277
|
+
│ ├── base.py
|
|
278
|
+
│ ├── spliceai_pytorch.py
|
|
279
|
+
│ ├── openspliceai.py
|
|
280
|
+
│ ├── pangolin.py
|
|
281
|
+
│ ├── spliceformer.py
|
|
282
|
+
│ ├── spliceai_keras.py
|
|
283
|
+
│ └── ensemble.py
|
|
284
|
+
├── scoring/ # pure-Python scoring primitives
|
|
285
|
+
│ ├── splicing.py
|
|
286
|
+
│   ├── epistasis.py       # the 4-class classifier + vectorized residual computation
|
|
287
|
+
│ ├── oncosplice.py # protein-divergence Oncosplice score
|
|
288
|
+
│ └── fingerprint.py # splicing-outcome hashing
|
|
289
|
+
└── weights/ # weight-resolution + downloader CLI
|
|
290
|
+
```
|
|
291
|
+
|
|
292
|
+
## Examples
|
|
293
|
+
|
|
294
|
+
See `examples/`:
|
|
295
|
+
|
|
296
|
+
- `KRAS_rescue.ipynb` — a canonical KRAS donor disrupted by mut1 alone, restored by the joint event. The mechanism the classifier surfaces as **rescue**.
|
|
297
|
+
- `CREBBP_synergistic.ipynb` — the joint event activates one cryptic acceptor (cryptic synergy) while rescuing another from each single's activation (cryptic rescue). The classifier reports the dominant **cryptic synergy** call with the rescue site preserved in the per-site table.
|
|
298
|
+
|
|
299
|
+
## Testing
|
|
300
|
+
|
|
301
|
+
```bash
|
|
302
|
+
pytest tests/ # full suite
|
|
303
|
+
pytest tests/test_scoring.py # classifier + residual rules
|
|
304
|
+
pytest tests/test_spliceai_equivalence.py # Keras ↔ PyTorch numerical parity
|
|
305
|
+
```
|
|
306
|
+
|
|
307
|
+
## Citing
|
|
308
|
+
|
|
309
|
+
If you use this code in a published analysis, please cite the two papers above.
|
|
310
|
+
|
|
311
|
+
## License
|
|
312
|
+
|
|
313
|
+
MIT — see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
# oncosplice
|
|
2
|
+
|
|
3
|
+
[PyPI](https://pypi.org/project/oncosplice/)
|
|
4
|
+
[CI](https://github.com/nicolasalynn/oncosplice/actions/workflows/ci.yml)
|
|
5
|
+
[Coverage](https://codecov.io/gh/nicolasalynn/oncosplice)
|
|
6
|
+
[Docs](https://nicolasalynn.github.io/oncosplice)
|
|
7
|
+
[Python versions](https://pypi.org/project/oncosplice/)
|
|
8
|
+
[License: MIT](LICENSE)
|
|
9
|
+
|
|
10
|
+
> Given two (or more) mutations in the same gene, classify how their joint
|
|
11
|
+
> effect on splicing differs from the additive prediction — into one of four
|
|
12
|
+
> mutually-exclusive mechanism classes: **rescue**, **cryptic rescue**,
|
|
13
|
+
> **deletion synergy**, or **cryptic synergy**.
|
|
14
|
+
|
|
15
|
+
**oncosplice** is a sequence-level pipeline for splicing-epistasis analysis of
|
|
16
|
+
single-, double-, and N-variant constructs. It runs a splice-site predictor
|
|
17
|
+
(SpliceAI, OpenSpliceAI, Pangolin, or Spliceformer) under each variant context,
|
|
18
|
+
computes per-site residuals against the additive expectation, and applies a
|
|
19
|
+
crisp 4-class mechanistic classifier.
|
|
20
|
+
|
|
21
|
+
Implements the algorithms from:
|
|
22
|
+
|
|
23
|
+
1. *Detecting and understanding meaningful cancerous mutations based on computational models of mRNA splicing* — Lynn & Tuller, *npj Systems Biology* 2024.
|
|
24
|
+
2. *Large-scale insight into missplicing, intra-gene epistasis and its relevance to human cancer* — in preparation.
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
pip install "oncosplice[spliceai-pytorch]"
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## What it does
|
|
31
|
+
|
|
32
|
+
Given two (or more) genomic variants in the same gene, oncosplice answers:
|
|
33
|
+
|
|
34
|
+
- **Single-variant impact.** For each mutation alone, how much does it perturb every splice site in the gene? `analyze_single()`.
|
|
35
|
+
- **Joint behavior.** What does splicing look like when both mutations co-occur, and how does that compare to the additive prediction? `analyze_pair()` / `analyze_multi()`.
|
|
36
|
+
- **Mechanism.** Is the joint effect a *synergy* (the joint event deletes or creates a splice site beyond the additive prediction), a *rescue* (a single disrupts, the joint restores WT), or non-epistatic (a sub-additive compounding stack, dominance, or noise)? Per-site and pair-level classification.
|
|
37
|
+
- **Bulk classification.** Run the same analysis over a DataFrame of hundreds of thousands of pairs with per-gene scheduling, batched inference, and resumable checkpointing. `scan()` / `classify_dataframe()`.
|
|
38
|
+
|
|
39
|
+
## Install
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
# Recommended — original SpliceAI weights, PyTorch backbone (no TF dependency)
|
|
43
|
+
pip install "oncosplice[spliceai-pytorch]"
|
|
44
|
+
|
|
45
|
+
# Or pick another engine
|
|
46
|
+
pip install oncosplice[openspliceai] # OpenSpliceAI (MANE-trained, retrained)
|
|
47
|
+
pip install oncosplice[pangolin] # Pangolin (40-model multi-tissue)
|
|
48
|
+
pip install oncosplice[spliceformer] # Spliceformer (40k transformer)
|
|
49
|
+
pip install oncosplice[all] # all 4 production engines
|
|
50
|
+
|
|
51
|
+
# Optional add-ons
|
|
52
|
+
pip install oncosplice[protein] # protein-divergence score (Lynn & Tuller 2024)
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
Core requires `numpy`, `pandas`, `matplotlib`, `biopython`, `seqmat`, `huggingface_hub`. The
|
|
56
|
+
classification core (`analyze_pair`, `scan`, `classify_dataframe`) has **no
|
|
57
|
+
`geney` dependency** — `geney` is only needed for the protein-divergence score
|
|
58
|
+
path (`[protein]` extra).
|
|
59
|
+
|
|
60
|
+
**Model weights download automatically** from the [Hugging Face Hub](https://huggingface.co/nicolynnvila/oncosplice-weights)
|
|
61
|
+
on first use and are cached in `~/.oncosplice/weights/` — no manual step. Set
|
|
62
|
+
`ONCOSPLICE_AUTO_DOWNLOAD=0` to require an explicit `oncosplice-download-weights`
|
|
63
|
+
instead (useful offline / in CI).
|
|
64
|
+
|
|
65
|
+
## Highlights
|
|
66
|
+
|
|
67
|
+
- **Four production engines under one interface** — SpliceAI (PyTorch port,
|
|
68
|
+
numerically identical to Keras), OpenSpliceAI, Pangolin, Spliceformer. Swap
|
|
69
|
+
with one string. Cross-engine ensembling via `ensemble:a,b,c`.
|
|
70
|
+
- **Four-class mechanistic classifier** — rescue / cryptic rescue / deletion
|
|
71
|
+
synergy / cryptic synergy, defined on probability bands with a hard
|
|
72
|
+
WT-vs-annotation prerequisite that filters predictor noise.
|
|
73
|
+
- **TCGA-scale runner** — `classify_dataframe()` does per-gene grouping +
|
|
74
|
+
batched inference + resumable checkpointing. ~23× faster than per-pair after
|
|
75
|
+
the 3.2.0 vectorization; 800k pairs in ~22 hours on an L40S.
|
|
76
|
+
- **Numerical parity tests** between Keras SpliceAI and the PyTorch port so
|
|
77
|
+
the migration is auditable.
|
|
78
|
+
- **Pure-python scoring core** (`oncosplice.scoring`) with no model
|
|
79
|
+
dependencies — usable as a library in other splicing-prediction stacks.
|
|
80
|
+
|
|
81
|
+
## Quickstart
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
from oncosplice import OncospliceEngine
|
|
85
|
+
|
|
86
|
+
eng = OncospliceEngine(splicing_engine="spliceai_pytorch")
|
|
87
|
+
|
|
88
|
+
# Single variant — does this mutation cause missplicing?
|
|
89
|
+
single = eng.analyze_single("KRAS:12:25227344:A:T")
|
|
90
|
+
print(single.summary())
|
|
91
|
+
print(single.missplicing.to_dataframe()) # missed + discovered sites
|
|
92
|
+
single.plot_missplicing()
|
|
93
|
+
|
|
94
|
+
# Pair — what happens when both mutations co-occur?
|
|
95
|
+
pair = eng.analyze_pair("KRAS:12:25227343:G:T", "KRAS:12:25227344:A:T")
|
|
96
|
+
print(pair.pair_classification) # → "rescue"
|
|
97
|
+
print(pair.epistatic_sites())            # only the synergy / rescue sites
|
|
98
|
+
pair.plot_case_study() # the bar figure
|
|
99
|
+
|
|
100
|
+
# N-variant (higher-order)
|
|
101
|
+
multi = eng.analyze_multi([
|
|
102
|
+
"KRAS:12:25227343:G:T",
|
|
103
|
+
"KRAS:12:25227344:A:T",
|
|
104
|
+
"KRAS:12:25227345:G:C",
|
|
105
|
+
])
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### Bulk classification of a DataFrame
|
|
109
|
+
|
|
110
|
+
```python
|
|
111
|
+
import pandas as pd
|
|
112
|
+
df = pd.read_csv("pairs.csv") # column: epistasis_id (e.g. "GENE:CHR:POS:REF:ALT|GENE:CHR:POS:REF:ALT")
|
|
113
|
+
|
|
114
|
+
out = eng.classify_dataframe(
|
|
115
|
+
df, epistasis_id_col="epistasis_id",
|
|
116
|
+
checkpoint_path="results.csv",
|
|
117
|
+
)
|
|
118
|
+
# adds: pair_classification, max_abs_residual, max_abs_event_delta,
|
|
119
|
+
# n_del_syn, n_cryp_syn, n_rescue, n_cryp_rescue, engine, error
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
Per-gene grouping + batched `scan()` underneath — typically 10–40× faster than the per-pair path on TCGA-shaped datasets. The runner is resume-safe (re-running with the same checkpoint path skips already-done pairs) and emits both per-pair and per-single CSVs.
|
|
123
|
+
|
|
124
|
+
### Engine-only API (no geney needed)
|
|
125
|
+
|
|
126
|
+
```python
|
|
127
|
+
from oncosplice.engines import get_predictor, list_available_engines
|
|
128
|
+
print(list_available_engines())
|
|
129
|
+
|
|
130
|
+
p = get_predictor("spliceai_pytorch")
|
|
131
|
+
pred = p.predict(padded_sequence) # → SplicingPrediction(acceptor, donor)
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
## The classifier — 4 mechanism classes
|
|
135
|
+
|
|
136
|
+
At every splice site, given four predicted probabilities `ref`, `mut1`,
|
|
137
|
+
`mut2`, `event` (all in [0, 1]) and the annotation flag, we test four
|
|
138
|
+
mutually-exclusive rules. The residual `expected − event` (or `event − expected`,
|
|
139
|
+
depending on direction) plus the band-membership of `ref`, `mut1`, `mut2`,
|
|
140
|
+
`event` decide the class. `expected = mut1 + mut2 − ref` is the additive null.
|
|
141
|
+
|
|
142
|
+
**Thresholds (one set, used everywhere):**
|
|
143
|
+
|
|
144
|
+
| Symbol | Value | Meaning |
|
|
145
|
+
|---|---|---|
|
|
146
|
+
| `HIGH` | 0.50 | "site present" (includes alt-spliced sites) |
|
|
147
|
+
| `LOW` | 0.05 | "site absent" |
|
|
148
|
+
| `RES` | 0.10 | minimum residual magnitude |
|
|
149
|
+
| `NEAR_WT` | 0.20 | `|event − ref|` tolerance for rescue |
|
|
150
|
+
|
|
151
|
+
**Hard prerequisite — WT prediction must agree with annotation.** Every rule
|
|
152
|
+
first checks that the engine's wild-type prediction is consistent with the
|
|
153
|
+
annotation: `annotated == True ⇒ ref ≥ HIGH`, `annotated == False ⇒ ref ≤ LOW`.
|
|
154
|
+
Sites where the engine disagrees with the annotation are dropped as
|
|
155
|
+
non-epistatic without consulting the mutations. This is the noise filter.
|
|
156
|
+
|
|
157
|
+
### The four rules
|
|
158
|
+
|
|
159
|
+
| Class | When the site is annotated (`ref ≥ HIGH`) | Rule | Residual |
|
|
160
|
+
|---|---|---|---|
|
|
161
|
+
| **rescue** | one single deletes, joint restores | `min(mut1, mut2) ≤ ref − HIGH` ∧ `|event − ref| ≤ NEAR_WT` ∧ `event − min(mut1, mut2) ≥ RES` | `rescue_residual = event − min(mut1, mut2)` |
|
|
162
|
+
| **deletion synergy** | both singles preserve, joint destroys | `min(mut1, mut2) ≥ HIGH` ∧ `ref − event ≥ RES` ∧ `expected − event ≥ RES` | `synergy_residual = expected − event` |
|
|
163
|
+
|
|
164
|
+
| Class | When the site is not annotated (`ref ≤ LOW`) | Rule | Residual |
|
|
165
|
+
|---|---|---|---|
|
|
166
|
+
| **cryptic rescue** | one single creates, joint silences | `max(mut1, mut2) ≥ HIGH` ∧ `event ≤ LOW` ∧ `max(mut1, mut2) − event ≥ RES` | `rescue_residual = max(mut1, mut2) − event` |
|
|
167
|
+
| **cryptic synergy** | both silent, joint creates | `max(mut1, mut2) ≤ LOW` ∧ `event ≥ HIGH` ∧ `event − expected ≥ RES` | `synergy_residual = event − expected` |
|
|
168
|
+
|
|
169
|
+
Anything else → **non-epistatic**.
|
|
170
|
+
|
|
171
|
+
### Numeric example
|
|
172
|
+
|
|
173
|
+
```
|
|
174
|
+
# annotated acceptor in INPP5J — spliceai_pytorch
|
|
175
|
+
ref = 0.972 annotated = True
|
|
176
|
+
m1 = 0.658 (m1 alone preserves: 0.658 ≥ 0.50)
|
|
177
|
+
m2 = 0.841 (m2 alone preserves: 0.841 ≥ 0.50)
|
|
178
|
+
event = 0.339
|
|
179
|
+
expected = m1 + m2 - ref = 0.527
|
|
180
|
+
|
|
181
|
+
# ref ≥ HIGH ✓ and annotated ✓ → annotated branch
|
|
182
|
+
# min(m1, m2) = 0.658 ≥ HIGH ✓ → not rescue (singles preserve)
|
|
183
|
+
# ref - event = 0.633 ≥ RES (0.10) ✓
|
|
184
|
+
# expected - event = 0.188 ≥ RES (0.10) ✓ → deletion_synergy
|
|
185
|
+
# synergy_residual = 0.188
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
### Pair-level aggregation
|
|
189
|
+
|
|
190
|
+
A pair's overall label is the class of the splice site with the *largest*
|
|
191
|
+
mechanism residual (rescue or synergy). Ties break by class priority:
|
|
192
|
+
`deletion_synergy > cryptic_synergy > rescue > cryptic_rescue > non-epistatic`.
|
|
193
|
+
The full per-site breakdown is always retained in `pair.site_residuals`.
|
|
194
|
+
|
|
195
|
+
## Available splicing engines
|
|
196
|
+
|
|
197
|
+
| Name | Architecture | Notes |
|
|
198
|
+
|---|---|---|
|
|
199
|
+
| `spliceai_pytorch` (default for production) | Original SpliceAI weights (Jaganathan 2019), plain-ReLU PyTorch architecture | Numerically identical to Keras SpliceAI, ~2.5× faster, no TF dependency |
|
|
200
|
+
| `openspliceai` | OpenSpliceAI PyTorch port, MANE-trained 5-model ensemble | Independent retrain; differs from Keras SpliceAI in fine numerics |
|
|
201
|
+
| `pangolin` | 40-model multi-tissue PyTorch ensemble (Zeng & Li 2022) | Tissue-specific splice usage |
|
|
202
|
+
| `spliceformer` | 40k-context transformer ensemble (Jónsson 2024) | Long-range context; requires the Spliceformer repo |
|
|
203
|
+
| `spliceai_keras` | Original Illumina `.h5` weights | **Reference only** — prefer `spliceai_pytorch` |
|
|
204
|
+
| `ensemble:a,b,c` / `average` | Mean probabilities across N constituent engines | Cross-engine consensus |
|
|
205
|
+
|
|
206
|
+
## Package layout
|
|
207
|
+
|
|
208
|
+
```
|
|
209
|
+
oncosplice/
|
|
210
|
+
├── engine.py # OncospliceEngine — orchestrator (analyze_single/pair/multi, scan, classify_dataframe)
|
|
211
|
+
├── results.py # typed dataclasses: SingleVariantResult, DoubleVariantResult, MultiVariantResult
|
|
212
|
+
├── variants.py # Variant + VariantPair (no geney dependency)
|
|
213
|
+
├── viz.py # plot_case_study + supporting bar figures
|
|
214
|
+
├── engines/ # standalone splice-site predictor adapters (uniform interface)
|
|
215
|
+
│ ├── base.py
|
|
216
|
+
│ ├── spliceai_pytorch.py
|
|
217
|
+
│ ├── openspliceai.py
|
|
218
|
+
│ ├── pangolin.py
|
|
219
|
+
│ ├── spliceformer.py
|
|
220
|
+
│ ├── spliceai_keras.py
|
|
221
|
+
│ └── ensemble.py
|
|
222
|
+
├── scoring/ # pure-Python scoring primitives
|
|
223
|
+
│ ├── splicing.py
|
|
224
|
+
│ ├── epistasis.py # the four-class classifier (see "The four rules") + vectorized residual computation
|
|
225
|
+
│ ├── oncosplice.py # protein-divergence Oncosplice score
|
|
226
|
+
│ └── fingerprint.py # splicing-outcome hashing
|
|
227
|
+
└── weights/ # weight-resolution + downloader CLI
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
## Examples
|
|
231
|
+
|
|
232
|
+
See `examples/`:
|
|
233
|
+
|
|
234
|
+
- `KRAS_rescue.ipynb` — a canonical KRAS donor is disrupted by mut1 alone and restored by the joint event; this is the mechanism the classifier surfaces as **rescue**.
|
|
235
|
+
- `CREBBP_synergistic.ipynb` — the joint event activates one cryptic acceptor (synergy) while rescuing another from each single's activation (rescue). The classifier reports the dominant **synergistic** call with the rescue site preserved in the per-site table.
|
|
236
|
+
|
|
237
|
+
## Testing
|
|
238
|
+
|
|
239
|
+
```bash
|
|
240
|
+
pytest tests/ # full suite
|
|
241
|
+
pytest tests/test_scoring.py # classifier + residual rules
|
|
242
|
+
pytest tests/test_spliceai_equivalence.py # Keras ↔ PyTorch numerical parity
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
## Citing
|
|
246
|
+
|
|
247
|
+
If you use this code in a published analysis, please cite the two papers above.
|
|
248
|
+
|
|
249
|
+
## License
|
|
250
|
+
|
|
251
|
+
MIT — see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
"""
|
|
2
|
+
oncosplice — sequence-level splicing-epistasis pipeline.
|
|
3
|
+
|
|
4
|
+
Top-level layout:
|
|
5
|
+
|
|
6
|
+
- :class:`OncospliceEngine` — orchestrator. Requires ``geney`` + ``seqmat``.
|
|
7
|
+
- :mod:`oncosplice.engines` — standalone splice-site predictor adapters
|
|
8
|
+
(``OpenSpliceAI``, ``SpliceAIPyTorch``, ``Pangolin``, ``Spliceformer``,
|
|
9
|
+
``EnsemblePredictor``). Importable without ``geney``. ``SpliceAIKeras`` is
|
|
10
|
+
also available as the *reference implementation* for verification only —
|
|
11
|
+
prefer ``SpliceAIPyTorch`` (identical output, ~2.5× faster, no TF dep).
|
|
12
|
+
- :mod:`oncosplice.scoring` — splicing / epistasis / Oncosplice scoring
|
|
13
|
+
primitives. Pure-python; no model dependencies.
|
|
14
|
+
- :mod:`oncosplice.results` — typed dataclasses for results / protein library.
|
|
15
|
+
- :mod:`oncosplice.weights` — model-weight resolver + downloader CLI.
|
|
16
|
+
|
|
17
|
+
Quick start:
|
|
18
|
+
|
|
19
|
+
>>> from oncosplice import OncospliceEngine
|
|
20
|
+
>>> eng = OncospliceEngine(splicing_engine="openspliceai")
|
|
21
|
+
>>> pair = eng.analyze_pair("KRAS:12:25227343:G:T", "KRAS:12:25227344:A:T")
|
|
22
|
+
>>> print(pair.summary())
|
|
23
|
+
|
|
24
|
+
Predictor-only use (no geney / seqmat needed):
|
|
25
|
+
|
|
26
|
+
>>> from oncosplice.engines import get_predictor
|
|
27
|
+
>>> p = get_predictor("openspliceai")
|
|
28
|
+
>>> pred = p.predict(sequence)
|
|
29
|
+
"""
|
|
30
|
+
from __future__ import annotations
|
|
31
|
+
|
|
32
|
+
# Engines, results, scoring helpers and variants are dependency-light and safe to import eagerly (weights is not imported here).
|
|
33
|
+
from .engines import (
|
|
34
|
+
EnsemblePredictor,
|
|
35
|
+
OpenSpliceAI,
|
|
36
|
+
Pangolin,
|
|
37
|
+
SpliceAIKeras,
|
|
38
|
+
SpliceAIPyTorch,
|
|
39
|
+
Spliceformer,
|
|
40
|
+
SplicingPrediction,
|
|
41
|
+
SplicingPredictor,
|
|
42
|
+
get_predictor,
|
|
43
|
+
list_available_engines,
|
|
44
|
+
)
|
|
45
|
+
from .results import (
|
|
46
|
+
DoubleVariantResult,
|
|
47
|
+
MissplicingProfile,
|
|
48
|
+
MultiVariantResult,
|
|
49
|
+
ProteinLibrary,
|
|
50
|
+
SingleVariantResult,
|
|
51
|
+
SiteEpistasis,
|
|
52
|
+
)
|
|
53
|
+
from .scoring.fingerprint import splicing_outcome_fingerprint, splicing_outcome_hash
|
|
54
|
+
from .variants import Variant, VariantPair
|
|
55
|
+
|
|
56
|
+
__version__ = "3.2.0" # 3-bucket classifier: synergistic / rescue / compounding (+ non-epistatic fallback)
|
|
57
|
+
|
|
58
|
+
__all__ = [
|
|
59
|
+
"OncospliceEngine", # lazy attribute below
|
|
60
|
+
"SingleVariantResult", "DoubleVariantResult", "MultiVariantResult",
|
|
61
|
+
"SiteEpistasis", "MissplicingProfile", "ProteinLibrary",
|
|
62
|
+
"splicing_outcome_fingerprint", "splicing_outcome_hash",
|
|
63
|
+
"Variant", "VariantPair",
|
|
64
|
+
"SplicingPredictor", "SplicingPrediction",
|
|
65
|
+
"OpenSpliceAI", "SpliceAIKeras", "SpliceAIPyTorch", "Pangolin", "Spliceformer",
|
|
66
|
+
"EnsemblePredictor",
|
|
67
|
+
"get_predictor", "list_available_engines",
|
|
68
|
+
]
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def __getattr__(name):
    """Module-level attribute hook (PEP 562) that loads ``OncospliceEngine`` on demand.

    Deferring the ``.engine`` import until the class is first requested keeps
    the rest of the package (engines, weights, scoring) importable without
    geney/seqmat installed.

    Args:
        name: Attribute requested on the package that normal lookup did not find.

    Returns:
        The ``OncospliceEngine`` class when ``name`` is ``"OncospliceEngine"``.

    Raises:
        AttributeError: For every other attribute name.
    """
    if name != "OncospliceEngine":
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    from .engine import OncospliceEngine as engine_cls

    # Bind the class in the module namespace so subsequent
    # ``from oncosplice import OncospliceEngine`` resolves directly. PEP 562
    # __getattr__ combined with ``from … import …`` interacts oddly in some
    # CPython 3.13 builds; caching here is a robust workaround.
    globals()["OncospliceEngine"] = engine_cls
    return engine_cls
|