geney 1.4.40__tar.gz → 1.4.45__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. {geney-1.4.40 → geney-1.4.45}/PKG-INFO +2 -3
  2. {geney-1.4.40 → geney-1.4.45}/geney/__init__.py +18 -5
  3. geney-1.4.45/geney/engines.py +354 -0
  4. geney-1.4.45/geney/models/openspliceai-mane/10000nt/model_10000nt_rs10.pt +0 -0
  5. geney-1.4.45/geney/models/openspliceai-mane/10000nt/model_10000nt_rs11.pt +0 -0
  6. geney-1.4.45/geney/models/openspliceai-mane/10000nt/model_10000nt_rs12.pt +0 -0
  7. geney-1.4.45/geney/models/openspliceai-mane/10000nt/model_10000nt_rs13.pt +0 -0
  8. geney-1.4.45/geney/models/openspliceai-mane/10000nt/model_10000nt_rs14.pt +0 -0
  9. {geney-1.4.40 → geney-1.4.45}/geney/oncosplice.py +2 -1
  10. geney-1.4.45/geney/pipelines.py +139 -0
  11. {geney-1.4.40 → geney-1.4.45}/geney/splice_graph.py +278 -12
  12. geney-1.4.45/geney/splice_graph_archive.py +948 -0
  13. {geney-1.4.40 → geney-1.4.45}/geney/transcripts.py +1 -1
  14. {geney-1.4.40 → geney-1.4.45}/geney.egg-info/PKG-INFO +2 -3
  15. geney-1.4.45/geney.egg-info/SOURCES.txt +22 -0
  16. {geney-1.4.40 → geney-1.4.45}/geney.egg-info/requires.txt +1 -2
  17. {geney-1.4.40 → geney-1.4.45}/setup.py +4 -1
  18. geney-1.4.40/geney/engines.py +0 -307
  19. geney-1.4.40/geney/pipelines.py +0 -97
  20. geney-1.4.40/geney/samples.py +0 -3
  21. geney-1.4.40/geney/splicing_table.py +0 -142
  22. geney-1.4.40/geney/utils.py +0 -254
  23. geney-1.4.40/geney.egg-info/SOURCES.txt +0 -19
  24. {geney-1.4.40 → geney-1.4.45}/MANIFEST.in +0 -0
  25. {geney-1.4.40 → geney-1.4.45}/README.md +0 -0
  26. {geney-1.4.40 → geney-1.4.45}/geney/variants.py +0 -0
  27. {geney-1.4.40 → geney-1.4.45}/geney.egg-info/dependency_links.txt +0 -0
  28. {geney-1.4.40 → geney-1.4.45}/geney.egg-info/top_level.txt +0 -0
  29. {geney-1.4.40 → geney-1.4.45}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: geney
3
- Version: 1.4.40
3
+ Version: 1.4.45
4
4
  Summary: A Python package for gene expression modeling.
5
5
  Home-page: https://github.com/nicolaslynn/geney
6
6
  Author: Nicolas Lynn
@@ -18,9 +18,8 @@ Requires-Dist: pandas==2.1.4
18
18
  Requires-Dist: biopython>=1.81
19
19
  Requires-Dist: matplotlib
20
20
  Requires-Dist: seaborn
21
- Requires-Dist: tensorflow>=2.8.0
22
- Requires-Dist: keras>=2.8.0
23
21
  Requires-Dist: torch
22
+ Requires-Dist: openspliceai
24
23
  Requires-Dist: seqmat
25
24
  Dynamic: author
26
25
  Dynamic: author-email
@@ -4,12 +4,17 @@ from .engines import (
4
4
  sai_predict_probs,
5
5
  run_spliceai_seq,
6
6
  run_splicing_engine,
7
+ predict_splicing,
8
+ adjoin_splicing_outcomes,
7
9
  )
8
10
  from .transcripts import TranscriptLibrary
9
- from .splicing_table import adjoin_splicing_outcomes
10
11
  from .splice_graph import SpliceSimulator
11
- from .pipelines import oncosplice_pipeline_single_transcript
12
- from .samples import *
12
+ from .pipelines import (
13
+ oncosplice_pipeline,
14
+ oncosplice_top_isoform,
15
+ max_splicing_delta,
16
+ oncosplice_pipeline_single_transcript, # backwards compat
17
+ )
13
18
 
14
19
  __all__ = [
15
20
  "Mutation",
@@ -18,8 +23,16 @@ __all__ = [
18
23
  "sai_predict_probs",
19
24
  "run_spliceai_seq",
20
25
  "run_splicing_engine",
21
- "TranscriptLibrary",
26
+ "predict_splicing",
22
27
  "adjoin_splicing_outcomes",
28
+ "TranscriptLibrary",
23
29
  "SpliceSimulator",
30
+ "oncosplice_pipeline",
31
+ "oncosplice_top_isoform",
32
+ "max_splicing_delta",
24
33
  "oncosplice_pipeline_single_transcript",
25
- ]
34
+ ]
35
+
36
+
37
+ mut_id = 'KRAS:12:25227343:G:T'
38
+ epistasis_id = 'KRAS:12:25227343:G:T|KRAS:12:25227344:A:T'
@@ -0,0 +1,354 @@
1
+ # geney/engines.py
2
+ from __future__ import annotations
3
+
4
+ from typing import Dict, List, Tuple, Optional, Union
5
+ import numpy as np
6
+
7
+ # Lazy-loaded model containers (loaded automatically on first use)
8
+ _pang_models = None
9
+ _pang_device = None
10
+
11
+
12
def _get_torch_device():
    """Pick the PyTorch device to run models on.

    Preference order: Apple MPS (only on macOS, and only if a probe tensor
    can actually be allocated there), then CUDA, then CPU.

    Returns:
        torch.device: the selected device.
    """
    import sys
    import torch

    cpu = torch.device("cpu")

    mps_reported = sys.platform == 'darwin' and torch.backends.mps.is_available()
    if mps_reported:
        # MPS can be reported as available yet fail on first use, so probe
        # it with a tiny allocation and fall back to CPU if that raises.
        try:
            torch.tensor([1.0], device="mps")
        except RuntimeError:
            return cpu
        return torch.device("mps")

    if torch.cuda.is_available():
        return torch.device("cuda")

    return cpu
26
+
27
+
28
def _load_pangolin_models():
    """Load the Pangolin model ensemble once and cache it at module level.

    Weight files are looked up inside the installed ``pangolin`` package as
    ``models/final.{j}.{i}.3`` for ``i`` in 0..7 and ``j`` in 1..5. A model
    that fails to load is reported with a warning and skipped (best effort).

    The module-level cache is only published after the whole loading pass
    has finished, and an empty result is not treated as a valid cache, so a
    failed first attempt is retried on the next call instead of being
    remembered forever.

    Returns:
        list: the loaded models, in eval mode, on the selected device
        (possibly empty if every model failed to load).
    """
    global _pang_models, _pang_device

    # An empty list means a previous attempt loaded nothing: try again.
    if _pang_models:
        return _pang_models

    # importlib.resources replaces the deprecated pkg_resources lookup.
    from importlib.resources import as_file, files

    import torch
    from pangolin.model import Pangolin, L, W, AR

    device = _get_torch_device()
    print(f"Pangolin loading to {device}...")

    model_root = files("pangolin") / "models"
    pang_model_nums = [0, 1, 2, 3, 4, 5, 6, 7]

    loaded = []
    for i in pang_model_nums:
        for j in range(1, 6):
            try:
                model = Pangolin(L, W, AR).to(device)
                # as_file yields a real filesystem path even if the package
                # is not installed as plain files.
                with as_file(model_root / f"final.{j}.{i}.3") as model_path:
                    weights = torch.load(model_path, weights_only=True, map_location=device)
                model.load_state_dict(weights)
                model.eval()
                loaded.append(model)
            except Exception as e:
                # Deliberate best effort: one bad weight file must not
                # prevent the rest of the ensemble from loading.
                print(f"Warning: Failed to load Pangolin model {j}.{i}: {e}")

    # Publish state only now, so callers never observe a half-filled list.
    _pang_device = device
    _pang_models = loaded

    print(f"Pangolin loaded ({len(_pang_models)} models).")
    return _pang_models
59
+
60
+
61
_OPENSPLICEAI_MODEL_DIR = None


def _get_openspliceai_model_dir() -> str:
    """Resolve (once) the directory holding the OpenSpliceAI MANE 10000nt models.

    The ``OPENSPLICEAI_MODEL_DIR`` environment variable wins when set;
    otherwise the directory ``models/openspliceai-mane/10000nt`` next to
    this module is used. The answer is cached in a module-level variable, so
    later changes to the environment variable are not picked up.

    Returns:
        str: path to the model directory.
    """
    global _OPENSPLICEAI_MODEL_DIR

    if _OPENSPLICEAI_MODEL_DIR is None:
        import os

        # Location of the models that ship inside the package.
        here = os.path.dirname(os.path.abspath(__file__))
        bundled = os.path.normpath(
            os.path.join(here, 'models', 'openspliceai-mane', '10000nt')
        )
        _OPENSPLICEAI_MODEL_DIR = os.environ.get('OPENSPLICEAI_MODEL_DIR', bundled)

    return _OPENSPLICEAI_MODEL_DIR
78
+
79
+
80
def pang_one_hot_encode(seq: str) -> np.ndarray:
    """One-hot encode a DNA sequence for the Pangolin model.

    Encoding is case-insensitive. ``A``, ``C``, ``G`` and ``T`` map to the
    four unit vectors (in that column order) and ``N`` maps to an all-zero
    row.

    Args:
        seq: DNA string made of the characters A, C, G, T, N (any case).

    Returns:
        Integer array of shape ``(len(seq), 4)``.

    Raises:
        TypeError: if ``seq`` is not a string.
        ValueError: if ``seq`` contains any other character.
    """
    if not isinstance(seq, str):
        raise TypeError(f"Expected string, got {type(seq).__name__}")

    IN_MAP = np.asarray([[0, 0, 0, 0],   # N
                         [1, 0, 0, 0],   # A
                         [0, 1, 0, 0],   # C
                         [0, 0, 1, 0],   # G
                         [0, 0, 0, 1]])  # T

    upper = seq.upper()
    if not set(upper) <= set('NACGT'):
        raise ValueError("Sequence contains invalid characters")

    # One translate pass turns each base into its row number as an ASCII
    # digit; the digits are then decoded in bulk instead of calling int()
    # once per character.
    digits = upper.translate(str.maketrans('NACGT', '01234'))
    codes = np.frombuffer(digits.encode('ascii'), dtype=np.uint8) - ord('0')
    return IN_MAP[codes]
100
+
101
+
102
+
103
+
104
def pangolin_predict_probs(seq: str, models: list = None) -> Tuple[List[float], List[float]]:
    """Predict per-position donor and acceptor scores with a Pangolin ensemble.

    Every model is run on the one-hot encoded sequence, the outputs are
    averaged across models, and for each score type the maximum over four
    channels is taken.

    Args:
        seq: DNA sequence (A/C/G/T/N).
        models: models to use; when ``None`` the cached ensemble is loaded
            via ``_load_pangolin_models()``.

    Returns:
        ``(donor_probs, acceptor_probs)`` as lists of floats.

    Raises:
        ValueError: if no models are available.
    """
    import torch

    if models is None:
        models = _load_pangolin_models()

    if not models:
        raise ValueError("No Pangolin models loaded")

    # Put the input on the device the models live on. The module-level
    # _pang_device is only set by the lazy loader, so it can be None or
    # wrong when the caller supplies their own models.
    param_fn = getattr(models[0], "parameters", None)
    first_param = next(iter(param_fn()), None) if callable(param_fn) else None
    device = first_param.device if first_param is not None else _pang_device

    x = pang_one_hot_encode(seq)
    # (seq_len, 4) -> (1, 4, seq_len): add a batch axis, channels first.
    x = torch.tensor(x.T[None, :, :], dtype=torch.float32, device=device)

    preds = []
    with torch.no_grad():
        for model in models:
            pred = model(x)
            preds.append(pred.cpu().numpy())

    # Ensemble average; assumed shape (1, 12, out_len) — TODO confirm.
    y = np.mean(preds, axis=0)

    # NOTE(review): the channel interpretation below (acceptor = 1,4,7,10;
    # donor = 2,5,8,11; one channel per tissue) is carried over from the
    # original author's comment and should be confirmed against the
    # Pangolin documentation.
    acceptor_channels = y[0, [1, 4, 7, 10], :]
    acceptor_probs = np.max(acceptor_channels, axis=0).tolist()

    donor_channels = y[0, [2, 5, 8, 11], :]
    donor_probs = np.max(donor_channels, axis=0).tolist()

    return donor_probs, acceptor_probs
147
+
148
+
149
def sai_predict_probs(seq: str) -> Tuple[np.ndarray, np.ndarray]:
    """Predict acceptor and donor probabilities using OpenSpliceAI.

    Delegates to OpenSpliceAI's ``predict()`` with ``flanking_size=10000``
    and the model directory from ``_get_openspliceai_model_dir()``. Anything
    the library prints to stdout during prediction is discarded.

    Args:
        seq: DNA sequence to score.

    Returns:
        ``(acceptor_probs, donor_probs)`` as numpy arrays: columns 1 and 2
        of the prediction matrix.
    """
    import contextlib
    import io

    from openspliceai.predict.predict import predict

    model_dir = _get_openspliceai_model_dir()

    # Suppress OpenSpliceAI's verbose print output; the context manager
    # restores sys.stdout even if predict() raises.
    with contextlib.redirect_stdout(io.StringIO()):
        y = predict(seq, model_dir, flanking_size=10000)  # presumably (seq_len, 3)

    # A tensor on an accelerator, or one tracking gradients, cannot be
    # converted with .numpy() directly; move it to CPU and detach first.
    if hasattr(y, "detach"):
        y = y.detach().cpu()

    y = y.numpy()
    return y[:, 1], y[:, 2]  # acceptor, donor
173
+
174
+
175
def run_spliceai_seq(
    seq: str,
    indices: Union[List[int], np.ndarray],
    threshold: float = 0.0,
) -> Tuple[Dict[int, float], Dict[int, float]]:
    """Score ``seq`` with SpliceAI and keep the sites at or above ``threshold``.

    Args:
        seq: DNA sequence to score.
        indices: one position label per character of ``seq``.
        threshold: minimum probability for a site to be reported.

    Returns:
        ``(donor, acceptor)``: two dicts mapping position -> probability.

    Raises:
        ValueError: if ``indices`` and ``seq`` differ in length.
    """
    n_indices, n_bases = len(indices), len(seq)
    if n_indices != n_bases:
        raise ValueError(f"indices length ({n_indices}) must match sequence length ({n_bases})")

    acc_probs, don_probs = sai_predict_probs(seq)

    def _sites_above(probs):
        # Pair each probability with its position and drop the weak ones.
        return {pos: p for pos, p in zip(indices, probs) if p >= threshold}

    acceptor = _sites_above(acc_probs)
    donor = _sites_above(don_probs)
    return donor, acceptor
188
+
189
+
190
def _generate_random_sequence(length: int) -> str:
    """Return a random string of ``length`` bases drawn from A, C, G, T.

    Uses the global ``random`` module state, so results are reproducible
    after ``random.seed(...)``.
    """
    import random

    bases = random.choices('ACGT', k=length)
    return ''.join(bases)
194
+
195
+
196
def run_splicing_engine(
    seq: Optional[str] = None,
    engine: str = "spliceai",
) -> Tuple[List[float], List[float]]:
    """Run the chosen splicing engine over a sequence.

    Args:
        seq: DNA sequence (A/C/G/T/N, any case). When ``None`` a random
            15,001-nt sequence is generated and scored instead.
        engine: ``"spliceai"`` or ``"pangolin"``.

    Returns:
        ``(donor_probs, acceptor_probs)`` as lists of floats.

    Raises:
        ValueError: if ``seq`` is not a non-empty string, contains other
            characters, or ``engine`` is not a supported name.
    """
    if seq is None:
        seq = _generate_random_sequence(15_001)

    if not (isinstance(seq, str) and seq):
        raise ValueError("Sequence must be a non-empty string")

    allowed = set("ACGTN")
    if any(base.upper() not in allowed for base in seq):
        raise ValueError("Sequence contains invalid nucleotides")

    if engine == "spliceai":
        # sai_predict_probs returns (acceptor, donor); flip to donor-first.
        acc, don = sai_predict_probs(seq)
        return don.tolist(), acc.tolist()

    if engine == "pangolin":
        return pangolin_predict_probs(seq)

    raise ValueError(f"Engine '{engine}' not implemented. Available: 'spliceai', 'pangolin'")
219
+
220
+
221
+ # ------------------------------------------------------------------------------
222
+ # Higher-level prediction utilities (formerly in splicing_table.py)
223
+ # ------------------------------------------------------------------------------
224
+
225
def predict_splicing(s, position: int, engine: str = 'spliceai', context: int = 7500):
    """
    Predict splice-site probabilities in a window centred on ``position``.

    The window ``[position - context, position + context]`` is cloned from
    ``s``; where the sequence does not reach that far it is padded with
    ``N`` and extrapolated index values. The engine is run on the padded
    window, 5000 positions are then trimmed from each end of the sequence
    and index, and the probability lists are centre-cropped (or
    zero-padded at the end) to the trimmed length.

    NOTE(review): the 5000-position trim is hard-coded and independent of
    ``context``; a small ``context`` appears to leave an empty window —
    confirm this is intended.

    Args:
        s: Sequence object with .seq, .index, .clone(), .rev and .name
        position: The genomic position to predict splicing probabilities for.
        engine: The prediction engine to use. Supported: 'spliceai', 'pangolin'.
        context: Number of positions requested on each side of ``position``.

    Returns:
        pd.DataFrame indexed by position with columns donor_prob,
        acceptor_prob, nucleotides (rounded to 3 decimals); the sequence
        name is stored in ``df.attrs['name']``.

    Raises:
        ValueError: if ``position`` lies outside ``s.index`` or the cloned
            window has no sequence / no indices.
    """
    import pandas as pd

    lower, upper = s.index.min(), s.index.max()
    if position < lower or position > upper:
        raise ValueError(f"Position {position} is outside sequence bounds [{lower}, {upper}]")

    window = s.clone(position - context, position + context)
    if len(window.seq) == 0:
        raise ValueError(f"No sequence data found around position {position} with context {context}")

    seq = window.seq
    indices = window.index
    if len(indices) == 0:
        raise ValueError(f"No indices found in sequence around position {position}")

    # Offset of the index entry closest to the requested position.
    rel_pos = np.abs(indices - position).argmin()
    left_missing = max(0, context - rel_pos)
    right_missing = max(0, context - (len(seq) - rel_pos))

    if left_missing > 0 or right_missing > 0:
        # Index values run downwards on the reverse strand.
        step = -1 if s.rev else 1
        first, last = indices[0], indices[-1]

        if left_missing > 0:
            left_pad = np.arange(first - step * left_missing, first, step)
        else:
            left_pad = np.array([], dtype=indices.dtype)

        if right_missing > 0:
            right_pad = np.arange(last + step, last + step * (right_missing + 1), step)
        else:
            right_pad = np.array([], dtype=indices.dtype)

        seq = 'N' * left_missing + seq + 'N' * right_missing
        indices = np.concatenate([left_pad, indices, right_pad])

    donor_probs, acceptor_probs = run_splicing_engine(seq=seq, engine=engine)

    # Drop the flanks so only the central region is reported.
    seq = seq[5000:-5000]
    indices = indices[5000:-5000]
    expected_len = len(seq)

    n_probs = len(donor_probs)
    if n_probs > expected_len:
        # Engine returned more positions than the trimmed window: centre-crop.
        offset = (n_probs - expected_len) // 2
        stop = offset + expected_len
        donor_probs = donor_probs[offset:stop]
        acceptor_probs = acceptor_probs[offset:stop]
    elif n_probs < expected_len:
        # Engine returned fewer positions: pad the tail with zeros.
        pad_len = expected_len - n_probs
        donor_probs = donor_probs + [0.0] * pad_len
        acceptor_probs = acceptor_probs + [0.0] * pad_len

    table = pd.DataFrame({
        'position': indices,
        'donor_prob': donor_probs,
        'acceptor_prob': acceptor_probs,
        'nucleotides': list(seq)
    })
    df = table.set_index('position').round(3)

    df.attrs['name'] = s.name
    return df
297
+
298
+
299
def adjoin_splicing_outcomes(
    splicing_predictions: Dict[str, 'pd.DataFrame'],
    transcript: Optional[object] = None,
) -> 'pd.DataFrame':
    """
    Combine splicing predictions for multiple mutations into a multi-index DataFrame.

    Each input frame's columns are renamed to two-level keys:
    ``("donors", "<label>_prob")``, ``("acceptors", "<label>_prob")`` and
    ``("nts", "<label>")``. The frames are then joined side by side on
    their index.

    Args:
        splicing_predictions: {label -> DF with 'donor_prob','acceptor_prob','nucleotides'}
        transcript: optional transcript (must have .acceptors, .donors, .rev).
            When given, boolean ``("acceptors", "annotated")`` and
            ``("donors", "annotated")`` columns mark index positions found
            in the transcript's annotated sites, and rows are sorted
            descending when ``transcript.rev`` is true, ascending otherwise.

    Returns:
        pd.DataFrame with two-level columns sorted by column label.

    Raises:
        ValueError: if the mapping is empty, a frame lacks a required
            column, or concatenation fails.
        TypeError: if a value is not a DataFrame.
    """
    import pandas as pd

    if not splicing_predictions:
        raise ValueError("splicing_predictions cannot be empty")

    required_cols = ("donor_prob", "acceptor_prob", "nucleotides")

    dfs = []
    for label, df in splicing_predictions.items():
        if not isinstance(df, pd.DataFrame):
            raise TypeError(f"Expected DataFrame for '{label}', got {type(df).__name__}")

        missing = [c for c in required_cols if c not in df.columns]
        if missing:
            raise ValueError(f"DataFrame for '{label}' missing required columns: {missing}")

        dfs.append(
            df.rename(
                columns={
                    "donor_prob": ("donors", f"{label}_prob"),
                    "acceptor_prob": ("acceptors", f"{label}_prob"),
                    "nucleotides": ("nts", f"{label}"),
                }
            )
        )

    try:
        full_df = pd.concat(dfs, axis=1)
    except Exception as e:
        raise ValueError(f"Failed to concatenate DataFrames: {e}") from e

    # rename() leaves a flat index of tuples; promote it to a real MultiIndex.
    if not isinstance(full_df.columns, pd.MultiIndex):
        full_df.columns = pd.MultiIndex.from_tuples(full_df.columns)

    if transcript is not None:
        # Vectorized membership test on the index; replaces a row-by-row
        # DataFrame.apply and also behaves on an empty frame.
        full_df[("acceptors", "annotated")] = full_df.index.isin(list(transcript.acceptors))
        full_df[("donors", "annotated")] = full_df.index.isin(list(transcript.donors))

    full_df.sort_index(axis=1, level=0, inplace=True)

    if transcript is not None:
        full_df.sort_index(ascending=not transcript.rev, inplace=True)

    return full_df
@@ -388,9 +388,10 @@ class Oncosplice:
388
388
  analysis_dict = {
389
389
  'reference_protein': self.reference_protein,
390
390
  'variant_protein': self.variant_protein,
391
+ 'aligned_reference_protein': self.alignment.seqA,
392
+ 'aligned_variant_protein': self.alignment.seqB,
391
393
  'reference_length': len(self.reference_protein),
392
394
  'variant_length': len(self.variant_protein),
393
- # 'alignment_length': len(self.alignment.seqA),
394
395
  'oncosplice_score': self.score,
395
396
  'percentile': self.percentile,
396
397
  'number_of_deletions': len(self.deletions),
@@ -0,0 +1,139 @@
1
+ # geney/pipelines.py
2
+ from __future__ import annotations
3
+
4
+ from datetime import datetime
5
+ import pandas as pd
6
+
7
+ from seqmat import Gene
8
+
9
+ from .splice_graph import SpliceSimulator
10
+ from .transcripts import TranscriptLibrary
11
+ from .variants import MutationalEvent
12
+ from .Oncosplice import Oncosplice
13
+
14
+
15
def oncosplice_pipeline(
    mut_id: str,
    transcript_id: str | None = None,
    splicing_engine: str = "spliceai",
    organism: str = "hg38",
) -> pd.DataFrame:
    """
    Run the full oncosplice pipeline for a mutation.

    Steps: parse the mutation event, build the reference transcript
    (pre-mRNA, mature mRNA, protein), predict splicing around the event's
    central position, simulate the resulting isoforms, and score each
    viable isoform's protein against the reference protein.

    Args:
        mut_id: mutation identifier understood by ``MutationalEvent``.
        transcript_id: transcript to analyse; passed as-is to
            ``Gene.transcript`` (``None`` by default).
        splicing_engine: engine name forwarded to the splicing prediction.
        organism: genome build passed to ``Gene.from_file``.

    Returns:
        DataFrame with one row per viable isoform, combining run metadata,
        the simulator report, isoform metadata, the reference/variant mRNA
        sequences and the oncosplice analysis.

    Raises:
        AssertionError: if the mutations in the event are incompatible.
    """
    event = MutationalEvent(mut_id)
    assert event.compatible(), "Mutations in event are incompatible"

    gene = Gene.from_file(event.gene, organism=organism)
    reference_transcript = (
        gene.transcript(transcript_id)
        .generate_pre_mrna()
        .generate_mature_mrna()
        .generate_protein()
    )

    library = TranscriptLibrary(reference_transcript, event)
    central_pos = event.central_position

    library.predict_splicing(central_pos, engine=splicing_engine, inplace=True)
    splicing_results = library.get_event_columns("event")

    simulator = SpliceSimulator(
        splicing_results, library.event, feature="event", max_distance=100_000_000
    )

    # Fields shared by every output row.
    base_report = pd.Series({
        "mut_id": mut_id,
        "gene": event.gene,
        "transcript_id": reference_transcript.transcript_id,
        "primary_transcript": reference_transcript.primary_transcript,
        "splicing_engine": splicing_engine,
        "central_position": central_pos,
        "mutation_count": len(event.positions),
        "time_of_execution": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    })

    ss_metadata = simulator.report(central_pos)

    rows = []
    for variant_transcript, isoform_metadata in simulator.get_viable_transcripts(metadata=True):
        onco = Oncosplice(
            reference_transcript.protein,
            variant_transcript.protein,
            reference_transcript.cons_vector,
        )
        mrna_pair = pd.Series({
            "reference_mrna": reference_transcript.mature_mrna.seq,
            "variant_mrna": variant_transcript.mature_mrna.seq,
        })
        row = pd.concat([
            base_report,
            ss_metadata,
            isoform_metadata,
            mrna_pair,
            onco.get_analysis_series(),
        ])
        rows.append(row)

    return pd.DataFrame(rows)
80
+
81
+
82
def oncosplice_top_isoform(
    mut_id: str,
    transcript_id: str | None = None,
    splicing_engine: str = "spliceai",
    organism: str = "hg38",
) -> pd.Series | None:
    """
    Return the first non-reference isoform reported for a mutation.

    Runs ``oncosplice_pipeline`` and picks the first row whose ``summary``
    column is not ``"-"``.

    Returns:
        The selected row as a Series, or None when the pipeline returns no
        rows or every row has ``summary == "-"``.
    """
    isoforms = oncosplice_pipeline(mut_id, transcript_id, splicing_engine, organism)

    if not isoforms.empty:
        # Rows whose summary is "-" are excluded as the reference isoform.
        non_reference = isoforms[isoforms["summary"] != "-"]
        if not non_reference.empty:
            return non_reference.iloc[0]

    return None
104
+
105
+
106
def max_splicing_delta(
    mut_id: str,
    transcript_id: str | None = None,
    splicing_engine: str = "spliceai",
    organism: str = "hg38",
) -> float:
    """
    Get the maximum splice site probability change for a mutation.

    Builds the reference transcript, predicts splicing around the event's
    central position and asks the splice simulator for the largest delta
    on the ``"event_prob"`` feature. No isoforms are scored.

    Raises:
        AssertionError: if the mutations in the event are incompatible.
    """
    event = MutationalEvent(mut_id)
    assert event.compatible(), "Mutations in event are incompatible"

    gene = Gene.from_file(event.gene, organism=organism)
    reference_transcript = (
        gene.transcript(transcript_id)
        .generate_pre_mrna()
        .generate_mature_mrna()
        .generate_protein()
    )

    library = TranscriptLibrary(reference_transcript, event)
    # Relies on predict_splicing(..., inplace=True) returning an object
    # that exposes get_event_columns (chained exactly as in the original).
    predicted = library.predict_splicing(
        event.central_position, engine=splicing_engine, inplace=True
    )
    splicing_results = predicted.get_event_columns("event")

    simulator = SpliceSimulator(
        splicing_results, library.event, feature="event", max_distance=100_000_000
    )

    return simulator.max_splicing_delta("event_prob")
136
+
137
+
138
+ # Keep old name for backwards compatibility
139
+ oncosplice_pipeline_single_transcript = oncosplice_pipeline