geney 1.4.40__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
geney/splice_graph.py ADDED
@@ -0,0 +1,371 @@
+ # oncosplice/splice_graph.py
+ from __future__ import annotations
+
+ from collections import defaultdict
+ from typing import Any, Dict, Generator, List, Tuple
+
+ import numpy as np
+ import pandas as pd
+ from pandas import Series
+
+ from .utils import short_hash_of_list  # type: ignore
+
+
+ class SpliceSimulator:
+     """
+     Builds a splice-site graph from a splicing DataFrame and enumerates isoform paths.
+     """
+
+     def __init__(self, splicing_df: pd.DataFrame, transcript, max_distance: int, feature: str = "event"):
+         self.full_df = splicing_df
+         self.feature = feature
+         self.rev = transcript.rev
+         self.transcript_start = transcript.transcript_start
+         self.transcript_end = transcript.transcript_end
+         self.donors = transcript.donors
+         self.acceptors = transcript.acceptors
+         self.transcript = transcript
+         self.max_distance = max_distance
+
+         self.set_donor_nodes()
+         self.set_acceptor_nodes()
+
+     def _compute_splice_df(self, site_type: str) -> pd.DataFrame:
+         feature_col = f"{self.feature}_prob"
+         df = getattr(self.full_df, site_type + "s").copy()
+         site_set = getattr(self, site_type + "s")
+
+         # Annotated sites absent from the prediction table are assumed unperturbed.
+         missing = set(site_set) - set(df.index)
+         if missing:
+             df = pd.concat([df, pd.DataFrame(index=list(missing))], axis=0)
+             df.loc[list(missing), ["annotated", "ref_prob", feature_col]] = [True, 1, 1]
+
+         if "annotated" not in df.columns:
+             df["annotated"] = False
+         else:
+             df["annotated"] = df["annotated"].where(df["annotated"].notna(), False).astype(bool)
+
+         df.sort_index(ascending=not self.rev, inplace=True)
+
+         # Absolute probability gain a novel site needs before it is considered.
+         MIN_INCREASE_RATIO = 0.2
+
+         # Novel (unannotated) sites score by their absolute gain over the reference.
+         df["discovered_delta"] = np.where(
+             ~df["annotated"],
+             (df[feature_col] - df["ref_prob"]),
+             np.nan,
+         )
+         df["discovered_delta"] = df["discovered_delta"].where(
+             df["discovered_delta"] >= MIN_INCREASE_RATIO, 0
+         )
+
+         # Annotated sites score by their relative loss, clipped at zero.
+         with np.errstate(divide="ignore", invalid="ignore"):
+             df["deleted_delta"] = np.where(
+                 (df["ref_prob"] > 0) & df["annotated"],
+                 (df[feature_col] - df["ref_prob"]) / df["ref_prob"],
+                 0,
+             )
+         df["deleted_delta"] = df["deleted_delta"].clip(upper=0)
+
+         df["P"] = df["annotated"].astype(float) + df["discovered_delta"] + df["deleted_delta"]
+         return df
+
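For orientation, here is a minimal standalone sketch of the same P scoring on a toy site table (invented numbers, independent of the package):

import numpy as np
import pandas as pd

# One annotated site that weakens, one novel site that gains strength.
df = pd.DataFrame(
    {"annotated": [True, False], "ref_prob": [0.9, 0.05], "event_prob": [0.45, 0.60]},
    index=[1000, 1150],
)
MIN_INCREASE_RATIO = 0.2

# Novel sites: absolute gain, zeroed below the threshold (NaN rows also become 0).
df["discovered_delta"] = np.where(~df["annotated"], df["event_prob"] - df["ref_prob"], np.nan)
df["discovered_delta"] = df["discovered_delta"].where(df["discovered_delta"] >= MIN_INCREASE_RATIO, 0)

# Annotated sites: relative loss, clipped at zero.
df["deleted_delta"] = np.where(
    (df["ref_prob"] > 0) & df["annotated"],
    (df["event_prob"] - df["ref_prob"]) / df["ref_prob"],
    0,
)
df["deleted_delta"] = df["deleted_delta"].clip(upper=0)

df["P"] = df["annotated"].astype(float) + df["discovered_delta"] + df["deleted_delta"]
print(df["P"].tolist())  # ≈ [0.5, 0.55]: the weakened annotated site and the novel site
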
+     @property
+     def donor_df(self) -> pd.DataFrame:
+         return self._compute_splice_df("donor")
+
+     @property
+     def acceptor_df(self) -> pd.DataFrame:
+         return self._compute_splice_df("acceptor")
+
+     def report(self, pos):
+         metadata = self.find_splice_site_proximity(pos)
+         metadata["donor_events"] = self.donor_df[
+             (self.donor_df.deleted_delta.abs() > 0.2)
+             | (self.donor_df.discovered_delta.abs() > 0.2)
+         ].reset_index().to_json()
+         metadata["acceptor_events"] = self.acceptor_df[
+             (self.acceptor_df.deleted_delta.abs() > 0.2)
+             | (self.acceptor_df.discovered_delta.abs() > 0.2)
+         ].reset_index().to_json()
+         metadata["missplicing"] = self.max_splicing_delta("event_prob")
+         return metadata
+
+     def max_splicing_delta(self, event: str) -> float:
+         all_diffs = []
+         for site_type in ["donors", "acceptors"]:
+             df = self.full_df[site_type]
+             diffs = (df[event] - df["ref_prob"]).tolist()
+             all_diffs.extend(diffs)
+         if not all_diffs:
+             return 0.0
+         return max(all_diffs, key=abs)
+
+     def set_donor_nodes(self) -> None:
+         donors = self.donor_df.P
+         donor_list = list(donors[donors > 0].round(2).items())
+         donor_list.append((self.transcript_end, 1.0))
+         self.donor_nodes = sorted(
+             donor_list, key=lambda x: int(x[0]), reverse=bool(self.rev)
+         )
+
+     def set_acceptor_nodes(self) -> None:
+         acceptors = self.acceptor_df.P
+         acceptor_list = list(acceptors[acceptors > 0].round(2).items())
+         acceptor_list.insert(0, (self.transcript_start, 1.0))
+         self.acceptor_nodes = sorted(
+             acceptor_list, key=lambda x: int(x[0]), reverse=bool(self.rev)
+         )
+
+     def generate_graph(self) -> Dict[Tuple[int, str], List[Tuple[int, str, float]]]:
+         adjacency_list: Dict[Tuple[int, str], List[Tuple[int, str, float]]] = defaultdict(list)
+
+         # donor -> acceptor
+         for d_pos, d_prob in self.donor_nodes:
+             running_prob = 1.0
+             for a_pos, a_prob in self.acceptor_nodes:
+                 correct_orientation = ((a_pos > d_pos and not self.rev) or (a_pos < d_pos and self.rev))
+                 distance_valid = abs(a_pos - d_pos) <= self.max_distance
+                 if not (correct_orientation and distance_valid):
+                     continue
+
+                 if not self.rev:
+                     in_between_acceptors = sum(1 for a, _ in self.acceptor_nodes if d_pos < a < a_pos)
+                     in_between_donors = sum(1 for d, _ in self.donor_nodes if d_pos < d < a_pos)
+                 else:
+                     in_between_acceptors = sum(1 for a, _ in self.acceptor_nodes if a_pos < a < d_pos)
+                     in_between_donors = sum(1 for d, _ in self.donor_nodes if a_pos < d < d_pos)
+
+                 if in_between_donors == 0 or in_between_acceptors == 0:
+                     adjacency_list[(d_pos, "donor")].append((a_pos, "acceptor", a_prob))
+                     running_prob -= a_prob
+                 else:
+                     if running_prob > 0:
+                         adjacency_list[(d_pos, "donor")].append(
+                             (a_pos, "acceptor", a_prob * running_prob)
+                         )
+                         running_prob -= a_prob
+                     else:
+                         break
+
+         # acceptor -> donor
+         for a_pos, a_prob in self.acceptor_nodes:
+             running_prob = 1.0
+             for d_pos, d_prob in self.donor_nodes:
+                 correct_orientation = ((d_pos > a_pos and not self.rev) or (d_pos < a_pos and self.rev))
+                 distance_valid = abs(d_pos - a_pos) <= self.max_distance
+                 if not (correct_orientation and distance_valid):
+                     continue
+
+                 if not self.rev:
+                     in_between_acceptors = sum(1 for a, _ in self.acceptor_nodes if a_pos < a < d_pos)
+                     in_between_donors = sum(1 for d, _ in self.donor_nodes if a_pos < d < d_pos)
+                 else:
+                     in_between_acceptors = sum(1 for a, _ in self.acceptor_nodes if d_pos < a < a_pos)
+                     in_between_donors = sum(1 for d, _ in self.donor_nodes if d_pos < d < a_pos)
+
+                 tag = "donor" if d_pos != self.transcript_end else "transcript_end"
+                 if in_between_acceptors == 0:
+                     adjacency_list[(a_pos, "acceptor")].append((d_pos, tag, d_prob))
+                     running_prob -= d_prob
+                 else:
+                     if running_prob > 0:
+                         adjacency_list[(a_pos, "acceptor")].append(
+                             (d_pos, tag, d_prob * running_prob)
+                         )
+                         running_prob -= d_prob
+                     else:
+                         break
+
+         # transcript_start -> donors
+         running_prob = 1.0
+         for d_pos, d_prob in self.donor_nodes:
+             correct_orientation = (
+                 (d_pos > self.transcript_start and not self.rev)
+                 or (d_pos < self.transcript_start and self.rev)
+             )
+             distance_valid = abs(d_pos - self.transcript_start) <= self.max_distance
+             if correct_orientation and distance_valid:
+                 adjacency_list[(self.transcript_start, "transcript_start")].append(
+                     (d_pos, "donor", d_prob)
+                 )
+                 running_prob -= d_prob
+                 if running_prob <= 0:
+                     break
+
+         # normalize outgoing edges so each node's edge probabilities sum to 1
+         for key, next_nodes in adjacency_list.items():
+             total_prob = sum(prob for (_, _, prob) in next_nodes)
+             if total_prob > 0:
+                 adjacency_list[key] = [
+                     (pos, typ, round(prob / total_prob, 3))
+                     for pos, typ, prob in next_nodes
+                 ]
+         return adjacency_list
+
+     def find_all_paths(
+         self,
+         graph: Dict[Tuple[int, str], List[Tuple[int, str, float]]],
+         start: Tuple[int, str],
+         end: Tuple[int, str],
+         path: List[Tuple[int, str]] | None = None,
+         probability: float = 1.0,
+     ) -> Generator[Tuple[List[Tuple[int, str]], float], None, None]:
+         if path is None:
+             path = [start]
+         else:
+             path = path + [start]
+
+         if start == end:
+             yield path, probability
+             return
+         if start not in graph:
+             return
+
+         for next_pos, tag, prob in graph[start]:
+             yield from self.find_all_paths(
+                 graph,
+                 (next_pos, tag),
+                 end,
+                 path,
+                 probability * prob,
+             )
+
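The traversal above is plain depth-first enumeration with multiplied edge weights. A self-contained sketch of the same idea on a toy two-isoform graph (standalone function, not the method itself):

from __future__ import annotations
from typing import Dict, Generator, List, Tuple

Node = Tuple[int, str]
Graph = Dict[Node, List[Tuple[int, str, float]]]

def all_paths(graph: Graph, start: Node, end: Node, path: List[Node] | None = None,
              prob: float = 1.0) -> Generator[Tuple[List[Node], float], None, None]:
    path = [start] if path is None else path + [start]
    if start == end:
        yield path, prob
        return
    for pos, tag, p in graph.get(start, []):
        yield from all_paths(graph, (pos, tag), end, path, prob * p)

g: Graph = {
    (0, "transcript_start"): [(10, "donor", 1.0)],
    (10, "donor"): [(20, "acceptor", 0.7), (30, "acceptor", 0.3)],
    (20, "acceptor"): [(100, "transcript_end", 1.0)],
    (30, "acceptor"): [(100, "transcript_end", 1.0)],
}
for p, w in all_paths(g, (0, "transcript_start"), (100, "transcript_end")):
    print(round(w, 2), [pos for pos, _ in p])  # the 0.7 and 0.3 isoforms
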
+     def get_viable_paths(self) -> List[Tuple[List[Tuple[int, str]], float]]:
+         graph = self.generate_graph()
+         start_node = (self.transcript_start, "transcript_start")
+         end_node = (self.transcript_end, "transcript_end")
+         paths = list(self.find_all_paths(graph, start_node, end_node))
+         paths.sort(key=lambda x: x[1], reverse=True)
+         return paths
+
+     def get_viable_transcripts(self, metadata: bool = False):
+         # Reuse get_viable_paths() rather than duplicating the graph traversal.
+         for path, prob in self.get_viable_paths():
+             donors = [pos for pos, typ in path if typ == "donor"]
+             acceptors = [pos for pos, typ in path if typ == "acceptor"]
+
+             t = self.transcript.clone()
+             t.donors = [d for d in donors if d != t.transcript_end]
+             t.acceptors = [a for a in acceptors if a != t.transcript_start]
+             t.path_weight = prob
+             t.path_hash = short_hash_of_list(tuple(donors + acceptors))
+             t.generate_mature_mrna().generate_protein()
+             if metadata:
+                 md = pd.concat(
+                     [
+                         self.compare_splicing_to_reference(t),
+                         pd.Series(
+                             {
+                                 "isoform_prevalence": t.path_weight,
+                                 "isoform_id": t.path_hash,
+                             }
+                         ),
+                     ]
+                 )
+                 yield t, md
+             else:
+                 yield t
+
+     def find_splice_site_proximity(self, pos: int) -> Series:
+         def result(region, index, start, end):
+             return pd.Series(
+                 {
+                     "region": region,
+                     "index": index + 1,
+                     "5'_dist": abs(pos - min(start, end)),
+                     "3'_dist": abs(pos - max(start, end)),
+                 }
+             )
+
+         if not hasattr(self.transcript, "exons") or not hasattr(self.transcript, "introns"):
+             return pd.Series(
+                 {"region": None, "index": None, "5'_dist": np.inf, "3'_dist": np.inf}
+             )
+
+         for i, (start, end) in enumerate(self.transcript.exons):
+             if min(start, end) <= pos <= max(start, end):
+                 return result("exon", i, start, end)
+
+         for i, (start, end) in enumerate(self.transcript.introns):
+             if min(start, end) <= pos <= max(start, end):
+                 return result("intron", i, start, end)
+
+         return pd.Series(
+             {"region": None, "index": None, "5'_dist": np.inf, "3'_dist": np.inf}
+         )
+
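The min/max normalization above makes the interval checks strand-agnostic. A standalone illustration of the same classification logic:

def classify_position(pos, exons, introns):
    # Coordinates may be (start, end) in either orientation; normalize with min/max.
    for region, spans in (("exon", exons), ("intron", introns)):
        for i, (start, end) in enumerate(spans):
            lo, hi = min(start, end), max(start, end)
            if lo <= pos <= hi:
                return {"region": region, "index": i + 1,
                        "5'_dist": abs(pos - lo), "3'_dist": abs(pos - hi)}
    return None

print(classify_position(150, exons=[(100, 200), (300, 400)], introns=[(201, 299)]))
# {'region': 'exon', 'index': 1, "5'_dist": 50, "3'_dist": 50}
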
+     def define_missplicing_events(self, var) -> Tuple[str, str, str, str, str]:
+         # pes: partial exon skipping, pir: partial intron retention,
+         # es: exon skipping, ne: novel exon, ir: intron retention
+         ref = self.transcript
+         ref_introns, ref_exons = getattr(ref, "introns", []), getattr(ref, "exons", [])
+         var_introns, var_exons = getattr(var, "introns", []), getattr(var, "exons", [])
+
+         num_ref_exons = len(ref_exons)
+         num_ref_introns = len(ref_introns)
+
+         pes, pir, es, ne, ir = [], [], [], [], []
+
+         for exon_count, (t1, t2) in enumerate(ref_exons):
+             for (s1, s2) in var_exons:
+                 if (not ref.rev and ((s1 == t1 and s2 < t2) or (s1 > t1 and s2 == t2))) or (
+                     ref.rev and ((s1 == t1 and s2 > t2) or (s1 < t1 and s2 == t2))
+                 ):
+                     pes.append(
+                         f"Exon {exon_count+1}/{num_ref_exons} truncated: {(t1, t2)} --> {(s1, s2)}"
+                     )
+
+         for intron_count, (t1, t2) in enumerate(ref_introns):
+             for (s1, s2) in var_introns:
+                 if (not ref.rev and ((s1 == t1 and s2 < t2) or (s1 > t1 and s2 == t2))) or (
+                     ref.rev and ((s1 == t1 and s2 > t2) or (s1 < t1 and s2 == t2))
+                 ):
+                     pir.append(
+                         f"Intron {intron_count+1}/{num_ref_introns} partially retained: {(t1, t2)} --> {(s1, s2)}"
+                     )
+
+         for exon_count, (t1, t2) in enumerate(ref_exons):
+             if t1 not in var.acceptors and t2 not in var.donors:
+                 es.append(
+                     f"Exon {exon_count+1}/{num_ref_exons} skipped: {(t1, t2)}"
+                 )
+
+         for (s1, s2) in var_exons:
+             if s1 not in ref.acceptors and s2 not in ref.donors:
+                 ne.append(f"Novel Exon: {(s1, s2)}")
+
+         for intron_count, (t1, t2) in enumerate(ref_introns):
+             if t1 not in var.donors and t2 not in var.acceptors:
+                 ir.append(
+                     f"Intron {intron_count+1}/{num_ref_introns} retained: {(t1, t2)}"
+                 )
+
+         return ",".join(pes), ",".join(pir), ",".join(es), ",".join(ne), ",".join(ir)
+
+     def summarize_missplicing_event(self, pes, pir, es, ne, ir) -> str:
+         event = []
+         if pes:
+             event.append("PES")
+         if es:
+             event.append("ES")
+         if pir:
+             event.append("PIR")
+         if ir:
+             event.append("IR")
+         if ne:
+             event.append("NE")
+         return ",".join(event) if event else "-"
+
+     def compare_splicing_to_reference(self, transcript_variant) -> Series:
+         pes, pir, es, ne, ir = self.define_missplicing_events(transcript_variant)
+         return pd.Series(
+             {
+                 "pes": pes,
+                 "pir": pir,
+                 "es": es,
+                 "ne": ne,
+                 "ir": ir,
+                 "summary": self.summarize_missplicing_event(pes, pir, es, ne, ir),
+             }
+         )
geney/splicing_table.py ADDED
@@ -0,0 +1,142 @@
+ # oncosplice/splicing_table.py
+ from __future__ import annotations
+
+ from typing import Dict, Optional
+ import numpy as np
+ import pandas as pd
+
+ from .engines import run_splicing_engine
+
+
+ def predict_splicing(s, position: int, engine: str = 'spliceai',
+                      context: int = 7500) -> pd.DataFrame:
+     """
+     Predict splicing probabilities at a given position using the specified engine.
+
+     Args:
+         s: The sequence object (e.g. a SeqMat) exposing .seq, .index, .rev, .name and .clone().
+         position (int): The genomic position to predict splicing probabilities for.
+         engine (str): The prediction engine to use. Supported: 'spliceai', 'pangolin'.
+         context (int): Number of flanking nucleotides retrieved on each side of
+             the position (default: 7500).
+
+     Returns:
+         pd.DataFrame: A DataFrame indexed by position containing:
+             - donor_prob: Probability of being a donor splice site
+             - acceptor_prob: Probability of being an acceptor splice site
+             - nucleotides: The nucleotide at that position
+
+     Raises:
+         ValueError: If the position is outside the sequence bounds, or if no
+             sequence data is found around the position.
+     """
+     # Validate position is within sequence bounds
+     if position < s.index.min() or position > s.index.max():
+         raise ValueError(f"Position {position} is outside sequence bounds [{s.index.min()}, {s.index.max()}]")
+
+     # Retrieve extended context (includes flanks) around the position.
+     target = s.clone(position - context, position + context)
+
+     # Check if target clone resulted in empty sequence
+     if len(target.seq) == 0:
+         raise ValueError(f"No sequence data found around position {position} with context {context}")
+
+     seq, indices = target.seq, target.index
+
+     # Validate indices array is not empty
+     if len(indices) == 0:
+         raise ValueError(f"No indices found in sequence around position {position}")
+
+     # Find relative position within the context window
+     rel_pos = np.abs(indices - position).argmin()
+
+     # If the window ran off either end of the sequence, pad with 'N' and
+     # extend the coordinate index so the engine still sees a full window.
+     left_missing, right_missing = max(0, context - rel_pos), max(0, context - (len(seq) - rel_pos))
+     if left_missing > 0 or right_missing > 0:
+         step = -1 if s.rev else 1
+
+         if left_missing > 0:
+             left_pad = np.arange(indices[0] - step * left_missing, indices[0], step)
+         else:
+             left_pad = np.array([], dtype=indices.dtype)
+
+         if right_missing > 0:
+             right_pad = np.arange(indices[-1] + step, indices[-1] + step * (right_missing + 1), step)
+         else:
+             right_pad = np.array([], dtype=indices.dtype)
+
+         seq = 'N' * left_missing + seq + 'N' * right_missing
+         indices = np.concatenate([left_pad, indices, right_pad])
+
+     # Run the splicing prediction engine (imported from .engines above).
+     donor_probs, acceptor_probs = run_splicing_engine(seq=seq, engine=engine)
+
+     # Trim off the fixed 5,000-nt flanks before returning results.
+     seq = seq[5000:-5000]
+     indices = indices[5000:-5000]
+     df = pd.DataFrame({
+         'position': indices,
+         'donor_prob': donor_probs,
+         'acceptor_prob': acceptor_probs,
+         'nucleotides': list(seq)
+     }).set_index('position').round(3)
+
+     df.attrs['name'] = s.name
+     return df
+
+
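To see what the 'N'-padding branch does, here is the left-pad case in isolation (toy coordinates; forward strand, so step = 1):

import numpy as np

indices = np.array([101, 102, 103, 104])  # coordinates actually retrieved
seq = "ACGT"
left_missing, step = 3, 1                 # window ran 3 bases off the left edge

left_pad = np.arange(indices[0] - step * left_missing, indices[0], step)
seq = "N" * left_missing + seq
indices = np.concatenate([left_pad, indices])
print(indices.tolist(), seq)  # [98, 99, 100, 101, 102, 103, 104] NNNACGT
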
+ def adjoin_splicing_outcomes(
+     splicing_predictions: Dict[str, pd.DataFrame],
+     transcript: Optional[object] = None,
+ ) -> pd.DataFrame:
+     """
+     Combine splicing predictions for multiple mutations into a multi-index DataFrame.
+
+     splicing_predictions: {label -> DF with 'donor_prob', 'acceptor_prob', 'nucleotides'}
+     transcript: optional transcript (must have .acceptors, .donors, .rev)
+     """
+     if not splicing_predictions:
+         raise ValueError("splicing_predictions cannot be empty")
+
+     dfs = []
+     for label, df in splicing_predictions.items():
+         if not isinstance(df, pd.DataFrame):
+             raise TypeError(f"Expected DataFrame for '{label}', got {type(df).__name__}")
+
+         required_cols = ["donor_prob", "acceptor_prob", "nucleotides"]
+         missing = [c for c in required_cols if c not in df.columns]
+         if missing:
+             raise ValueError(
+                 f"DataFrame for '{label}' missing required columns: {missing}"
+             )
+
+         # Rename to (group, column) tuples so the concatenated frame can carry
+         # a two-level column index: ('donors'|'acceptors'|'nts', per-label name).
+         var_df = df.rename(
+             columns={
+                 "donor_prob": ("donors", f"{label}_prob"),
+                 "acceptor_prob": ("acceptors", f"{label}_prob"),
+                 "nucleotides": ("nts", f"{label}"),
+             }
+         )
+         dfs.append(var_df)
+
+     try:
+         full_df = pd.concat(dfs, axis=1)
+     except Exception as e:
+         raise ValueError(f"Failed to concatenate DataFrames: {e}") from e
+
+     if not isinstance(full_df.columns, pd.MultiIndex):
+         full_df.columns = pd.MultiIndex.from_tuples(full_df.columns)
+
+     if transcript is not None:
+         # Flag positions that are annotated splice sites on the transcript.
+         full_df[("acceptors", "annotated")] = full_df.index.isin(transcript.acceptors)
+         full_df[("donors", "annotated")] = full_df.index.isin(transcript.donors)
+         full_df.sort_index(ascending=not transcript.rev, inplace=True)
+
+     full_df.sort_index(axis=1, level=0, inplace=True)
+     return full_df
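A usage sketch with toy prediction frames (assuming the package import path geney.splicing_table; the values are invented):

import pandas as pd
from geney.splicing_table import adjoin_splicing_outcomes  # assumed import path

def toy(donor, acceptor, nts):
    return pd.DataFrame(
        {"donor_prob": donor, "acceptor_prob": acceptor, "nucleotides": list(nts)},
        index=pd.Index([100, 101, 102], name="position"),
    )

preds = {
    "ref": toy([0.9, 0.0, 0.0], [0.0, 0.0, 0.8], "GTA"),
    "event": toy([0.1, 0.0, 0.0], [0.0, 0.0, 0.8], "GAA"),
}
full = adjoin_splicing_outcomes(preds)  # no transcript: column sort only
# Columns now form a MultiIndex: ('acceptors'|'donors'|'nts', '<label>_prob'|'<label>')
print(full[("donors", "ref_prob")] - full[("donors", "event_prob")])  # 0.8 drop at 100
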
geney/transcripts.py ADDED
@@ -0,0 +1,68 @@
+ # oncosplice/transcripts.py
+ from __future__ import annotations
+
+ from typing import Dict, Iterable, Tuple
+
+ from .splicing_table import adjoin_splicing_outcomes, predict_splicing
+
+
+ class TranscriptLibrary:
+     """
+     Holds a reference transcript and mutated variants derived from a MutationalEvent.
+
+     _transcripts: {'ref': ref_transcript, 'event': all mutations applied,
+                    'mut1': first mutation only, ...}
+     """
+
17
+ self.ref = reference_transcript.clone()
18
+ self.event = reference_transcript.clone()
19
+ self._transcripts: Dict[str, object] = {"ref": self.ref, "event": self.event}
20
+
21
+ for i, (pos, ref, alt) in enumerate(mutations):
22
+ self.event.pre_mrna.apply_mutations((pos, ref, alt))
23
+ if len(list(mutations)) > 1:
24
+ t = reference_transcript.clone()
25
+ t.pre_mrna.apply_mutations((pos, ref, alt))
26
+ name = f"mut{i+1}"
27
+ self._transcripts[name] = t
28
+ setattr(self, name, t)
29
+
30
+ setattr(self, "ref", self.ref)
31
+ setattr(self, "event", self.event)
32
+
33
+ def predict_splicing(self, pos, engine: str = "spliceai", inplace: bool = False):
34
+ """
35
+ Run splicing predictions for all transcripts at a genomic position.
36
+ Assumes each transcript has pre_mrna.predict_splicing(pos, engine, inplace=True)
37
+ and stores results in pre_mrna.predicted_splicing.
38
+ """
39
+ splicing_predictions = {
40
+ k: predict_splicing(t.pre_mrna, pos, engine=engine)
41
+ for k, t in self._transcripts.items()
42
+ }
43
+ self.splicing_results = adjoin_splicing_outcomes(
44
+ {k: df for k, df in splicing_predictions.items()},
45
+ self.ref,
46
+ )
47
+ if inplace:
48
+ return self
49
+
50
+ return self.splicing_results
51
+
52
+     def get_event_columns(self, event_name: str, sites=("donors", "acceptors")):
+         """
+         Extract selected columns for a given event label ('event', 'mut1', etc.).
+         Returns a DataFrame subset of self.splicing_results.
+         """
+         if not hasattr(self, "splicing_results"):
+             raise ValueError("You must run predict_splicing() first.")
+
+         metrics = (f"{event_name}_prob", "ref_prob", "annotated")
+         cols = [(site, metric) for site in sites for metric in metrics]
+         return self.splicing_results.loc[:, cols]
+
+     def __getitem__(self, key):
+         return self._transcripts[key]
+
+     def __iter__(self):
+         return iter(self._transcripts.items())
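Finally, an end-to-end sketch of the intended call pattern. The transcript object itself is not defined in this diff, so `tx` below is assumed to provide .clone() and a .pre_mrna with apply_mutations(); the coordinates are placeholders:

from geney.transcripts import TranscriptLibrary  # assumed import path

# tx: a reference transcript object obtained elsewhere in the package.
lib = TranscriptLibrary(tx, mutations=[(7579312, "C", "T")])   # (pos, ref, alt) triples
results = lib.predict_splicing(pos=7579312, engine="spliceai") # multi-index DataFrame
event_view = lib.get_event_columns("event")  # ('donors'/'acceptors') x (event_prob, ref_prob, annotated)
ref_transcript = lib["ref"]                  # dict-style access to any variant
for name, transcript in lib:                 # iterate (label, transcript) pairs
    print(name)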