geney 1.4.30__py2.py3-none-any.whl → 1.4.32__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of geney might be problematic. Click here for more details.
- geney/utils/SeqMats.py +26 -12
- geney/utils/spliceai_pytorch_utils.py +30 -13
- geney/utils/splicing_utils.py +3 -4
- {geney-1.4.30.dist-info → geney-1.4.32.dist-info}/METADATA +1 -1
- {geney-1.4.30.dist-info → geney-1.4.32.dist-info}/RECORD +7 -7
- {geney-1.4.30.dist-info → geney-1.4.32.dist-info}/WHEEL +0 -0
- {geney-1.4.30.dist-info → geney-1.4.32.dist-info}/top_level.txt +0 -0
geney/utils/SeqMats.py
CHANGED
|
@@ -215,6 +215,8 @@ class SeqMat:
|
|
|
215
215
|
# else:
|
|
216
216
|
# raise ValueError(f"Unsupported mutation {pos}:{ref}:{alt}.")
|
|
217
217
|
# Bucket mutations
|
|
218
|
+
|
|
219
|
+
|
|
218
220
|
subs, ins, dels = [], [], []
|
|
219
221
|
for p, r, a in mutations:
|
|
220
222
|
# left-normalize
|
|
@@ -286,18 +288,30 @@ class SeqMat:
|
|
|
286
288
|
self.notes['total_mutations'] = self.notes.get('total_mutations', 0) + applied_mutations
|
|
287
289
|
return self
|
|
288
290
|
|
|
289
|
-
def complement(self) -> SeqMat:
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
291
|
+
# def complement(self) -> SeqMat:
|
|
292
|
+
# comp = {b'A': b'T', b'T': b'A', b'C': b'G', b'G': b'C', b'-': b'-'}
|
|
293
|
+
# nts = np.array([comp[x] for x in self.seq_array['nt']], dtype='S1')
|
|
294
|
+
# new = self.clone()
|
|
295
|
+
# new.seq_array['nt'] = nts
|
|
296
|
+
# return new
|
|
297
|
+
#
|
|
298
|
+
# def reverse_complement(self) -> SeqMat:
|
|
299
|
+
# new = self.complement().clone()
|
|
300
|
+
# new.seq_array = new.seq_array[::-1].copy()
|
|
301
|
+
# new.rev = not self.rev
|
|
302
|
+
# return new
|
|
303
|
+
def complement(self) -> "SeqMat":
|
|
304
|
+
comp_dict = {b"A": b"T", b"T": b"A", b"C": b"G", b"G": b"C", b"-": b"-", b"N": b"N"}
|
|
305
|
+
comp_seq = np.array([comp_dict[nt] for nt in self.seq_array["nt"]], dtype="S1")
|
|
306
|
+
new_instance = self.clone()
|
|
307
|
+
new_instance.seq_array["nt"] = comp_seq
|
|
308
|
+
return new_instance
|
|
295
309
|
|
|
296
|
-
def reverse_complement(self) -> SeqMat:
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
return
|
|
310
|
+
def reverse_complement(self) -> "SeqMat":
|
|
311
|
+
rev_comp_seq = self.complement().seq_array[::-1]
|
|
312
|
+
self.seq_array = rev_comp_seq.copy()
|
|
313
|
+
self.rev = not self.rev
|
|
314
|
+
return self
|
|
301
315
|
|
|
302
316
|
def __getitem__(self, key: Union[int, slice]) -> np.ndarray:
|
|
303
317
|
coords = self.seq_array['index']
|
|
@@ -424,7 +438,7 @@ class SeqMat:
|
|
|
424
438
|
|
|
425
439
|
# Run the splicing prediction engine (function assumed to be defined externally)
|
|
426
440
|
from .splicing_utils import run_splicing_engine
|
|
427
|
-
donor_probs, acceptor_probs = run_splicing_engine(seq, engine)
|
|
441
|
+
donor_probs, acceptor_probs = run_splicing_engine(seq=seq, splicing_engine=engine)
|
|
428
442
|
# Trim off the fixed flanks before returning results.
|
|
429
443
|
seq = seq[5000:-5000]
|
|
430
444
|
indices = indices[5000:-5000]
|
|
@@ -11,29 +11,44 @@ from spliceai_pytorch import SpliceAI
|
|
|
11
11
|
model = SpliceAI.from_preconfigured('10k')
|
|
12
12
|
|
|
13
13
|
|
|
14
|
+
device = torch.device('cpu')
|
|
14
15
|
if sys.platform == 'darwin':
|
|
15
16
|
device = torch.device("mps") if torch.backends.mps.is_available() else torch.device("cpu")
|
|
16
17
|
|
|
17
18
|
if sys.platform == 'linux':
|
|
18
19
|
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
|
|
19
20
|
|
|
20
|
-
|
|
21
21
|
print(f"SpliceAI loaded to {device}.")
|
|
22
22
|
model.to(device)
|
|
23
23
|
|
|
24
|
-
def one_hot_encode(seq):
|
|
25
24
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
25
|
+
def one_hot_encode(seq: str) -> torch.Tensor:
|
|
26
|
+
"""
|
|
27
|
+
One-hot encodes a nucleotide sequence into shape [L, 4] (A, C, G, T).
|
|
28
|
+
Unknowns (N or other) are mapped to all-zero vectors.
|
|
29
|
+
"""
|
|
30
|
+
map = np.array([
|
|
31
|
+
[0, 0, 0, 0], # index 0: unknown (N, etc.)
|
|
32
|
+
[1, 0, 0, 0], # A
|
|
33
|
+
[0, 1, 0, 0], # C
|
|
34
|
+
[0, 0, 1, 0], # G
|
|
35
|
+
[0, 0, 0, 1], # T
|
|
36
|
+
], dtype=np.float32)
|
|
37
|
+
|
|
38
|
+
# Build mapping: ASCII values
|
|
39
|
+
ascii_seq = np.frombuffer(seq.upper().encode("ascii"), dtype=np.uint8)
|
|
31
40
|
|
|
32
|
-
|
|
33
|
-
|
|
41
|
+
# A=65, C=67, G=71, T=84 → map A/C/G/T to 1/2/3/4; others to 0
|
|
42
|
+
code_map = np.zeros(128, dtype=np.uint8)
|
|
43
|
+
code_map[ord('A')] = 1
|
|
44
|
+
code_map[ord('C')] = 2
|
|
45
|
+
code_map[ord('G')] = 3
|
|
46
|
+
code_map[ord('T')] = 4
|
|
34
47
|
|
|
35
|
-
|
|
48
|
+
indices = code_map[ascii_seq] # shape [L]
|
|
49
|
+
onehot = map[indices] # shape [L, 4]
|
|
36
50
|
|
|
51
|
+
return torch.tensor(onehot, dtype=torch.float32)
|
|
37
52
|
|
|
38
53
|
def sai_predict_probs(seq: str, model) -> list:
|
|
39
54
|
'''
|
|
@@ -50,10 +65,12 @@ def sai_predict_probs(seq: str, model) -> list:
|
|
|
50
65
|
is the donor probability. These probabilities correspond to the
|
|
51
66
|
middle <L NTs> NTs of the input seq.
|
|
52
67
|
'''
|
|
53
|
-
x = one_hot_encode(seq)[None, :]
|
|
68
|
+
x = one_hot_encode(seq)[None, :, :].transpose(1, 2) # shape: [1, 4, L]
|
|
54
69
|
y = model(x)
|
|
55
|
-
y =
|
|
56
|
-
|
|
70
|
+
probs = torch.softmax(y, dim=1) # shape: [1, 3, L]
|
|
71
|
+
acceptor_probs = probs[0, :, 1] # [L]
|
|
72
|
+
donor_probs = probs[0, :, 2] # [L]
|
|
73
|
+
return acceptor_probs.tolist(), donor_probs.tolist()
|
|
57
74
|
|
|
58
75
|
|
|
59
76
|
def run_spliceai_seq(seq, indices, threshold=0):
|
geney/utils/splicing_utils.py
CHANGED
|
@@ -3,7 +3,6 @@
|
|
|
3
3
|
import pandas as pd
|
|
4
4
|
from typing import List, Tuple, Optional
|
|
5
5
|
|
|
6
|
-
# def run_splicing_engine(seq: Optional[str] = None, engine: str = 'spliceai') -> Tuple[List[float], List[float]]:
|
|
7
6
|
def run_splicing_engine(seq: Optional[str] = None, engine: str = 'spliceai') -> Tuple[List[float], List[float]]:
|
|
8
7
|
"""
|
|
9
8
|
Run the specified splicing engine to predict splice site probabilities on a sequence.
|
|
@@ -25,9 +24,10 @@ def run_splicing_engine(seq: Optional[str] = None, engine: str = 'spliceai') ->
|
|
|
25
24
|
|
|
26
25
|
match engine:
|
|
27
26
|
case 'spliceai':
|
|
28
|
-
|
|
29
|
-
|
|
27
|
+
from geney.utils.spliceai_utils import sai_predict_probs, sai_models
|
|
28
|
+
acceptor_probs, donor_probs = sai_predict_probs(seq, models=sai_models)
|
|
30
29
|
|
|
30
|
+
case 'spliceai-pytorch':
|
|
31
31
|
from geney.utils.spliceai_pytorch_utils import sai_predict_probs, model
|
|
32
32
|
acceptor_probs, donor_probs = sai_predict_probs(seq, model=model)
|
|
33
33
|
|
|
@@ -41,7 +41,6 @@ def run_splicing_engine(seq: Optional[str] = None, engine: str = 'spliceai') ->
|
|
|
41
41
|
return donor_probs, acceptor_probs
|
|
42
42
|
|
|
43
43
|
|
|
44
|
-
|
|
45
44
|
def adjoin_splicing_outcomes(splicing_predictions, transcript=None):
|
|
46
45
|
"""
|
|
47
46
|
Predicts splicing effect for multiple mutations and organizes the output as a multi-index DataFrame.
|
|
@@ -37,17 +37,17 @@ geney/translation_initiation/tis_utils.py,sha256=AF3siFjuQH-Rs44EV-80zHdbxRMvN4w
|
|
|
37
37
|
geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFWDCD9cujQ_AlZO-iiOvBl82hqE,1165
|
|
38
38
|
geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
|
|
39
39
|
geney/utils/Fasta_segment.py,sha256=weB5NJ65P0XiyAJCiCHx4T9sHC1pWLpuQeOy0B85gyg,11364
|
|
40
|
-
geney/utils/SeqMats.py,sha256
|
|
40
|
+
geney/utils/SeqMats.py,sha256=-eoSKJFZR5OelhjXVkTIJbqYgp2xoWvgo8KWfKIBkRk,18197
|
|
41
41
|
geney/utils/SeqMatsOld.py,sha256=syRU5DAuTh3xUfGW_qP9wlcBO5pHsG_y5PlrfXTIxUY,18502
|
|
42
42
|
geney/utils/TranscriptLibrary.py,sha256=W1hv4Y8wRlmwTs3iFdn4_IqS-2suVDzZe4fwti2KbR4,2076
|
|
43
43
|
geney/utils/__init__.py,sha256=-nJ-DMx1JzP-ZCe_QuQCeM0ZYIT_16jxoXDhUaO_4Oc,714
|
|
44
44
|
geney/utils/mutation_utils.py,sha256=r-pHr56gEa5kh_DPX8MjFY3ZfYaOtyo4CUfJ5ZHlXPw,3243
|
|
45
45
|
geney/utils/pangolin_utils.py,sha256=JQSPbWxdzqGFYfWQktkfLMaMSGR28eGQhNzO7MLMe5M,6162
|
|
46
|
-
geney/utils/spliceai_pytorch_utils.py,sha256=
|
|
46
|
+
geney/utils/spliceai_pytorch_utils.py,sha256=wwBoT2utKZjjaWAUo11mSoympVK1vkNxAxcJvXAh8SM,2792
|
|
47
47
|
geney/utils/spliceai_utils.py,sha256=VtrIbjyQxk_3lw86eWjftRYyal9OzxArJ0GV5u_ymTg,2721
|
|
48
|
-
geney/utils/splicing_utils.py,sha256=
|
|
48
|
+
geney/utils/splicing_utils.py,sha256=Z0Z645fJhFnszpIueeMNaa-NMRLFFyJoRBR_yFbzrN0,20965
|
|
49
49
|
geney/utils/utils.py,sha256=GXqlatNhix1akt3fburNzIwhiW9ZdCQSt2vmU80neyA,2370
|
|
50
|
-
geney-1.4.
|
|
51
|
-
geney-1.4.
|
|
52
|
-
geney-1.4.
|
|
53
|
-
geney-1.4.
|
|
50
|
+
geney-1.4.32.dist-info/METADATA,sha256=SLfl8T6fgOlpb2Plf1iJxcaD-IXjclU4ZFcD3SWZTbM,990
|
|
51
|
+
geney-1.4.32.dist-info/WHEEL,sha256=AHX6tWk3qWuce7vKLrj7lnulVHEdWoltgauo8bgCXgU,109
|
|
52
|
+
geney-1.4.32.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
|
|
53
|
+
geney-1.4.32.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|