geney 1.4.27__py2.py3-none-any.whl → 1.4.28__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- geney/utils/spliceai_pytorch_utils.py +65 -0
- geney/utils/splicing_utils.py +2 -1
- {geney-1.4.27.dist-info → geney-1.4.28.dist-info}/METADATA +1 -1
- {geney-1.4.27.dist-info → geney-1.4.28.dist-info}/RECORD +6 -5
- {geney-1.4.27.dist-info → geney-1.4.28.dist-info}/WHEEL +0 -0
- {geney-1.4.27.dist-info → geney-1.4.28.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
|
|
2
|
+
import absl.logging
|
|
3
|
+
absl.logging.set_verbosity(absl.logging.ERROR)
|
|
4
|
+
|
|
5
|
+
import sys
|
|
6
|
+
import numpy as np
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
import torch
|
|
10
|
+
from spliceai_pytorch import SpliceAI
|
|
11
|
+
# Load the preconfigured '10k' SpliceAI network once, at import time.
model = SpliceAI.from_preconfigured('10k')

# Pick the compute device. The platform-specific choices are the same as
# before (MPS on macOS, CUDA on Linux, each only when available); the final
# `else` guarantees `device` is always bound, so importing this module on
# any other platform (e.g. Windows) no longer fails with a NameError.
if sys.platform == 'darwin' and torch.backends.mps.is_available():
    device = torch.device("mps")
elif sys.platform == 'linux' and torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print(f"SpliceAI loaded to {device}.")
# NOTE(review): the model is never switched to eval mode here -- confirm
# whether from_preconfigured() already returns it ready for inference.
model.to(device)
|
|
23
|
+
|
|
24
|
+
def one_hot_encode(seq):
    """Return a one-hot encoding of a nucleotide sequence.

    The sequence is upper-cased, then A, C, G and T are mapped to the unit
    rows ``[1,0,0,0]``, ``[0,1,0,0]``, ``[0,0,1,0]`` and ``[0,0,0,1]``;
    N is mapped to the all-zero row. Any other character is not rejected:
    it selects a row by its byte value modulo 5.

    Args:
        seq: Nucleotide sequence as a string.

    Returns:
        An integer numpy array with one 4-element row per encoded byte,
        i.e. shape ``(len(seq), 4)`` for ASCII input.
    """
    # Row 0 is the "unknown" (N) encoding; rows 1-4 are A, C, G, T.
    lookup = np.asarray([[0, 0, 0, 0],
                         [1, 0, 0, 0],
                         [0, 1, 0, 0],
                         [0, 0, 1, 0],
                         [0, 0, 0, 1]])

    # Rewrite each base as the control character whose code point is the
    # row index it should select in `lookup`.
    seq = seq.upper().replace('A', '\x01').replace('C', '\x02')
    seq = seq.replace('G', '\x03').replace('T', '\x04').replace('N', '\x00')

    # np.fromstring's binary mode is deprecated; np.frombuffer over the
    # encoded bytes yields the same int8 codes.
    codes = np.frombuffer(seq.encode('utf-8'), dtype=np.int8)
    return lookup[codes % 5]
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def sai_predict_probs(seq: str, model=None, *, models=None) -> tuple:
    '''
    Predicts the donor and acceptor junction probability of each
    NT in seq using SpliceAI.

    Let m:=2*sai_mrg_context + L be the input seq length. It is assumed
    that the input seq has the following structure:

    seq = |<sai_mrg_context NTs><L NTs><sai_mrg_context NTs>|

    Two sequences are returned as a tuple ``(acceptor, donor)``: the first
    is the acceptor probability and the second is the donor probability.
    These probabilities correspond to the middle <L NTs> NTs of the
    input seq.

    Args:
        seq: Nucleotide sequence, including flanking context (see above).
        model: Callable model to run on the one-hot encoded sequence.
        models: Keyword-only alias for ``model``, accepted because an
            existing caller passes the model as ``models=...``. Ignored
            when ``model`` is given.

    Returns:
        A 2-tuple ``(acceptor_probs, donor_probs)`` taken from output
        channels 1 and 2 of the model's last axis for the first batch item.

    Raises:
        TypeError: if neither ``model`` nor ``models`` is provided.
    '''
    if model is None:
        model = models
    if model is None:
        raise TypeError("sai_predict_probs() requires a 'model' argument")

    # Add a leading batch axis: (len(seq), 4) -> (1, len(seq), 4).
    x = one_hot_encode(seq)[None, :]

    if isinstance(model, torch.nn.Module):
        # A torch module needs a floating-point tensor on the same device
        # as its weights; it cannot consume the integer numpy array directly.
        # NOTE(review): the array is passed as (1, len(seq), 4) exactly as
        # before -- confirm this is the layout the model expects.
        first_param = next(model.parameters(), None)
        if first_param is None:
            x = torch.as_tensor(x, dtype=torch.float32)
        else:
            x = torch.as_tensor(x, dtype=first_param.dtype,
                                device=first_param.device)
        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            y = model(x)
    else:
        y = model(x)

    # Drop channel 0 of the last axis and transpose so that row 0 / row 1
    # hold the two remaining channels.
    y = y[0, :, 1:].T
    return y[0, :], y[1, :]
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def run_spliceai_seq(seq, indices, threshold=0):
    """Run SpliceAI on ``seq`` and key the per-position probabilities by index.

    Args:
        seq: Nucleotide sequence passed to ``sai_predict_probs`` as given;
            no flanking 'N' padding is added here.
        indices: Iterable of position labels, paired in order with the
            predicted probabilities. Pairing stops at the shorter of the two.
        threshold: Minimum probability (inclusive) for a position to be kept.

    Returns:
        A tuple ``(donor_indices, acceptor_indices)`` of dicts mapping each
        retained index to its probability.
    """
    # sai_predict_probs returns an (acceptor, donor) tuple; unpack it
    # directly -- indexing the tuple with [0, :] raises TypeError.
    acceptor_probs, donor_probs = sai_predict_probs(seq, model)

    acceptor_indices = {
        pos: prob
        for pos, prob in zip(indices, acceptor_probs)
        if prob >= threshold
    }
    donor_indices = {
        pos: prob
        for pos, prob in zip(indices, donor_probs)
        if prob >= threshold
    }
    return donor_indices, acceptor_indices
|
geney/utils/splicing_utils.py
CHANGED
|
@@ -20,8 +20,9 @@ def run_splicing_engine(seq: str, engine: str = 'spliceai') -> Tuple[List[float]
|
|
|
20
20
|
match engine:
|
|
21
21
|
case 'spliceai':
|
|
22
22
|
from geney.utils.spliceai_utils import sai_predict_probs, sai_models
|
|
23
|
+
from geney.utils.spliceai_pytorch_utils import sai_predict_probs, model
|
|
23
24
|
# print(seq)
|
|
24
|
-
acceptor_probs, donor_probs = sai_predict_probs(seq, models=
|
|
25
|
+
acceptor_probs, donor_probs = sai_predict_probs(seq, models=model)
|
|
25
26
|
case 'pangolin':
|
|
26
27
|
from geney.utils.pangolin_utils import pangolin_predict_probs, pang_models
|
|
27
28
|
# print(seq)
|
|
@@ -43,10 +43,11 @@ geney/utils/TranscriptLibrary.py,sha256=W1hv4Y8wRlmwTs3iFdn4_IqS-2suVDzZe4fwti2K
|
|
|
43
43
|
geney/utils/__init__.py,sha256=-nJ-DMx1JzP-ZCe_QuQCeM0ZYIT_16jxoXDhUaO_4Oc,714
|
|
44
44
|
geney/utils/mutation_utils.py,sha256=r-pHr56gEa5kh_DPX8MjFY3ZfYaOtyo4CUfJ5ZHlXPw,3243
|
|
45
45
|
geney/utils/pangolin_utils.py,sha256=JQSPbWxdzqGFYfWQktkfLMaMSGR28eGQhNzO7MLMe5M,6162
|
|
46
|
+
geney/utils/spliceai_pytorch_utils.py,sha256=PfMgaoG6ftWfqKZKc_JNqj5wqQRUR2B-4YF22-zNh1M,2079
|
|
46
47
|
geney/utils/spliceai_utils.py,sha256=VtrIbjyQxk_3lw86eWjftRYyal9OzxArJ0GV5u_ymTg,2721
|
|
47
|
-
geney/utils/splicing_utils.py,sha256=
|
|
48
|
+
geney/utils/splicing_utils.py,sha256=pg5UnlGPaVgV3mw1GUzC-J-bDC1EtjTgkWsqtN-XVMM,20714
|
|
48
49
|
geney/utils/utils.py,sha256=m51Vd0cEbrcIHo6_8BAuI9YSPcKRs22e5LfVd2Qj6Is,2181
|
|
49
|
-
geney-1.4.
|
|
50
|
-
geney-1.4.
|
|
51
|
-
geney-1.4.
|
|
52
|
-
geney-1.4.
|
|
50
|
+
geney-1.4.28.dist-info/METADATA,sha256=q7DMJSJXyJ2IJ1qQD8xofQ4f2-cZZi6TvTUmEvfbyaA,990
|
|
51
|
+
geney-1.4.28.dist-info/WHEEL,sha256=AHX6tWk3qWuce7vKLrj7lnulVHEdWoltgauo8bgCXgU,109
|
|
52
|
+
geney-1.4.28.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
|
|
53
|
+
geney-1.4.28.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|