geney 1.4.42__py3-none-any.whl → 1.4.44__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- geney/engines.py +32 -94
- geney/models/openspliceai-mane/10000nt/model_10000nt_rs10.pt +0 -0
- geney/models/openspliceai-mane/10000nt/model_10000nt_rs11.pt +0 -0
- geney/models/openspliceai-mane/10000nt/model_10000nt_rs12.pt +0 -0
- geney/models/openspliceai-mane/10000nt/model_10000nt_rs13.pt +0 -0
- geney/models/openspliceai-mane/10000nt/model_10000nt_rs14.pt +0 -0
- {geney-1.4.42.dist-info → geney-1.4.44.dist-info}/METADATA +2 -4
- geney-1.4.44.dist-info/RECORD +16 -0
- geney-1.4.42.dist-info/RECORD +0 -11
- {geney-1.4.42.dist-info → geney-1.4.44.dist-info}/WHEEL +0 -0
- {geney-1.4.42.dist-info → geney-1.4.44.dist-info}/top_level.txt +0 -0
geney/engines.py
CHANGED
|
@@ -6,9 +6,7 @@ import numpy as np
|
|
|
6
6
|
|
|
7
7
|
# Lazy-loaded model containers (loaded automatically on first use)
|
|
8
8
|
_pang_models = None
|
|
9
|
-
_sai_models = None
|
|
10
9
|
_pang_device = None
|
|
11
|
-
_sai_device = None
|
|
12
10
|
|
|
13
11
|
|
|
14
12
|
def _get_torch_device():
|
|
@@ -27,21 +25,6 @@ def _get_torch_device():
|
|
|
27
25
|
return torch.device("cpu")
|
|
28
26
|
|
|
29
27
|
|
|
30
|
-
def _get_tensorflow_device():
|
|
31
|
-
"""Get the best available TensorFlow device."""
|
|
32
|
-
import sys
|
|
33
|
-
import tensorflow as tf
|
|
34
|
-
|
|
35
|
-
try:
|
|
36
|
-
if tf.config.list_physical_devices('GPU'):
|
|
37
|
-
return '/GPU:0'
|
|
38
|
-
elif sys.platform == 'darwin' and tf.config.list_physical_devices('MPS'):
|
|
39
|
-
return '/device:GPU:0'
|
|
40
|
-
except Exception:
|
|
41
|
-
pass
|
|
42
|
-
return '/CPU:0'
|
|
43
|
-
|
|
44
|
-
|
|
45
28
|
def _load_pangolin_models():
|
|
46
29
|
"""Lazy load Pangolin models."""
|
|
47
30
|
global _pang_models, _pang_device
|
|
@@ -75,49 +58,23 @@ def _load_pangolin_models():
|
|
|
75
58
|
return _pang_models
|
|
76
59
|
|
|
77
60
|
|
|
78
|
-
|
|
79
|
-
"""Lazy load SpliceAI models."""
|
|
80
|
-
global _sai_models, _sai_device
|
|
61
|
+
_OPENSPLICEAI_MODEL_DIR = None
|
|
81
62
|
|
|
82
|
-
|
|
83
|
-
|
|
63
|
+
def _get_openspliceai_model_dir() -> str:
|
|
64
|
+
"""Return the path to the OpenSpliceAI MANE 10000nt model directory."""
|
|
65
|
+
global _OPENSPLICEAI_MODEL_DIR
|
|
66
|
+
if _OPENSPLICEAI_MODEL_DIR is not None:
|
|
67
|
+
return _OPENSPLICEAI_MODEL_DIR
|
|
84
68
|
|
|
85
69
|
import os
|
|
86
|
-
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
|
|
87
|
-
|
|
88
|
-
import sys
|
|
89
|
-
import tensorflow as tf
|
|
90
|
-
from keras.models import load_model
|
|
91
|
-
from importlib import resources
|
|
92
70
|
|
|
93
|
-
|
|
94
|
-
|
|
71
|
+
# Models ship inside the package at geney/models/openspliceai-mane/10000nt
|
|
72
|
+
pkg_dir = os.path.dirname(os.path.abspath(__file__))
|
|
73
|
+
default = os.path.join(pkg_dir, 'models', 'openspliceai-mane', '10000nt')
|
|
74
|
+
default = os.path.normpath(default)
|
|
95
75
|
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
_sai_models = []
|
|
100
|
-
|
|
101
|
-
try:
|
|
102
|
-
if sys.platform == 'darwin':
|
|
103
|
-
model_filenames = [f"models/spliceai{i}.h5" for i in range(1, 6)]
|
|
104
|
-
model_paths = [resources.files('spliceai').joinpath(f) for f in model_filenames]
|
|
105
|
-
else:
|
|
106
|
-
model_paths = [f"/tamir2/nicolaslynn/tools/SpliceAI/spliceai/models/spliceai{i}.h5"
|
|
107
|
-
for i in range(1, 6)]
|
|
108
|
-
|
|
109
|
-
with tf.device(_sai_device):
|
|
110
|
-
for i, model_path in enumerate(model_paths):
|
|
111
|
-
try:
|
|
112
|
-
model = load_model(str(model_path))
|
|
113
|
-
_sai_models.append(model)
|
|
114
|
-
except Exception as e:
|
|
115
|
-
print(f"Warning: Failed to load SpliceAI model {i+1}: {e}")
|
|
116
|
-
except Exception as e:
|
|
117
|
-
print(f"Error loading SpliceAI models: {e}")
|
|
118
|
-
|
|
119
|
-
print(f"SpliceAI loaded ({len(_sai_models)} models).")
|
|
120
|
-
return _sai_models
|
|
76
|
+
_OPENSPLICEAI_MODEL_DIR = os.environ.get('OPENSPLICEAI_MODEL_DIR', default)
|
|
77
|
+
return _OPENSPLICEAI_MODEL_DIR
|
|
121
78
|
|
|
122
79
|
|
|
123
80
|
def pang_one_hot_encode(seq: str) -> np.ndarray:
|
|
@@ -142,25 +99,6 @@ def pang_one_hot_encode(seq: str) -> np.ndarray:
|
|
|
142
99
|
return IN_MAP[seq_array.astype('int8')]
|
|
143
100
|
|
|
144
101
|
|
|
145
|
-
def one_hot_encode(seq: str) -> np.ndarray:
|
|
146
|
-
"""One-hot encode DNA sequence for SpliceAI model."""
|
|
147
|
-
if not isinstance(seq, str):
|
|
148
|
-
raise TypeError(f"Expected string, got {type(seq).__name__}")
|
|
149
|
-
|
|
150
|
-
valid_chars = set('ACGTN')
|
|
151
|
-
if not all(c.upper() in valid_chars for c in seq):
|
|
152
|
-
raise ValueError("Sequence contains invalid characters")
|
|
153
|
-
|
|
154
|
-
encoding_map = np.asarray([[0, 0, 0, 0], # N
|
|
155
|
-
[1, 0, 0, 0], # A
|
|
156
|
-
[0, 1, 0, 0], # C
|
|
157
|
-
[0, 0, 1, 0], # G
|
|
158
|
-
[0, 0, 0, 1]]) # T
|
|
159
|
-
|
|
160
|
-
seq = seq.upper().replace('A', '\x01').replace('C', '\x02')
|
|
161
|
-
seq = seq.replace('G', '\x03').replace('T', '\x04').replace('N', '\x00')
|
|
162
|
-
|
|
163
|
-
return encoding_map[np.frombuffer(seq.encode('latin1'), np.int8) % 5]
|
|
164
102
|
|
|
165
103
|
|
|
166
104
|
def pangolin_predict_probs(seq: str, models: list = None) -> Tuple[List[float], List[float]]:
|
|
@@ -208,30 +146,30 @@ def pangolin_predict_probs(seq: str, models: list = None) -> Tuple[List[float],
|
|
|
208
146
|
return donor_probs, acceptor_probs
|
|
209
147
|
|
|
210
148
|
|
|
211
|
-
def sai_predict_probs(seq: str
|
|
212
|
-
"""Predict
|
|
213
|
-
if models is None:
|
|
214
|
-
models = _load_spliceai_models()
|
|
149
|
+
def sai_predict_probs(seq: str) -> Tuple[np.ndarray, np.ndarray]:
|
|
150
|
+
"""Predict acceptor and donor probabilities using OpenSpliceAI.
|
|
215
151
|
|
|
216
|
-
|
|
217
|
-
|
|
152
|
+
Uses the OpenSpliceAI predict() function which handles encoding,
|
|
153
|
+
windowing, ensemble averaging, and softmax internally.
|
|
218
154
|
|
|
219
|
-
|
|
220
|
-
|
|
155
|
+
Returns (acceptor_probs, donor_probs) as numpy arrays matching the
|
|
156
|
+
full input sequence length.
|
|
157
|
+
"""
|
|
158
|
+
from openspliceai.predict.predict import predict
|
|
159
|
+
import io, sys
|
|
221
160
|
|
|
222
|
-
|
|
161
|
+
model_dir = _get_openspliceai_model_dir()
|
|
223
162
|
|
|
224
|
-
#
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
y =
|
|
233
|
-
y
|
|
234
|
-
return y[0, :], y[1, :]
|
|
163
|
+
# Suppress OpenSpliceAI's verbose print output
|
|
164
|
+
_stdout = sys.stdout
|
|
165
|
+
sys.stdout = io.StringIO()
|
|
166
|
+
try:
|
|
167
|
+
y = predict(seq, model_dir, flanking_size=10000) # (seq_len, 3)
|
|
168
|
+
finally:
|
|
169
|
+
sys.stdout = _stdout
|
|
170
|
+
|
|
171
|
+
y = y.numpy()
|
|
172
|
+
return y[:, 1], y[:, 2] # acceptor, donor
|
|
235
173
|
|
|
236
174
|
|
|
237
175
|
def run_spliceai_seq(
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: geney
|
|
3
|
-
Version: 1.4.42
|
|
3
|
+
Version: 1.4.44
|
|
4
4
|
Summary: A Python package for gene expression modeling.
|
|
5
5
|
Home-page: https://github.com/nicolaslynn/geney
|
|
6
6
|
Author: Nicolas Lynn
|
|
@@ -18,11 +18,9 @@ Requires-Dist: pandas==2.1.4
|
|
|
18
18
|
Requires-Dist: biopython>=1.81
|
|
19
19
|
Requires-Dist: matplotlib
|
|
20
20
|
Requires-Dist: seaborn
|
|
21
|
-
Requires-Dist: tensorflow>=2.8.0
|
|
22
|
-
Requires-Dist: keras>=2.8.0
|
|
23
21
|
Requires-Dist: torch
|
|
22
|
+
Requires-Dist: openspliceai
|
|
24
23
|
Requires-Dist: seqmat
|
|
25
|
-
Requires-Dist: h5py
|
|
26
24
|
Dynamic: author
|
|
27
25
|
Dynamic: author-email
|
|
28
26
|
Dynamic: classifier
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
geney/__init__.py,sha256=AIKhk1FPsnObK-GrBXU1T780KX0i1K-q34r4zh3ojok,993
|
|
2
|
+
geney/engines.py,sha256=QfwzcLZvRPNVuiO7VQuSf-5Ay-4lYxw7J5zYygCPPeM,12288
|
|
3
|
+
geney/oncosplice.py,sha256=rEVNhHugtzOvwicdjzRqbcyWG-KM0JIgvQa9Mpo23p4,18076
|
|
4
|
+
geney/pipelines.py,sha256=ply3zS5zgA-4I-vCRF0_bJmZ4CtIrzSSpUQOsh3AZmo,4469
|
|
5
|
+
geney/splice_graph.py,sha256=BnJDSOq3mePC0I0cFyfXyUyePANwhrk3LlBPoV1vzSs,24081
|
|
6
|
+
geney/transcripts.py,sha256=BBgyeqF4jeIiHaD_bXxgOTXz19kdUgjcPVo4ClpcSUg,2594
|
|
7
|
+
geney/variants.py,sha256=vjbiBH-duZ4TJZyXwXbQ_VmJxCFafjeDwLNTZg3ubSc,11832
|
|
8
|
+
geney/models/openspliceai-mane/10000nt/model_10000nt_rs10.pt,sha256=ew1q9iOeJzkiJrutoxDunizgztFJ-2_f-JlsEsujzIU,2878124
|
|
9
|
+
geney/models/openspliceai-mane/10000nt/model_10000nt_rs11.pt,sha256=OP5Eae_wn_5cqikbwiQkjIHtaKzKC3Ka-4dFCQmoEw4,2878124
|
|
10
|
+
geney/models/openspliceai-mane/10000nt/model_10000nt_rs12.pt,sha256=SfOOP9Doe4HOItmG0RaOubLFVmi_1qqgZNoQgu5mhmc,2878124
|
|
11
|
+
geney/models/openspliceai-mane/10000nt/model_10000nt_rs13.pt,sha256=uuJ1a0UvMdRbdGBIFlYdp5KE15G-N_RnSRpoNCyBmyA,2878124
|
|
12
|
+
geney/models/openspliceai-mane/10000nt/model_10000nt_rs14.pt,sha256=z-J0ExHtAtqzBCSSpb4UApsY1jvmk3UelTQxWp5MYnE,2878124
|
|
13
|
+
geney-1.4.44.dist-info/METADATA,sha256=pUGCeXBltgnICOYVdgH81g_sDo2LlPL486-4vYGLCrM,919
|
|
14
|
+
geney-1.4.44.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
15
|
+
geney-1.4.44.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
|
|
16
|
+
geney-1.4.44.dist-info/RECORD,,
|
geney-1.4.42.dist-info/RECORD
DELETED
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
geney/__init__.py,sha256=AIKhk1FPsnObK-GrBXU1T780KX0i1K-q34r4zh3ojok,993
|
|
2
|
-
geney/engines.py,sha256=9_oNsoluJsjdLC3cyWttjHF3cuQoy65FWgS4r7ehzek,14296
|
|
3
|
-
geney/oncosplice.py,sha256=rEVNhHugtzOvwicdjzRqbcyWG-KM0JIgvQa9Mpo23p4,18076
|
|
4
|
-
geney/pipelines.py,sha256=ply3zS5zgA-4I-vCRF0_bJmZ4CtIrzSSpUQOsh3AZmo,4469
|
|
5
|
-
geney/splice_graph.py,sha256=BnJDSOq3mePC0I0cFyfXyUyePANwhrk3LlBPoV1vzSs,24081
|
|
6
|
-
geney/transcripts.py,sha256=BBgyeqF4jeIiHaD_bXxgOTXz19kdUgjcPVo4ClpcSUg,2594
|
|
7
|
-
geney/variants.py,sha256=vjbiBH-duZ4TJZyXwXbQ_VmJxCFafjeDwLNTZg3ubSc,11832
|
|
8
|
-
geney-1.4.42.dist-info/METADATA,sha256=ZTuy2ydDiv2Pndu1R1TADe_3kA1FN7xbI51NdiZmQgs,972
|
|
9
|
-
geney-1.4.42.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
10
|
-
geney-1.4.42.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
|
|
11
|
-
geney-1.4.42.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|