geney 1.4.41__py3-none-any.whl → 1.4.44__py3-none-any.whl

This diff compares the contents of two publicly available versions of a package, each released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
geney/__init__.py CHANGED
@@ -15,6 +15,7 @@ from .pipelines import (
15
15
  max_splicing_delta,
16
16
  oncosplice_pipeline_single_transcript, # backwards compat
17
17
  )
18
+ # from .samples import *
18
19
 
19
20
  __all__ = [
20
21
  "Mutation",
@@ -35,4 +36,5 @@ __all__ = [
35
36
 
36
37
 
37
38
  mut_id = 'KRAS:12:25227343:G:T'
38
- epistasis_id = 'KRAS:12:25227343:G:T|KRAS:12:25227344:A:T'
39
+ epistasis_id = 'KRAS:12:25227343:G:T|KRAS:12:25227344:A:T'
40
+
geney/engines.py CHANGED
@@ -6,9 +6,7 @@ import numpy as np
6
6
 
7
7
  # Lazy-loaded model containers (loaded automatically on first use)
8
8
  _pang_models = None
9
- _sai_models = None
10
9
  _pang_device = None
11
- _sai_device = None
12
10
 
13
11
 
14
12
  def _get_torch_device():
@@ -27,21 +25,6 @@ def _get_torch_device():
27
25
  return torch.device("cpu")
28
26
 
29
27
 
30
- def _get_tensorflow_device():
31
- """Get the best available TensorFlow device."""
32
- import sys
33
- import tensorflow as tf
34
-
35
- try:
36
- if tf.config.list_physical_devices('GPU'):
37
- return '/GPU:0'
38
- elif sys.platform == 'darwin' and tf.config.list_physical_devices('MPS'):
39
- return '/device:GPU:0'
40
- except Exception:
41
- pass
42
- return '/CPU:0'
43
-
44
-
45
28
  def _load_pangolin_models():
46
29
  """Lazy load Pangolin models."""
47
30
  global _pang_models, _pang_device
@@ -75,49 +58,23 @@ def _load_pangolin_models():
75
58
  return _pang_models
76
59
 
77
60
 
78
- def _load_spliceai_models():
79
- """Lazy load SpliceAI models."""
80
- global _sai_models, _sai_device
61
+ _OPENSPLICEAI_MODEL_DIR = None
81
62
 
82
- if _sai_models is not None:
83
- return _sai_models
63
+ def _get_openspliceai_model_dir() -> str:
64
+ """Return the path to the OpenSpliceAI MANE 10000nt model directory."""
65
+ global _OPENSPLICEAI_MODEL_DIR
66
+ if _OPENSPLICEAI_MODEL_DIR is not None:
67
+ return _OPENSPLICEAI_MODEL_DIR
84
68
 
85
69
  import os
86
- os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
87
-
88
- import sys
89
- import tensorflow as tf
90
- from keras.models import load_model
91
- from importlib import resources
92
70
 
93
- import absl.logging
94
- absl.logging.set_verbosity(absl.logging.ERROR)
71
+ # Models ship inside the package at geney/models/openspliceai-mane/10000nt
72
+ pkg_dir = os.path.dirname(os.path.abspath(__file__))
73
+ default = os.path.join(pkg_dir, 'models', 'openspliceai-mane', '10000nt')
74
+ default = os.path.normpath(default)
95
75
 
96
- _sai_device = _get_tensorflow_device()
97
- print(f"SpliceAI loading to {_sai_device}...")
98
-
99
- _sai_models = []
100
-
101
- try:
102
- if sys.platform == 'darwin':
103
- model_filenames = [f"models/spliceai{i}.h5" for i in range(1, 6)]
104
- model_paths = [resources.files('spliceai').joinpath(f) for f in model_filenames]
105
- else:
106
- model_paths = [f"/tamir2/nicolaslynn/tools/SpliceAI/spliceai/models/spliceai{i}.h5"
107
- for i in range(1, 6)]
108
-
109
- with tf.device(_sai_device):
110
- for i, model_path in enumerate(model_paths):
111
- try:
112
- model = load_model(str(model_path))
113
- _sai_models.append(model)
114
- except Exception as e:
115
- print(f"Warning: Failed to load SpliceAI model {i+1}: {e}")
116
- except Exception as e:
117
- print(f"Error loading SpliceAI models: {e}")
118
-
119
- print(f"SpliceAI loaded ({len(_sai_models)} models).")
120
- return _sai_models
76
+ _OPENSPLICEAI_MODEL_DIR = os.environ.get('OPENSPLICEAI_MODEL_DIR', default)
77
+ return _OPENSPLICEAI_MODEL_DIR
121
78
 
122
79
 
123
80
  def pang_one_hot_encode(seq: str) -> np.ndarray:
@@ -142,25 +99,6 @@ def pang_one_hot_encode(seq: str) -> np.ndarray:
142
99
  return IN_MAP[seq_array.astype('int8')]
143
100
 
144
101
 
145
- def one_hot_encode(seq: str) -> np.ndarray:
146
- """One-hot encode DNA sequence for SpliceAI model."""
147
- if not isinstance(seq, str):
148
- raise TypeError(f"Expected string, got {type(seq).__name__}")
149
-
150
- valid_chars = set('ACGTN')
151
- if not all(c.upper() in valid_chars for c in seq):
152
- raise ValueError("Sequence contains invalid characters")
153
-
154
- encoding_map = np.asarray([[0, 0, 0, 0], # N
155
- [1, 0, 0, 0], # A
156
- [0, 1, 0, 0], # C
157
- [0, 0, 1, 0], # G
158
- [0, 0, 0, 1]]) # T
159
-
160
- seq = seq.upper().replace('A', '\x01').replace('C', '\x02')
161
- seq = seq.replace('G', '\x03').replace('T', '\x04').replace('N', '\x00')
162
-
163
- return encoding_map[np.frombuffer(seq.encode('latin1'), np.int8) % 5]
164
102
 
165
103
 
166
104
  def pangolin_predict_probs(seq: str, models: list = None) -> Tuple[List[float], List[float]]:
@@ -208,30 +146,30 @@ def pangolin_predict_probs(seq: str, models: list = None) -> Tuple[List[float],
208
146
  return donor_probs, acceptor_probs
209
147
 
210
148
 
211
- def sai_predict_probs(seq: str, models: list = None) -> Tuple[np.ndarray, np.ndarray]:
212
- """Predict donor and acceptor probabilities using SpliceAI."""
213
- if models is None:
214
- models = _load_spliceai_models()
149
+ def sai_predict_probs(seq: str) -> Tuple[np.ndarray, np.ndarray]:
150
+ """Predict acceptor and donor probabilities using OpenSpliceAI.
215
151
 
216
- if not models:
217
- raise ValueError("No SpliceAI models loaded")
152
+ Uses the OpenSpliceAI predict() function which handles encoding,
153
+ windowing, ensemble averaging, and softmax internally.
218
154
 
219
- if len(seq) < 1000:
220
- raise ValueError(f"Sequence too short: {len(seq)} (expected >= 1000)")
155
+ Returns (acceptor_probs, donor_probs) as numpy arrays matching the
156
+ full input sequence length.
157
+ """
158
+ from openspliceai.predict.predict import predict
159
+ import io, sys
221
160
 
222
- x = one_hot_encode(seq)[None, :].astype(np.float32)
161
+ model_dir = _get_openspliceai_model_dir()
223
162
 
224
- # Use direct model call instead of .predict() to avoid Jupyter kernel issues
225
- preds = []
226
- for model in models:
227
- pred = model(x, training=False)
228
- if hasattr(pred, 'numpy'):
229
- pred = pred.numpy()
230
- preds.append(pred)
231
-
232
- y = np.mean(preds, axis=0)
233
- y = y[0, :, 1:].T
234
- return y[0, :], y[1, :]
163
+ # Suppress OpenSpliceAI's verbose print output
164
+ _stdout = sys.stdout
165
+ sys.stdout = io.StringIO()
166
+ try:
167
+ y = predict(seq, model_dir, flanking_size=10000) # (seq_len, 3)
168
+ finally:
169
+ sys.stdout = _stdout
170
+
171
+ y = y.numpy()
172
+ return y[:, 1], y[:, 2] # acceptor, donor
235
173
 
236
174
 
237
175
  def run_spliceai_seq(
geney/oncosplice.py CHANGED
@@ -388,9 +388,10 @@ class Oncosplice:
388
388
  analysis_dict = {
389
389
  'reference_protein': self.reference_protein,
390
390
  'variant_protein': self.variant_protein,
391
+ 'aligned_reference_protein': self.alignment.seqA,
392
+ 'aligned_variant_protein': self.alignment.seqB,
391
393
  'reference_length': len(self.reference_protein),
392
394
  'variant_length': len(self.variant_protein),
393
- # 'alignment_length': len(self.alignment.seqA),
394
395
  'oncosplice_score': self.score,
395
396
  'percentile': self.percentile,
396
397
  'number_of_deletions': len(self.deletions),
geney/pipelines.py CHANGED
@@ -34,6 +34,10 @@ def oncosplice_pipeline(
34
34
  .generate_protein()
35
35
  )
36
36
 
37
+ # Truncate protein at first stop codon
38
+ if '*' in reference_transcript.protein:
39
+ reference_transcript.protein = reference_transcript.protein[:reference_transcript.protein.index('*') + 1]
40
+
37
41
  tl = TranscriptLibrary(reference_transcript, m)
38
42
  central_pos = m.central_position
39
43
 
@@ -71,6 +75,7 @@ def oncosplice_pipeline(
71
75
  pd.Series({
72
76
  "reference_mrna": reference_transcript.mature_mrna.seq,
73
77
  "variant_mrna": variant_transcript.mature_mrna.seq,
78
+ "conservation_vector": onco.conservation_vector,
74
79
  }),
75
80
  onco.get_analysis_series(),
76
81
  ])
@@ -123,6 +128,10 @@ def max_splicing_delta(
123
128
  .generate_protein()
124
129
  )
125
130
 
131
+ # Truncate protein at first stop codon
132
+ if '*' in reference_transcript.protein:
133
+ reference_transcript.protein = reference_transcript.protein[:reference_transcript.protein.index('*') + 1]
134
+
126
135
  tl = TranscriptLibrary(reference_transcript, m)
127
136
  splicing_results = tl.predict_splicing(
128
137
  m.central_position, engine=splicing_engine, inplace=True
geney/splice_graph.py CHANGED
@@ -422,6 +422,10 @@ class SpliceSimulator:
422
422
  t.path_hash = _short_hash(tuple(donors + acceptors))
423
423
  t.generate_mature_mrna().generate_protein()
424
424
 
425
+ # Truncate protein at first stop codon
426
+ if '*' in t.protein:
427
+ t.protein = t.protein[:t.protein.index('*') + 1]
428
+
425
429
  # Filter out implausible IR paths (where cryptic sites compensate)
426
430
  if self._is_implausible_ir_path(t):
427
431
  continue
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: geney
3
- Version: 1.4.41
3
+ Version: 1.4.44
4
4
  Summary: A Python package for gene expression modeling.
5
5
  Home-page: https://github.com/nicolaslynn/geney
6
6
  Author: Nicolas Lynn
@@ -18,11 +18,9 @@ Requires-Dist: pandas==2.1.4
18
18
  Requires-Dist: biopython>=1.81
19
19
  Requires-Dist: matplotlib
20
20
  Requires-Dist: seaborn
21
- Requires-Dist: tensorflow>=2.8.0
22
- Requires-Dist: keras>=2.8.0
23
21
  Requires-Dist: torch
22
+ Requires-Dist: openspliceai
24
23
  Requires-Dist: seqmat
25
- Requires-Dist: h5py
26
24
  Dynamic: author
27
25
  Dynamic: author-email
28
26
  Dynamic: classifier
@@ -0,0 +1,16 @@
1
+ geney/__init__.py,sha256=AIKhk1FPsnObK-GrBXU1T780KX0i1K-q34r4zh3ojok,993
2
+ geney/engines.py,sha256=QfwzcLZvRPNVuiO7VQuSf-5Ay-4lYxw7J5zYygCPPeM,12288
3
+ geney/oncosplice.py,sha256=rEVNhHugtzOvwicdjzRqbcyWG-KM0JIgvQa9Mpo23p4,18076
4
+ geney/pipelines.py,sha256=ply3zS5zgA-4I-vCRF0_bJmZ4CtIrzSSpUQOsh3AZmo,4469
5
+ geney/splice_graph.py,sha256=BnJDSOq3mePC0I0cFyfXyUyePANwhrk3LlBPoV1vzSs,24081
6
+ geney/transcripts.py,sha256=BBgyeqF4jeIiHaD_bXxgOTXz19kdUgjcPVo4ClpcSUg,2594
7
+ geney/variants.py,sha256=vjbiBH-duZ4TJZyXwXbQ_VmJxCFafjeDwLNTZg3ubSc,11832
8
+ geney/models/openspliceai-mane/10000nt/model_10000nt_rs10.pt,sha256=ew1q9iOeJzkiJrutoxDunizgztFJ-2_f-JlsEsujzIU,2878124
9
+ geney/models/openspliceai-mane/10000nt/model_10000nt_rs11.pt,sha256=OP5Eae_wn_5cqikbwiQkjIHtaKzKC3Ka-4dFCQmoEw4,2878124
10
+ geney/models/openspliceai-mane/10000nt/model_10000nt_rs12.pt,sha256=SfOOP9Doe4HOItmG0RaOubLFVmi_1qqgZNoQgu5mhmc,2878124
11
+ geney/models/openspliceai-mane/10000nt/model_10000nt_rs13.pt,sha256=uuJ1a0UvMdRbdGBIFlYdp5KE15G-N_RnSRpoNCyBmyA,2878124
12
+ geney/models/openspliceai-mane/10000nt/model_10000nt_rs14.pt,sha256=z-J0ExHtAtqzBCSSpb4UApsY1jvmk3UelTQxWp5MYnE,2878124
13
+ geney-1.4.44.dist-info/METADATA,sha256=pUGCeXBltgnICOYVdgH81g_sDo2LlPL486-4vYGLCrM,919
14
+ geney-1.4.44.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
15
+ geney-1.4.44.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
16
+ geney-1.4.44.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.9.0)
2
+ Generator: setuptools (80.10.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,11 +0,0 @@
1
- geney/__init__.py,sha256=nkhniqCNWJzrb7xHgTDFEXSvRVdggb9ZCJ7ih7HEYq8,966
2
- geney/engines.py,sha256=9_oNsoluJsjdLC3cyWttjHF3cuQoy65FWgS4r7ehzek,14296
3
- geney/oncosplice.py,sha256=eGQQl9ftmoFENMYBWoJtenKWmzyxR9N1of5cZst_bHQ,18014
4
- geney/pipelines.py,sha256=gsy-gmHIi260SC5MKQ9IBSE0wko8Tvd7IC3wj083mPQ,3996
5
- geney/splice_graph.py,sha256=PANtLUAQiz578NZwxVlTSgboetnToHnQSkYpT0zbi_w,23931
6
- geney/transcripts.py,sha256=BBgyeqF4jeIiHaD_bXxgOTXz19kdUgjcPVo4ClpcSUg,2594
7
- geney/variants.py,sha256=vjbiBH-duZ4TJZyXwXbQ_VmJxCFafjeDwLNTZg3ubSc,11832
8
- geney-1.4.41.dist-info/METADATA,sha256=zuzWKIEeHSaFr08eRUjq3ZSiloOepcCD_QRG5ifS8j0,972
9
- geney-1.4.41.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
10
- geney-1.4.41.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
11
- geney-1.4.41.dist-info/RECORD,,