geney 1.4.30__py2.py3-none-any.whl → 1.4.32__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of geney might be problematic. Click here for more details.

geney/utils/SeqMats.py CHANGED
@@ -215,6 +215,8 @@ class SeqMat:
215
215
  # else:
216
216
  # raise ValueError(f"Unsupported mutation {pos}:{ref}:{alt}.")
217
217
  # Bucket mutations
218
+
219
+
218
220
  subs, ins, dels = [], [], []
219
221
  for p, r, a in mutations:
220
222
  # left-normalize
@@ -286,18 +288,30 @@ class SeqMat:
286
288
  self.notes['total_mutations'] = self.notes.get('total_mutations', 0) + applied_mutations
287
289
  return self
288
290
 
289
- def complement(self) -> SeqMat:
290
- comp = {b'A': b'T', b'T': b'A', b'C': b'G', b'G': b'C', b'-': b'-'}
291
- nts = np.array([comp[x] for x in self.seq_array['nt']], dtype='S1')
292
- new = self.clone()
293
- new.seq_array['nt'] = nts
294
- return new
291
+ # def complement(self) -> SeqMat:
292
+ # comp = {b'A': b'T', b'T': b'A', b'C': b'G', b'G': b'C', b'-': b'-'}
293
+ # nts = np.array([comp[x] for x in self.seq_array['nt']], dtype='S1')
294
+ # new = self.clone()
295
+ # new.seq_array['nt'] = nts
296
+ # return new
297
+ #
298
+ # def reverse_complement(self) -> SeqMat:
299
+ # new = self.complement().clone()
300
+ # new.seq_array = new.seq_array[::-1].copy()
301
+ # new.rev = not self.rev
302
+ # return new
303
+ def complement(self) -> "SeqMat":
304
+ comp_dict = {b"A": b"T", b"T": b"A", b"C": b"G", b"G": b"C", b"-": b"-", b"N": b"N"}
305
+ comp_seq = np.array([comp_dict[nt] for nt in self.seq_array["nt"]], dtype="S1")
306
+ new_instance = self.clone()
307
+ new_instance.seq_array["nt"] = comp_seq
308
+ return new_instance
295
309
 
296
- def reverse_complement(self) -> SeqMat:
297
- new = self.complement().clone()
298
- new.seq_array = new.seq_array[::-1].copy()
299
- new.rev = not self.rev
300
- return new
310
+ def reverse_complement(self) -> "SeqMat":
311
+ rev_comp_seq = self.complement().seq_array[::-1]
312
+ self.seq_array = rev_comp_seq.copy()
313
+ self.rev = not self.rev
314
+ return self
301
315
 
302
316
  def __getitem__(self, key: Union[int, slice]) -> np.ndarray:
303
317
  coords = self.seq_array['index']
@@ -424,7 +438,7 @@ class SeqMat:
424
438
 
425
439
  # Run the splicing prediction engine (function assumed to be defined externally)
426
440
  from .splicing_utils import run_splicing_engine
427
- donor_probs, acceptor_probs = run_splicing_engine(seq, engine)
441
+ donor_probs, acceptor_probs = run_splicing_engine(seq=seq, splicing_engine=engine)
428
442
  # Trim off the fixed flanks before returning results.
429
443
  seq = seq[5000:-5000]
430
444
  indices = indices[5000:-5000]
@@ -11,29 +11,44 @@ from spliceai_pytorch import SpliceAI
11
11
  model = SpliceAI.from_preconfigured('10k')
12
12
 
13
13
 
14
+ device = torch.device('cpu')
14
15
  if sys.platform == 'darwin':
15
16
  device = torch.device("mps") if torch.backends.mps.is_available() else torch.device("cpu")
16
17
 
17
18
  if sys.platform == 'linux':
18
19
  device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
19
20
 
20
-
21
21
  print(f"SpliceAI loaded to {device}.")
22
22
  model.to(device)
23
23
 
24
- def one_hot_encode(seq):
25
24
 
26
- map = np.asarray([[0, 0, 0, 0],
27
- [1, 0, 0, 0],
28
- [0, 1, 0, 0],
29
- [0, 0, 1, 0],
30
- [0, 0, 0, 1]])
25
+ def one_hot_encode(seq: str) -> torch.Tensor:
26
+ """
27
+ One-hot encodes a nucleotide sequence into shape [L, 4] (A, C, G, T).
28
+ Unknowns (N or other) are mapped to all-zero vectors.
29
+ """
30
+ map = np.array([
31
+ [0, 0, 0, 0], # index 0: unknown (N, etc.)
32
+ [1, 0, 0, 0], # A
33
+ [0, 1, 0, 0], # C
34
+ [0, 0, 1, 0], # G
35
+ [0, 0, 0, 1], # T
36
+ ], dtype=np.float32)
37
+
38
+ # Build mapping: ASCII values
39
+ ascii_seq = np.frombuffer(seq.upper().encode("ascii"), dtype=np.uint8)
31
40
 
32
- seq = seq.upper().replace('A', '\x01').replace('C', '\x02')
33
- seq = seq.replace('G', '\x03').replace('T', '\x04').replace('N', '\x00')
41
+ # A=65, C=67, G=71, T=84 → map A/C/G/T to 1/2/3/4; others to 0
42
+ code_map = np.zeros(128, dtype=np.uint8)
43
+ code_map[ord('A')] = 1
44
+ code_map[ord('C')] = 2
45
+ code_map[ord('G')] = 3
46
+ code_map[ord('T')] = 4
34
47
 
35
- return map[np.fromstring(seq, np.int8) % 5]
48
+ indices = code_map[ascii_seq] # shape [L]
49
+ onehot = map[indices] # shape [L, 4]
36
50
 
51
+ return torch.tensor(onehot, dtype=torch.float32)
37
52
 
38
53
  def sai_predict_probs(seq: str, model) -> list:
39
54
  '''
@@ -50,10 +65,12 @@ def sai_predict_probs(seq: str, model) -> list:
50
65
  is the donor probability. These probabilities correspond to the
51
66
  middle <L NTs> NTs of the input seq.
52
67
  '''
53
- x = one_hot_encode(seq)[None, :]
68
+ x = one_hot_encode(seq)[None, :, :].transpose(1, 2) # shape: [1, 4, L]
54
69
  y = model(x)
55
- y = y[0, :, 1:].T
56
- return y[0, :], y[1, :]
70
+ probs = torch.softmax(y, dim=1) # shape: [1, 3, L]
71
+ acceptor_probs = probs[0, :, 1] # [L]
72
+ donor_probs = probs[0, :, 2] # [L]
73
+ return acceptor_probs.tolist(), donor_probs.tolist()
57
74
 
58
75
 
59
76
  def run_spliceai_seq(seq, indices, threshold=0):
@@ -3,7 +3,6 @@
3
3
  import pandas as pd
4
4
  from typing import List, Tuple, Optional
5
5
 
6
- # def run_splicing_engine(seq: Optional[str] = None, engine: str = 'spliceai') -> Tuple[List[float], List[float]]:
7
6
  def run_splicing_engine(seq: Optional[str] = None, engine: str = 'spliceai') -> Tuple[List[float], List[float]]:
8
7
  """
9
8
  Run the specified splicing engine to predict splice site probabilities on a sequence.
@@ -25,9 +24,10 @@ def run_splicing_engine(seq: Optional[str] = None, engine: str = 'spliceai') ->
25
24
 
26
25
  match engine:
27
26
  case 'spliceai':
28
- # from geney.utils.spliceai_utils import sai_predict_probs, sai_models
29
- # acceptor_probs, donor_probs = sai_predict_probs(seq, models=sai_models)
27
+ from geney.utils.spliceai_utils import sai_predict_probs, sai_models
28
+ acceptor_probs, donor_probs = sai_predict_probs(seq, models=sai_models)
30
29
 
30
+ case 'spliceai-pytorch':
31
31
  from geney.utils.spliceai_pytorch_utils import sai_predict_probs, model
32
32
  acceptor_probs, donor_probs = sai_predict_probs(seq, model=model)
33
33
 
@@ -41,7 +41,6 @@ def run_splicing_engine(seq: Optional[str] = None, engine: str = 'spliceai') ->
41
41
  return donor_probs, acceptor_probs
42
42
 
43
43
 
44
-
45
44
  def adjoin_splicing_outcomes(splicing_predictions, transcript=None):
46
45
  """
47
46
  Predicts splicing effect for multiple mutations and organizes the output as a multi-index DataFrame.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geney
3
- Version: 1.4.30
3
+ Version: 1.4.32
4
4
  Summary: A Python package for gene expression modeling.
5
5
  Home-page: https://github.com/nicolaslynn/geney
6
6
  Author: Nicolas Lynn
@@ -37,17 +37,17 @@ geney/translation_initiation/tis_utils.py,sha256=AF3siFjuQH-Rs44EV-80zHdbxRMvN4w
37
37
  geney/translation_initiation/resources/kozak_pssm.json,sha256=pcd0Olziutq-6H3mFWDCD9cujQ_AlZO-iiOvBl82hqE,1165
38
38
  geney/translation_initiation/resources/tis_regressor_model.joblib,sha256=IXb4DUDhJ5rBDKcqMk9zE3ECTZZcdj7Jixz3KpoZ7OA,2592025
39
39
  geney/utils/Fasta_segment.py,sha256=weB5NJ65P0XiyAJCiCHx4T9sHC1pWLpuQeOy0B85gyg,11364
40
- geney/utils/SeqMats.py,sha256=PaUp6PMKYDYZ8RTodmKAmCa9ywHnkqSEqTjPoATr82k,17616
40
+ geney/utils/SeqMats.py,sha256=-eoSKJFZR5OelhjXVkTIJbqYgp2xoWvgo8KWfKIBkRk,18197
41
41
  geney/utils/SeqMatsOld.py,sha256=syRU5DAuTh3xUfGW_qP9wlcBO5pHsG_y5PlrfXTIxUY,18502
42
42
  geney/utils/TranscriptLibrary.py,sha256=W1hv4Y8wRlmwTs3iFdn4_IqS-2suVDzZe4fwti2KbR4,2076
43
43
  geney/utils/__init__.py,sha256=-nJ-DMx1JzP-ZCe_QuQCeM0ZYIT_16jxoXDhUaO_4Oc,714
44
44
  geney/utils/mutation_utils.py,sha256=r-pHr56gEa5kh_DPX8MjFY3ZfYaOtyo4CUfJ5ZHlXPw,3243
45
45
  geney/utils/pangolin_utils.py,sha256=JQSPbWxdzqGFYfWQktkfLMaMSGR28eGQhNzO7MLMe5M,6162
46
- geney/utils/spliceai_pytorch_utils.py,sha256=PfMgaoG6ftWfqKZKc_JNqj5wqQRUR2B-4YF22-zNh1M,2079
46
+ geney/utils/spliceai_pytorch_utils.py,sha256=wwBoT2utKZjjaWAUo11mSoympVK1vkNxAxcJvXAh8SM,2792
47
47
  geney/utils/spliceai_utils.py,sha256=VtrIbjyQxk_3lw86eWjftRYyal9OzxArJ0GV5u_ymTg,2721
48
- geney/utils/splicing_utils.py,sha256=Hn7conjcEb3Qc5MTN2DvnlZVxgn3U_IKqOAkGuIik6g,21052
48
+ geney/utils/splicing_utils.py,sha256=Z0Z645fJhFnszpIueeMNaa-NMRLFFyJoRBR_yFbzrN0,20965
49
49
  geney/utils/utils.py,sha256=GXqlatNhix1akt3fburNzIwhiW9ZdCQSt2vmU80neyA,2370
50
- geney-1.4.30.dist-info/METADATA,sha256=_DIWzuIrV5xrRCN8VNgctibosZNMXMEVIRsX7d24WNc,990
51
- geney-1.4.30.dist-info/WHEEL,sha256=AHX6tWk3qWuce7vKLrj7lnulVHEdWoltgauo8bgCXgU,109
52
- geney-1.4.30.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
53
- geney-1.4.30.dist-info/RECORD,,
50
+ geney-1.4.32.dist-info/METADATA,sha256=SLfl8T6fgOlpb2Plf1iJxcaD-IXjclU4ZFcD3SWZTbM,990
51
+ geney-1.4.32.dist-info/WHEEL,sha256=AHX6tWk3qWuce7vKLrj7lnulVHEdWoltgauo8bgCXgU,109
52
+ geney-1.4.32.dist-info/top_level.txt,sha256=O-FuNUMb5fn9dhZ-dYCgF0aZtfi1EslMstnzhc5IIVo,6
53
+ geney-1.4.32.dist-info/RECORD,,
File without changes